Home | History | Annotate | Download | only in SelectionDAG
      1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
     11 // both before and after the DAG is legalized.
     12 //
     13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
     14 // primarily intended to handle simplification opportunities that are implicit
     15 // in the LLVM IR and exposed by the various codegen lowering phases.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/CodeGen/SelectionDAG.h"
     20 #include "llvm/ADT/SetVector.h"
     21 #include "llvm/ADT/SmallBitVector.h"
     22 #include "llvm/ADT/SmallPtrSet.h"
     23 #include "llvm/ADT/Statistic.h"
     24 #include "llvm/Analysis/AliasAnalysis.h"
     25 #include "llvm/CodeGen/MachineFrameInfo.h"
     26 #include "llvm/CodeGen/MachineFunction.h"
     27 #include "llvm/IR/DataLayout.h"
     28 #include "llvm/IR/DerivedTypes.h"
     29 #include "llvm/IR/Function.h"
     30 #include "llvm/IR/LLVMContext.h"
     31 #include "llvm/Support/CommandLine.h"
     32 #include "llvm/Support/Debug.h"
     33 #include "llvm/Support/ErrorHandling.h"
     34 #include "llvm/Support/MathExtras.h"
     35 #include "llvm/Support/raw_ostream.h"
     36 #include "llvm/Target/TargetLowering.h"
     37 #include "llvm/Target/TargetOptions.h"
     38 #include "llvm/Target/TargetRegisterInfo.h"
     39 #include "llvm/Target/TargetSubtargetInfo.h"
     40 #include <algorithm>
     41 using namespace llvm;
     42 
     43 #define DEBUG_TYPE "dagcombine"
     44 
     45 STATISTIC(NodesCombined   , "Number of dag nodes combined");
     46 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
     47 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
     48 STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
     49 STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
     50 STATISTIC(SlicedLoads, "Number of load sliced");
     51 
     52 namespace {
     53   static cl::opt<bool>
     54     CombinerAA("combiner-alias-analysis", cl::Hidden,
     55                cl::desc("Enable DAG combiner alias-analysis heuristics"));
     56 
     57   static cl::opt<bool>
     58     CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
     59                cl::desc("Enable DAG combiner's use of IR alias analysis"));
     60 
     61   static cl::opt<bool>
     62     UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
     63                cl::desc("Enable DAG combiner's use of TBAA"));
     64 
     65 #ifndef NDEBUG
     66   static cl::opt<std::string>
     67     CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
     68                cl::desc("Only use DAG-combiner alias analysis in this"
     69                         " function"));
     70 #endif
     71 
     72   /// Hidden option to stress test load slicing, i.e., when this option
     73   /// is enabled, load slicing bypasses most of its profitability guards.
     74   static cl::opt<bool>
     75   StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
     76                     cl::desc("Bypass the profitability model of load "
     77                              "slicing"),
     78                     cl::init(false));
     79 
     80   static cl::opt<bool>
     81     MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
     82                       cl::desc("DAG combiner may split indexing from loads"));
     83 
     84 //------------------------------ DAGCombiner ---------------------------------//
     85 
     86   class DAGCombiner {
     87     SelectionDAG &DAG;
     88     const TargetLowering &TLI;
     89     CombineLevel Level;
     90     CodeGenOpt::Level OptLevel;
     91     bool LegalOperations;
     92     bool LegalTypes;
     93     bool ForCodeSize;
     94 
     95     /// \brief Worklist of all of the nodes that need to be simplified.
     96     ///
     97     /// This must behave as a stack -- new nodes to process are pushed onto the
     98     /// back and when processing we pop off of the back.
     99     ///
    100     /// The worklist will not contain duplicates but may contain null entries
    101     /// due to nodes being deleted from the underlying DAG.
    102     SmallVector<SDNode *, 64> Worklist;
    103 
    104     /// \brief Mapping from an SDNode to its position on the worklist.
    105     ///
    106     /// This is used to find and remove nodes from the worklist (by nulling
    107     /// them) when they are deleted from the underlying DAG. It relies on
    108     /// stable indices of nodes within the worklist.
    109     DenseMap<SDNode *, unsigned> WorklistMap;
    110 
    111     /// \brief Set of nodes which have been combined (at least once).
    112     ///
    113     /// This is used to allow us to reliably add any operands of a DAG node
    114     /// which have not yet been combined to the worklist.
    115     SmallPtrSet<SDNode *, 64> CombinedNodes;
    116 
    117     // AA - Used for DAG load/store alias analysis.
    118     AliasAnalysis &AA;
    119 
    120     /// When an instruction is simplified, add all users of the instruction to
    121     /// the work lists because they might get more simplified now.
    122     void AddUsersToWorklist(SDNode *N) {
    123       for (SDNode *Node : N->uses())
    124         AddToWorklist(Node);
    125     }
    126 
    127     /// Call the node-specific routine that folds each particular type of node.
    128     SDValue visit(SDNode *N);
    129 
    130   public:
    131     /// Add to the worklist making sure its instance is at the back (next to be
    132     /// processed.)
    133     void AddToWorklist(SDNode *N) {
    134       // Skip handle nodes as they can't usefully be combined and confuse the
    135       // zero-use deletion strategy.
    136       if (N->getOpcode() == ISD::HANDLENODE)
    137         return;
    138 
    139       if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
    140         Worklist.push_back(N);
    141     }
    142 
    143     /// Remove all instances of N from the worklist.
    144     void removeFromWorklist(SDNode *N) {
    145       CombinedNodes.erase(N);
    146 
    147       auto It = WorklistMap.find(N);
    148       if (It == WorklistMap.end())
    149         return; // Not in the worklist.
    150 
    151       // Null out the entry rather than erasing it to avoid a linear operation.
    152       Worklist[It->second] = nullptr;
    153       WorklistMap.erase(It);
    154     }
    155 
    156     void deleteAndRecombine(SDNode *N);
    157     bool recursivelyDeleteUnusedNodes(SDNode *N);
    158 
    159     /// Replaces all uses of the results of one DAG node with new values.
    160     SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
    161                       bool AddTo = true);
    162 
    163     /// Replaces all uses of the results of one DAG node with new values.
    164     SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
    165       return CombineTo(N, &Res, 1, AddTo);
    166     }
    167 
    168     /// Replaces all uses of the results of one DAG node with new values.
    169     SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
    170                       bool AddTo = true) {
    171       SDValue To[] = { Res0, Res1 };
    172       return CombineTo(N, To, 2, AddTo);
    173     }
    174 
    175     void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);
    176 
    177   private:
    178 
    179     /// Check the specified integer node value to see if it can be simplified or
    180     /// if things it uses can be simplified by bit propagation.
    181     /// If so, return true.
    182     bool SimplifyDemandedBits(SDValue Op) {
    183       unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
    184       APInt Demanded = APInt::getAllOnesValue(BitWidth);
    185       return SimplifyDemandedBits(Op, Demanded);
    186     }
    187 
    188     bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);
    189 
    190     bool CombineToPreIndexedLoadStore(SDNode *N);
    191     bool CombineToPostIndexedLoadStore(SDNode *N);
    192     SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    193     bool SliceUpLoad(SDNode *N);
    194 
    195     /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    196     ///   load.
    197     ///
    198     /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    199     /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    200     /// \param EltNo index of the vector element to load.
    201     /// \param OriginalLoad load that EVE came from to be replaced.
    202     /// \returns EVE on success SDValue() on failure.
    203     SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    204         SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    205     void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    206     SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    207     SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    208     SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    209     SDValue PromoteIntBinOp(SDValue Op);
    210     SDValue PromoteIntShiftOp(SDValue Op);
    211     SDValue PromoteExtend(SDValue Op);
    212     bool PromoteLoad(SDValue Op);
    213 
    214     void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
    215                          SDValue Trunc, SDValue ExtLoad, SDLoc DL,
    216                          ISD::NodeType ExtType);
    217 
    218     /// Call the node-specific routine that knows how to fold each
    219     /// particular type of node. If that doesn't do anything, try the
    220     /// target-specific DAG combines.
    221     SDValue combine(SDNode *N);
    222 
    223     // Visitation implementation - Implement dag node combining for different
    224     // node types.  The semantics are as follows:
    225     // Return Value:
    226     //   SDValue.getNode() == 0 - No change was made
    227     //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    228     //   otherwise              - N should be replaced by the returned Operand.
    229     //
    230     SDValue visitTokenFactor(SDNode *N);
    231     SDValue visitMERGE_VALUES(SDNode *N);
    232     SDValue visitADD(SDNode *N);
    233     SDValue visitSUB(SDNode *N);
    234     SDValue visitADDC(SDNode *N);
    235     SDValue visitSUBC(SDNode *N);
    236     SDValue visitADDE(SDNode *N);
    237     SDValue visitSUBE(SDNode *N);
    238     SDValue visitMUL(SDNode *N);
    239     SDValue useDivRem(SDNode *N);
    240     SDValue visitSDIV(SDNode *N);
    241     SDValue visitUDIV(SDNode *N);
    242     SDValue visitREM(SDNode *N);
    243     SDValue visitMULHU(SDNode *N);
    244     SDValue visitMULHS(SDNode *N);
    245     SDValue visitSMUL_LOHI(SDNode *N);
    246     SDValue visitUMUL_LOHI(SDNode *N);
    247     SDValue visitSMULO(SDNode *N);
    248     SDValue visitUMULO(SDNode *N);
    249     SDValue visitIMINMAX(SDNode *N);
    250     SDValue visitAND(SDNode *N);
    251     SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    252     SDValue visitOR(SDNode *N);
    253     SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    254     SDValue visitXOR(SDNode *N);
    255     SDValue SimplifyVBinOp(SDNode *N);
    256     SDValue visitSHL(SDNode *N);
    257     SDValue visitSRA(SDNode *N);
    258     SDValue visitSRL(SDNode *N);
    259     SDValue visitRotate(SDNode *N);
    260     SDValue visitBSWAP(SDNode *N);
    261     SDValue visitCTLZ(SDNode *N);
    262     SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    263     SDValue visitCTTZ(SDNode *N);
    264     SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    265     SDValue visitCTPOP(SDNode *N);
    266     SDValue visitSELECT(SDNode *N);
    267     SDValue visitVSELECT(SDNode *N);
    268     SDValue visitSELECT_CC(SDNode *N);
    269     SDValue visitSETCC(SDNode *N);
    270     SDValue visitSETCCE(SDNode *N);
    271     SDValue visitSIGN_EXTEND(SDNode *N);
    272     SDValue visitZERO_EXTEND(SDNode *N);
    273     SDValue visitANY_EXTEND(SDNode *N);
    274     SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    275     SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    276     SDValue visitTRUNCATE(SDNode *N);
    277     SDValue visitBITCAST(SDNode *N);
    278     SDValue visitBUILD_PAIR(SDNode *N);
    279     SDValue visitFADD(SDNode *N);
    280     SDValue visitFSUB(SDNode *N);
    281     SDValue visitFMUL(SDNode *N);
    282     SDValue visitFMA(SDNode *N);
    283     SDValue visitFDIV(SDNode *N);
    284     SDValue visitFREM(SDNode *N);
    285     SDValue visitFSQRT(SDNode *N);
    286     SDValue visitFCOPYSIGN(SDNode *N);
    287     SDValue visitSINT_TO_FP(SDNode *N);
    288     SDValue visitUINT_TO_FP(SDNode *N);
    289     SDValue visitFP_TO_SINT(SDNode *N);
    290     SDValue visitFP_TO_UINT(SDNode *N);
    291     SDValue visitFP_ROUND(SDNode *N);
    292     SDValue visitFP_ROUND_INREG(SDNode *N);
    293     SDValue visitFP_EXTEND(SDNode *N);
    294     SDValue visitFNEG(SDNode *N);
    295     SDValue visitFABS(SDNode *N);
    296     SDValue visitFCEIL(SDNode *N);
    297     SDValue visitFTRUNC(SDNode *N);
    298     SDValue visitFFLOOR(SDNode *N);
    299     SDValue visitFMINNUM(SDNode *N);
    300     SDValue visitFMAXNUM(SDNode *N);
    301     SDValue visitBRCOND(SDNode *N);
    302     SDValue visitBR_CC(SDNode *N);
    303     SDValue visitLOAD(SDNode *N);
    304 
    305     SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    306     SDValue replaceStoreOfFPConstant(StoreSDNode *ST);
    307 
    308     SDValue visitSTORE(SDNode *N);
    309     SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    310     SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    311     SDValue visitBUILD_VECTOR(SDNode *N);
    312     SDValue visitCONCAT_VECTORS(SDNode *N);
    313     SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    314     SDValue visitVECTOR_SHUFFLE(SDNode *N);
    315     SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    316     SDValue visitINSERT_SUBVECTOR(SDNode *N);
    317     SDValue visitMLOAD(SDNode *N);
    318     SDValue visitMSTORE(SDNode *N);
    319     SDValue visitMGATHER(SDNode *N);
    320     SDValue visitMSCATTER(SDNode *N);
    321     SDValue visitFP_TO_FP16(SDNode *N);
    322     SDValue visitFP16_TO_FP(SDNode *N);
    323 
    324     SDValue visitFADDForFMACombine(SDNode *N);
    325     SDValue visitFSUBForFMACombine(SDNode *N);
    326     SDValue visitFMULForFMACombine(SDNode *N);
    327 
    328     SDValue XformToShuffleWithZero(SDNode *N);
    329     SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);
    330 
    331     SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);
    332 
    333     bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    334     SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    335     SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
    336     SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
    337                              SDValue N3, ISD::CondCode CC,
    338                              bool NotExtCompare = false);
    339     SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
    340                           SDLoc DL, bool foldBooleans = true);
    341 
    342     bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
    343                            SDValue &CC) const;
    344     bool isOneUseSetCC(SDValue N) const;
    345 
    346     SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
    347                                          unsigned HiOp);
    348     SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    349     SDValue CombineExtLoad(SDNode *N);
    350     SDValue combineRepeatedFPDivisors(SDNode *N);
    351     SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    352     SDValue BuildSDIV(SDNode *N);
    353     SDValue BuildSDIVPow2(SDNode *N);
    354     SDValue BuildUDIV(SDNode *N);
    355     SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
    356     SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    357     SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
    358                                  SDNodeFlags *Flags);
    359     SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
    360                                  SDNodeFlags *Flags);
    361     SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
    362                                bool DemandHighBits = true);
    363     SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    364     SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
    365                               SDValue InnerPos, SDValue InnerNeg,
    366                               unsigned PosOpcode, unsigned NegOpcode,
    367                               SDLoc DL);
    368     SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
    369     SDValue ReduceLoadWidth(SDNode *N);
    370     SDValue ReduceLoadOpStoreWidth(SDNode *N);
    371     SDValue TransformFPLoadStorePair(SDNode *N);
    372     SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    373     SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);
    374 
    375     SDValue GetDemandedBits(SDValue V, const APInt &Mask);
    376 
    377     /// Walk up chain skipping non-aliasing memory nodes,
    378     /// looking for aliasing nodes and adding them to the Aliases vector.
    379     void GatherAllAliases(SDNode *N, SDValue OriginalChain,
    380                           SmallVectorImpl<SDValue> &Aliases);
    381 
    382     /// Return true if there is any possibility that the two addresses overlap.
    383     bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;
    384 
    385     /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    386     /// chain (aliasing node.)
    387     SDValue FindBetterChain(SDNode *N, SDValue Chain);
    388 
    389     /// Do FindBetterChain for a store and any possibly adjacent stores on
    390     /// consecutive chains.
    391     bool findBetterNeighborChains(StoreSDNode *St);
    392 
    393     /// Holds a pointer to an LSBaseSDNode as well as information on where it
    394     /// is located in a sequence of memory operations connected by a chain.
    395     struct MemOpLink {
    396       MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
    397       MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
    398       // Ptr to the mem node.
    399       LSBaseSDNode *MemNode;
    400       // Offset from the base ptr.
    401       int64_t OffsetFromBase;
    402       // What is the sequence number of this mem node.
    403       // Lowest mem operand in the DAG starts at zero.
    404       unsigned SequenceNum;
    405     };
    406 
    407     /// This is a helper function for visitMUL to check the profitability
    408     /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    409     /// MulNode is the original multiply, AddNode is (add x, c1),
    410     /// and ConstNode is c2.
    411     bool isMulAddWithConstProfitable(SDNode *MulNode,
    412                                      SDValue &AddNode,
    413                                      SDValue &ConstNode);
    414 
    415     /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
    416     /// constant build_vector of the stored constant values in Stores.
    417     SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
    418                                          SDLoc SL,
    419                                          ArrayRef<MemOpLink> Stores,
    420                                          SmallVectorImpl<SDValue> &Chains,
    421                                          EVT Ty) const;
    422 
    423     /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    424     /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    425     /// the type of the loaded value to be extended.  LoadedVT returns the type
    426     /// of the original loaded value.  NarrowLoad returns whether the load would
    427     /// need to be narrowed in order to match.
    428     bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
    429                           EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
    430                           bool &NarrowLoad);
    431 
    432     /// This is a helper function for MergeConsecutiveStores. When the source
    433     /// elements of the consecutive stores are all constants or all extracted
    434     /// vector elements, try to merge them into one larger store.
    435     /// \return True if a merged store was created.
    436     bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
    437                                          EVT MemVT, unsigned NumStores,
    438                                          bool IsConstantSrc, bool UseVector);
    439 
    440     /// This is a helper function for MergeConsecutiveStores.
    441     /// Stores that may be merged are placed in StoreNodes.
    442     /// Loads that may alias with those stores are placed in AliasLoadNodes.
    443     void getStoreMergeAndAliasCandidates(
    444         StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
    445         SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);
    446 
    447     /// Merge consecutive store operations into a wide store.
    448     /// This optimization uses wide integers or vectors when possible.
    449     /// \return True if some memory operations were changed.
    450     bool MergeConsecutiveStores(StoreSDNode *N);
    451 
    452     /// \brief Try to transform a truncation where C is a constant:
    453     ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    454     ///
    455     /// \p N needs to be a truncation and its first operand an AND. Other
    456     /// requirements are checked by the function (e.g. that trunc is
    457     /// single-use) and if missed an empty SDValue is returned.
    458     SDValue distributeTruncateThroughAnd(SDNode *N);
    459 
    460   public:
    461     DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
    462         : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
    463           OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
    464       ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
    465     }
    466 
    467     /// Runs the dag combiner on all nodes in the work list
    468     void Run(CombineLevel AtLevel);
    469 
    470     SelectionDAG &getDAG() const { return DAG; }
    471 
    472     /// Returns a type large enough to hold any valid shift amount - before type
    473     /// legalization these can be huge.
    474     EVT getShiftAmountTy(EVT LHSTy) {
    475       assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
    476       if (LHSTy.isVector())
    477         return LHSTy;
    478       auto &DL = DAG.getDataLayout();
    479       return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
    480                         : TLI.getPointerTy(DL);
    481     }
    482 
    483     /// This method returns true if we are running before type legalization or
    484     /// if the specified VT is legal.
    485     bool isTypeLegal(const EVT &VT) {
    486       if (!LegalTypes) return true;
    487       return TLI.isTypeLegal(VT);
    488     }
    489 
    490     /// Convenience wrapper around TargetLowering::getSetCCResultType
    491     EVT getSetCCResultType(EVT VT) const {
    492       return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    493     }
    494   };
    495 }
    496 
    497 
    498 namespace {
    499 /// This class is a DAGUpdateListener that removes any deleted
    500 /// nodes from the worklist.
    501 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
    502   DAGCombiner &DC;
    503 public:
    504   explicit WorklistRemover(DAGCombiner &dc)
    505     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
    506 
    507   void NodeDeleted(SDNode *N, SDNode *E) override {
    508     DC.removeFromWorklist(N);
    509   }
    510 };
    511 }
    512 
    513 //===----------------------------------------------------------------------===//
    514 //  TargetLowering::DAGCombinerInfo implementation
    515 //===----------------------------------------------------------------------===//
    516 
    517 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
    518   ((DAGCombiner*)DC)->AddToWorklist(N);
    519 }
    520 
    521 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
    522   ((DAGCombiner*)DC)->removeFromWorklist(N);
    523 }
    524 
    525 SDValue TargetLowering::DAGCombinerInfo::
    526 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
    527   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
    528 }
    529 
    530 SDValue TargetLowering::DAGCombinerInfo::
    531 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
    532   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
    533 }
    534 
    535 
    536 SDValue TargetLowering::DAGCombinerInfo::
    537 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
    538   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
    539 }
    540 
    541 void TargetLowering::DAGCombinerInfo::
    542 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
    543   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
    544 }
    545 
    546 //===----------------------------------------------------------------------===//
    547 // Helper Functions
    548 //===----------------------------------------------------------------------===//
    549 
    550 void DAGCombiner::deleteAndRecombine(SDNode *N) {
    551   removeFromWorklist(N);
    552 
    553   // If the operands of this node are only used by the node, they will now be
    554   // dead. Make sure to re-visit them and recursively delete dead nodes.
    555   for (const SDValue &Op : N->ops())
    556     // For an operand generating multiple values, one of the values may
    557     // become dead allowing further simplification (e.g. split index
    558     // arithmetic from an indexed load).
    559     if (Op->hasOneUse() || Op->getNumValues() > 1)
    560       AddToWorklist(Op.getNode());
    561 
    562   DAG.DeleteNode(N);
    563 }
    564 
    565 /// Return 1 if we can compute the negated form of the specified expression for
    566 /// the same cost as the expression itself, or 2 if we can compute the negated
    567 /// form more cheaply than the expression itself.
    568 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
    569                                const TargetLowering &TLI,
    570                                const TargetOptions *Options,
    571                                unsigned Depth = 0) {
    572   // fneg is removable even if it has multiple uses.
    573   if (Op.getOpcode() == ISD::FNEG) return 2;
    574 
    575   // Don't allow anything with multiple uses.
    576   if (!Op.hasOneUse()) return 0;
    577 
    578   // Don't recurse exponentially.
    579   if (Depth > 6) return 0;
    580 
    581   switch (Op.getOpcode()) {
    582   default: return false;
    583   case ISD::ConstantFP:
    584     // Don't invert constant FP values after legalize.  The negated constant
    585     // isn't necessarily legal.
    586     return LegalOperations ? 0 : 1;
    587   case ISD::FADD:
    588     // FIXME: determine better conditions for this xform.
    589     if (!Options->UnsafeFPMath) return 0;
    590 
    591     // After operation legalization, it might not be legal to create new FSUBs.
    592     if (LegalOperations &&
    593         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
    594       return 0;
    595 
    596     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    597     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
    598                                     Options, Depth + 1))
    599       return V;
    600     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    601     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
    602                               Depth + 1);
    603   case ISD::FSUB:
    604     // We can't turn -(A-B) into B-A when we honor signed zeros.
    605     if (!Options->UnsafeFPMath) return 0;
    606 
    607     // fold (fneg (fsub A, B)) -> (fsub B, A)
    608     return 1;
    609 
    610   case ISD::FMUL:
    611   case ISD::FDIV:
    612     if (Options->HonorSignDependentRoundingFPMath()) return 0;
    613 
    614     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    615     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
    616                                     Options, Depth + 1))
    617       return V;
    618 
    619     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
    620                               Depth + 1);
    621 
    622   case ISD::FP_EXTEND:
    623   case ISD::FP_ROUND:
    624   case ISD::FSIN:
    625     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
    626                               Depth + 1);
    627   }
    628 }
    629 
    630 /// If isNegatibleForFree returns true, return the newly negated expression.
    631 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
    632                                     bool LegalOperations, unsigned Depth = 0) {
    633   const TargetOptions &Options = DAG.getTarget().Options;
    634   // fneg is removable even if it has multiple uses.
    635   if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);
    636 
    637   // Don't allow anything with multiple uses.
    638   assert(Op.hasOneUse() && "Unknown reuse!");
    639 
    640   assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");
    641 
    642   const SDNodeFlags *Flags = Op.getNode()->getFlags();
    643 
    644   switch (Op.getOpcode()) {
    645   default: llvm_unreachable("Unknown code");
    646   case ISD::ConstantFP: {
    647     APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    648     V.changeSign();
    649     return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
    650   }
    651   case ISD::FADD:
    652     // FIXME: determine better conditions for this xform.
    653     assert(Options.UnsafeFPMath);
    654 
    655     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    656     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
    657                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
    658       return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
    659                          GetNegatedExpression(Op.getOperand(0), DAG,
    660                                               LegalOperations, Depth+1),
    661                          Op.getOperand(1), Flags);
    662     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    663     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
    664                        GetNegatedExpression(Op.getOperand(1), DAG,
    665                                             LegalOperations, Depth+1),
    666                        Op.getOperand(0), Flags);
    667   case ISD::FSUB:
    668     // We can't turn -(A-B) into B-A when we honor signed zeros.
    669     assert(Options.UnsafeFPMath);
    670 
    671     // fold (fneg (fsub 0, B)) -> B
    672     if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
    673       if (N0CFP->isZero())
    674         return Op.getOperand(1);
    675 
    676     // fold (fneg (fsub A, B)) -> (fsub B, A)
    677     return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
    678                        Op.getOperand(1), Op.getOperand(0), Flags);
    679 
    680   case ISD::FMUL:
    681   case ISD::FDIV:
    682     assert(!Options.HonorSignDependentRoundingFPMath());
    683 
    684     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    685     if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
    686                            DAG.getTargetLoweringInfo(), &Options, Depth+1))
    687       return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
    688                          GetNegatedExpression(Op.getOperand(0), DAG,
    689                                               LegalOperations, Depth+1),
    690                          Op.getOperand(1), Flags);
    691 
    692     // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    693     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
    694                        Op.getOperand(0),
    695                        GetNegatedExpression(Op.getOperand(1), DAG,
    696                                             LegalOperations, Depth+1), Flags);
    697 
    698   case ISD::FP_EXTEND:
    699   case ISD::FSIN:
    700     return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
    701                        GetNegatedExpression(Op.getOperand(0), DAG,
    702                                             LegalOperations, Depth+1));
    703   case ISD::FP_ROUND:
    704       return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
    705                          GetNegatedExpression(Op.getOperand(0), DAG,
    706                                               LegalOperations, Depth+1),
    707                          Op.getOperand(1));
    708   }
    709 }
    710 
    711 // Return true if this node is a setcc, or is a select_cc
    712 // that selects between the target values used for true and false, making it
    713 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
    714 // the appropriate nodes based on the type of node we are checking. This
    715 // simplifies life a bit for the callers.
    716 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
    717                                     SDValue &CC) const {
    718   if (N.getOpcode() == ISD::SETCC) {
    719     LHS = N.getOperand(0);
    720     RHS = N.getOperand(1);
    721     CC  = N.getOperand(2);
    722     return true;
    723   }
    724 
    725   if (N.getOpcode() != ISD::SELECT_CC ||
    726       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
    727       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    728     return false;
    729 
    730   if (TLI.getBooleanContents(N.getValueType()) ==
    731       TargetLowering::UndefinedBooleanContent)
    732     return false;
    733 
    734   LHS = N.getOperand(0);
    735   RHS = N.getOperand(1);
    736   CC  = N.getOperand(4);
    737   return true;
    738 }
    739 
    740 /// Return true if this is a SetCC-equivalent operation with only one use.
    741 /// If this is true, it allows the users to invert the operation for free when
    742 /// it is profitable to do so.
    743 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
    744   SDValue N0, N1, N2;
    745   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    746     return true;
    747   return false;
    748 }
    749 
    750 /// Returns true if N is a BUILD_VECTOR node whose
    751 /// elements are all the same constant or undefined.
    752 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
    753   BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
    754   if (!C)
    755     return false;
    756 
    757   APInt SplatUndef;
    758   unsigned SplatBitSize;
    759   bool HasAnyUndefs;
    760   EVT EltVT = N->getValueType(0).getVectorElementType();
    761   return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
    762                              HasAnyUndefs) &&
    763           EltVT.getSizeInBits() >= SplatBitSize);
    764 }
    765 
    766 // \brief Returns the SDNode if it is a constant integer BuildVector
    767 // or constant integer.
    768 static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) {
    769   if (isa<ConstantSDNode>(N))
    770     return N.getNode();
    771   if (ISD::isBuildVectorOfConstantSDNodes(N.getNode()))
    772     return N.getNode();
    773   return nullptr;
    774 }
    775 
    776 // \brief Returns the SDNode if it is a constant float BuildVector
    777 // or constant float.
    778 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
    779   if (isa<ConstantFPSDNode>(N))
    780     return N.getNode();
    781   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    782     return N.getNode();
    783   return nullptr;
    784 }
    785 
    786 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
    787 // int.
    788 static ConstantSDNode *isConstOrConstSplat(SDValue N) {
    789   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
    790     return CN;
    791 
    792   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    793     BitVector UndefElements;
    794     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
    795 
    796     // BuildVectors can truncate their operands. Ignore that case here.
    797     // FIXME: We blindly ignore splats which include undef which is overly
    798     // pessimistic.
    799     if (CN && UndefElements.none() &&
    800         CN->getValueType(0) == N.getValueType().getScalarType())
    801       return CN;
    802   }
    803 
    804   return nullptr;
    805 }
    806 
    807 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
    808 // float.
    809 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
    810   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    811     return CN;
    812 
    813   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    814     BitVector UndefElements;
    815     ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
    816 
    817     if (CN && UndefElements.none())
    818       return CN;
    819   }
    820 
    821   return nullptr;
    822 }
    823 
    824 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
    825                                     SDValue N0, SDValue N1) {
    826   EVT VT = N0.getValueType();
    827   if (N0.getOpcode() == Opc) {
    828     if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
    829       if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
    830         // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
    831         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
    832           return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
    833         return SDValue();
    834       }
    835       if (N0.hasOneUse()) {
    836         // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
    837         // use
    838         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
    839         if (!OpNode.getNode())
    840           return SDValue();
    841         AddToWorklist(OpNode.getNode());
    842         return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
    843       }
    844     }
    845   }
    846 
    847   if (N1.getOpcode() == Opc) {
    848     if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
    849       if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
    850         // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
    851         if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
    852           return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
    853         return SDValue();
    854       }
    855       if (N1.hasOneUse()) {
    856         // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
    857         // use
    858         SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
    859         if (!OpNode.getNode())
    860           return SDValue();
    861         AddToWorklist(OpNode.getNode());
    862         return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
    863       }
    864     }
    865   }
    866 
    867   return SDValue();
    868 }
    869 
    870 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
    871                                bool AddTo) {
    872   assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
    873   ++NodesCombined;
    874   DEBUG(dbgs() << "\nReplacing.1 ";
    875         N->dump(&DAG);
    876         dbgs() << "\nWith: ";
    877         To[0].getNode()->dump(&DAG);
    878         dbgs() << " and " << NumTo-1 << " other values\n");
    879   for (unsigned i = 0, e = NumTo; i != e; ++i)
    880     assert((!To[i].getNode() ||
    881             N->getValueType(i) == To[i].getValueType()) &&
    882            "Cannot combine value to value of different type!");
    883 
    884   WorklistRemover DeadNodes(*this);
    885   DAG.ReplaceAllUsesWith(N, To);
    886   if (AddTo) {
    887     // Push the new nodes and any users onto the worklist
    888     for (unsigned i = 0, e = NumTo; i != e; ++i) {
    889       if (To[i].getNode()) {
    890         AddToWorklist(To[i].getNode());
    891         AddUsersToWorklist(To[i].getNode());
    892       }
    893     }
    894   }
    895 
    896   // Finally, if the node is now dead, remove it from the graph.  The node
    897   // may not be dead if the replacement process recursively simplified to
    898   // something else needing this node.
    899   if (N->use_empty())
    900     deleteAndRecombine(N);
    901   return SDValue(N, 0);
    902 }
    903 
    904 void DAGCombiner::
    905 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
    906   // Replace all uses.  If any nodes become isomorphic to other nodes and
    907   // are deleted, make sure to remove them from our worklist.
    908   WorklistRemover DeadNodes(*this);
    909   DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);
    910 
    911   // Push the new node and any (possibly new) users onto the worklist.
    912   AddToWorklist(TLO.New.getNode());
    913   AddUsersToWorklist(TLO.New.getNode());
    914 
    915   // Finally, if the node is now dead, remove it from the graph.  The node
    916   // may not be dead if the replacement process recursively simplified to
    917   // something else needing this node.
    918   if (TLO.Old.getNode()->use_empty())
    919     deleteAndRecombine(TLO.Old.getNode());
    920 }
    921 
    922 /// Check the specified integer node value to see if it can be simplified or if
    923 /// things it uses can be simplified by bit propagation. If so, return true.
    924 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
    925   TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
    926   APInt KnownZero, KnownOne;
    927   if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    928     return false;
    929 
    930   // Revisit the node.
    931   AddToWorklist(Op.getNode());
    932 
    933   // Replace the old value with the new one.
    934   ++NodesCombined;
    935   DEBUG(dbgs() << "\nReplacing.2 ";
    936         TLO.Old.getNode()->dump(&DAG);
    937         dbgs() << "\nWith: ";
    938         TLO.New.getNode()->dump(&DAG);
    939         dbgs() << '\n');
    940 
    941   CommitTargetLoweringOpt(TLO);
    942   return true;
    943 }
    944 
    945 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
    946   SDLoc dl(Load);
    947   EVT VT = Load->getValueType(0);
    948   SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));
    949 
    950   DEBUG(dbgs() << "\nReplacing.9 ";
    951         Load->dump(&DAG);
    952         dbgs() << "\nWith: ";
    953         Trunc.getNode()->dump(&DAG);
    954         dbgs() << '\n');
    955   WorklistRemover DeadNodes(*this);
    956   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
    957   DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
    958   deleteAndRecombine(Load);
    959   AddToWorklist(Trunc.getNode());
    960 }
    961 
    962 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
    963   Replace = false;
    964   SDLoc dl(Op);
    965   if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    966     EVT MemVT = LD->getMemoryVT();
    967     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
    968       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
    969                                                        : ISD::EXTLOAD)
    970       : LD->getExtensionType();
    971     Replace = true;
    972     return DAG.getExtLoad(ExtType, dl, PVT,
    973                           LD->getChain(), LD->getBasePtr(),
    974                           MemVT, LD->getMemOperand());
    975   }
    976 
    977   unsigned Opc = Op.getOpcode();
    978   switch (Opc) {
    979   default: break;
    980   case ISD::AssertSext:
    981     return DAG.getNode(ISD::AssertSext, dl, PVT,
    982                        SExtPromoteOperand(Op.getOperand(0), PVT),
    983                        Op.getOperand(1));
    984   case ISD::AssertZext:
    985     return DAG.getNode(ISD::AssertZext, dl, PVT,
    986                        ZExtPromoteOperand(Op.getOperand(0), PVT),
    987                        Op.getOperand(1));
    988   case ISD::Constant: {
    989     unsigned ExtOpc =
    990       Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    991     return DAG.getNode(ExtOpc, dl, PVT, Op);
    992   }
    993   }
    994 
    995   if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    996     return SDValue();
    997   return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
    998 }
    999 
   1000 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
   1001   if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
   1002     return SDValue();
   1003   EVT OldVT = Op.getValueType();
   1004   SDLoc dl(Op);
   1005   bool Replace = false;
   1006   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
   1007   if (!NewOp.getNode())
   1008     return SDValue();
   1009   AddToWorklist(NewOp.getNode());
   1010 
   1011   if (Replace)
   1012     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
   1013   return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
   1014                      DAG.getValueType(OldVT));
   1015 }
   1016 
   1017 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
   1018   EVT OldVT = Op.getValueType();
   1019   SDLoc dl(Op);
   1020   bool Replace = false;
   1021   SDValue NewOp = PromoteOperand(Op, PVT, Replace);
   1022   if (!NewOp.getNode())
   1023     return SDValue();
   1024   AddToWorklist(NewOp.getNode());
   1025 
   1026   if (Replace)
   1027     ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
   1028   return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
   1029 }
   1030 
   1031 /// Promote the specified integer binary operation if the target indicates it is
   1032 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
   1033 /// i32 since i16 instructions are longer.
   1034 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
   1035   if (!LegalOperations)
   1036     return SDValue();
   1037 
   1038   EVT VT = Op.getValueType();
   1039   if (VT.isVector() || !VT.isInteger())
   1040     return SDValue();
   1041 
   1042   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1043   // promoting it.
   1044   unsigned Opc = Op.getOpcode();
   1045   if (TLI.isTypeDesirableForOp(Opc, VT))
   1046     return SDValue();
   1047 
   1048   EVT PVT = VT;
   1049   // Consult target whether it is a good idea to promote this operation and
   1050   // what's the right type to promote it to.
   1051   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1052     assert(PVT != VT && "Don't know what type to promote to!");
   1053 
   1054     bool Replace0 = false;
   1055     SDValue N0 = Op.getOperand(0);
   1056     SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
   1057     if (!NN0.getNode())
   1058       return SDValue();
   1059 
   1060     bool Replace1 = false;
   1061     SDValue N1 = Op.getOperand(1);
   1062     SDValue NN1;
   1063     if (N0 == N1)
   1064       NN1 = NN0;
   1065     else {
   1066       NN1 = PromoteOperand(N1, PVT, Replace1);
   1067       if (!NN1.getNode())
   1068         return SDValue();
   1069     }
   1070 
   1071     AddToWorklist(NN0.getNode());
   1072     if (NN1.getNode())
   1073       AddToWorklist(NN1.getNode());
   1074 
   1075     if (Replace0)
   1076       ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
   1077     if (Replace1)
   1078       ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());
   1079 
   1080     DEBUG(dbgs() << "\nPromoting ";
   1081           Op.getNode()->dump(&DAG));
   1082     SDLoc dl(Op);
   1083     return DAG.getNode(ISD::TRUNCATE, dl, VT,
   1084                        DAG.getNode(Opc, dl, PVT, NN0, NN1));
   1085   }
   1086   return SDValue();
   1087 }
   1088 
   1089 /// Promote the specified integer shift operation if the target indicates it is
   1090 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to
   1091 /// i32 since i16 instructions are longer.
   1092 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
   1093   if (!LegalOperations)
   1094     return SDValue();
   1095 
   1096   EVT VT = Op.getValueType();
   1097   if (VT.isVector() || !VT.isInteger())
   1098     return SDValue();
   1099 
   1100   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1101   // promoting it.
   1102   unsigned Opc = Op.getOpcode();
   1103   if (TLI.isTypeDesirableForOp(Opc, VT))
   1104     return SDValue();
   1105 
   1106   EVT PVT = VT;
   1107   // Consult target whether it is a good idea to promote this operation and
   1108   // what's the right type to promote it to.
   1109   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1110     assert(PVT != VT && "Don't know what type to promote to!");
   1111 
   1112     bool Replace = false;
   1113     SDValue N0 = Op.getOperand(0);
   1114     if (Opc == ISD::SRA)
   1115       N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
   1116     else if (Opc == ISD::SRL)
   1117       N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
   1118     else
   1119       N0 = PromoteOperand(N0, PVT, Replace);
   1120     if (!N0.getNode())
   1121       return SDValue();
   1122 
   1123     AddToWorklist(N0.getNode());
   1124     if (Replace)
   1125       ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());
   1126 
   1127     DEBUG(dbgs() << "\nPromoting ";
   1128           Op.getNode()->dump(&DAG));
   1129     SDLoc dl(Op);
   1130     return DAG.getNode(ISD::TRUNCATE, dl, VT,
   1131                        DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
   1132   }
   1133   return SDValue();
   1134 }
   1135 
   1136 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
   1137   if (!LegalOperations)
   1138     return SDValue();
   1139 
   1140   EVT VT = Op.getValueType();
   1141   if (VT.isVector() || !VT.isInteger())
   1142     return SDValue();
   1143 
   1144   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1145   // promoting it.
   1146   unsigned Opc = Op.getOpcode();
   1147   if (TLI.isTypeDesirableForOp(Opc, VT))
   1148     return SDValue();
   1149 
   1150   EVT PVT = VT;
   1151   // Consult target whether it is a good idea to promote this operation and
   1152   // what's the right type to promote it to.
   1153   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1154     assert(PVT != VT && "Don't know what type to promote to!");
   1155     // fold (aext (aext x)) -> (aext x)
   1156     // fold (aext (zext x)) -> (zext x)
   1157     // fold (aext (sext x)) -> (sext x)
   1158     DEBUG(dbgs() << "\nPromoting ";
   1159           Op.getNode()->dump(&DAG));
   1160     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
   1161   }
   1162   return SDValue();
   1163 }
   1164 
   1165 bool DAGCombiner::PromoteLoad(SDValue Op) {
   1166   if (!LegalOperations)
   1167     return false;
   1168 
   1169   EVT VT = Op.getValueType();
   1170   if (VT.isVector() || !VT.isInteger())
   1171     return false;
   1172 
   1173   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1174   // promoting it.
   1175   unsigned Opc = Op.getOpcode();
   1176   if (TLI.isTypeDesirableForOp(Opc, VT))
   1177     return false;
   1178 
   1179   EVT PVT = VT;
   1180   // Consult target whether it is a good idea to promote this operation and
   1181   // what's the right type to promote it to.
   1182   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1183     assert(PVT != VT && "Don't know what type to promote to!");
   1184 
   1185     SDLoc dl(Op);
   1186     SDNode *N = Op.getNode();
   1187     LoadSDNode *LD = cast<LoadSDNode>(N);
   1188     EVT MemVT = LD->getMemoryVT();
   1189     ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
   1190       ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
   1191                                                        : ISD::EXTLOAD)
   1192       : LD->getExtensionType();
   1193     SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
   1194                                    LD->getChain(), LD->getBasePtr(),
   1195                                    MemVT, LD->getMemOperand());
   1196     SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);
   1197 
   1198     DEBUG(dbgs() << "\nPromoting ";
   1199           N->dump(&DAG);
   1200           dbgs() << "\nTo: ";
   1201           Result.getNode()->dump(&DAG);
   1202           dbgs() << '\n');
   1203     WorklistRemover DeadNodes(*this);
   1204     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
   1205     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
   1206     deleteAndRecombine(N);
   1207     AddToWorklist(Result.getNode());
   1208     return true;
   1209   }
   1210   return false;
   1211 }
   1212 
   1213 /// \brief Recursively delete a node which has no uses and any operands for
   1214 /// which it is the only use.
   1215 ///
   1216 /// Note that this both deletes the nodes and removes them from the worklist.
   1217 /// It also adds any nodes who have had a user deleted to the worklist as they
   1218 /// may now have only one use and subject to other combines.
   1219 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
   1220   if (!N->use_empty())
   1221     return false;
   1222 
   1223   SmallSetVector<SDNode *, 16> Nodes;
   1224   Nodes.insert(N);
   1225   do {
   1226     N = Nodes.pop_back_val();
   1227     if (!N)
   1228       continue;
   1229 
   1230     if (N->use_empty()) {
   1231       for (const SDValue &ChildN : N->op_values())
   1232         Nodes.insert(ChildN.getNode());
   1233 
   1234       removeFromWorklist(N);
   1235       DAG.DeleteNode(N);
   1236     } else {
   1237       AddToWorklist(N);
   1238     }
   1239   } while (!Nodes.empty());
   1240   return true;
   1241 }
   1242 
   1243 //===----------------------------------------------------------------------===//
   1244 //  Main DAG Combiner implementation
   1245 //===----------------------------------------------------------------------===//
   1246 
   1247 void DAGCombiner::Run(CombineLevel AtLevel) {
   1248   // set the instance variables, so that the various visit routines may use it.
   1249   Level = AtLevel;
   1250   LegalOperations = Level >= AfterLegalizeVectorOps;
   1251   LegalTypes = Level >= AfterLegalizeTypes;
   1252 
   1253   // Add all the dag nodes to the worklist.
   1254   for (SDNode &Node : DAG.allnodes())
   1255     AddToWorklist(&Node);
   1256 
   1257   // Create a dummy node (which is not added to allnodes), that adds a reference
   1258   // to the root node, preventing it from being deleted, and tracking any
   1259   // changes of the root.
   1260   HandleSDNode Dummy(DAG.getRoot());
   1261 
   1262   // while the worklist isn't empty, find a node and
   1263   // try and combine it.
   1264   while (!WorklistMap.empty()) {
   1265     SDNode *N;
   1266     // The Worklist holds the SDNodes in order, but it may contain null entries.
   1267     do {
   1268       N = Worklist.pop_back_val();
   1269     } while (!N);
   1270 
   1271     bool GoodWorklistEntry = WorklistMap.erase(N);
   1272     (void)GoodWorklistEntry;
   1273     assert(GoodWorklistEntry &&
   1274            "Found a worklist entry without a corresponding map entry!");
   1275 
   1276     // If N has no uses, it is dead.  Make sure to revisit all N's operands once
   1277     // N is deleted from the DAG, since they too may now be dead or may have a
   1278     // reduced number of uses, allowing other xforms.
   1279     if (recursivelyDeleteUnusedNodes(N))
   1280       continue;
   1281 
   1282     WorklistRemover DeadNodes(*this);
   1283 
   1284     // If this combine is running after legalizing the DAG, re-legalize any
   1285     // nodes pulled off the worklist.
   1286     if (Level == AfterLegalizeDAG) {
   1287       SmallSetVector<SDNode *, 16> UpdatedNodes;
   1288       bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);
   1289 
   1290       for (SDNode *LN : UpdatedNodes) {
   1291         AddToWorklist(LN);
   1292         AddUsersToWorklist(LN);
   1293       }
   1294       if (!NIsValid)
   1295         continue;
   1296     }
   1297 
   1298     DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));
   1299 
   1300     // Add any operands of the new node which have not yet been combined to the
   1301     // worklist as well. Because the worklist uniques things already, this
   1302     // won't repeatedly process the same operand.
   1303     CombinedNodes.insert(N);
   1304     for (const SDValue &ChildN : N->op_values())
   1305       if (!CombinedNodes.count(ChildN.getNode()))
   1306         AddToWorklist(ChildN.getNode());
   1307 
   1308     SDValue RV = combine(N);
   1309 
   1310     if (!RV.getNode())
   1311       continue;
   1312 
   1313     ++NodesCombined;
   1314 
   1315     // If we get back the same node we passed in, rather than a new node or
   1316     // zero, we know that the node must have defined multiple values and
   1317     // CombineTo was used.  Since CombineTo takes care of the worklist
   1318     // mechanics for us, we have no work to do in this case.
   1319     if (RV.getNode() == N)
   1320       continue;
   1321 
   1322     assert(N->getOpcode() != ISD::DELETED_NODE &&
   1323            RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
   1324            "Node was deleted but visit returned new node!");
   1325 
   1326     DEBUG(dbgs() << " ... into: ";
   1327           RV.getNode()->dump(&DAG));
   1328 
   1329     // Transfer debug value.
   1330     DAG.TransferDbgValues(SDValue(N, 0), RV);
   1331     if (N->getNumValues() == RV.getNode()->getNumValues())
   1332       DAG.ReplaceAllUsesWith(N, RV.getNode());
   1333     else {
   1334       assert(N->getValueType(0) == RV.getValueType() &&
   1335              N->getNumValues() == 1 && "Type mismatch");
   1336       SDValue OpV = RV;
   1337       DAG.ReplaceAllUsesWith(N, &OpV);
   1338     }
   1339 
   1340     // Push the new node and any users onto the worklist
   1341     AddToWorklist(RV.getNode());
   1342     AddUsersToWorklist(RV.getNode());
   1343 
   1344     // Finally, if the node is now dead, remove it from the graph.  The node
   1345     // may not be dead if the replacement process recursively simplified to
   1346     // something else needing this node. This will also take care of adding any
   1347     // operands which have lost a user to the worklist.
   1348     recursivelyDeleteUnusedNodes(N);
   1349   }
   1350 
   1351   // If the root changed (e.g. it was a dead load, update the root).
   1352   DAG.setRoot(Dummy.getValue());
   1353   DAG.RemoveDeadNodes();
   1354 }
   1355 
   1356 SDValue DAGCombiner::visit(SDNode *N) {
   1357   switch (N->getOpcode()) {
   1358   default: break;
   1359   case ISD::TokenFactor:        return visitTokenFactor(N);
   1360   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
   1361   case ISD::ADD:                return visitADD(N);
   1362   case ISD::SUB:                return visitSUB(N);
   1363   case ISD::ADDC:               return visitADDC(N);
   1364   case ISD::SUBC:               return visitSUBC(N);
   1365   case ISD::ADDE:               return visitADDE(N);
   1366   case ISD::SUBE:               return visitSUBE(N);
   1367   case ISD::MUL:                return visitMUL(N);
   1368   case ISD::SDIV:               return visitSDIV(N);
   1369   case ISD::UDIV:               return visitUDIV(N);
   1370   case ISD::SREM:
   1371   case ISD::UREM:               return visitREM(N);
   1372   case ISD::MULHU:              return visitMULHU(N);
   1373   case ISD::MULHS:              return visitMULHS(N);
   1374   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
   1375   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
   1376   case ISD::SMULO:              return visitSMULO(N);
   1377   case ISD::UMULO:              return visitUMULO(N);
   1378   case ISD::SMIN:
   1379   case ISD::SMAX:
   1380   case ISD::UMIN:
   1381   case ISD::UMAX:               return visitIMINMAX(N);
   1382   case ISD::AND:                return visitAND(N);
   1383   case ISD::OR:                 return visitOR(N);
   1384   case ISD::XOR:                return visitXOR(N);
   1385   case ISD::SHL:                return visitSHL(N);
   1386   case ISD::SRA:                return visitSRA(N);
   1387   case ISD::SRL:                return visitSRL(N);
   1388   case ISD::ROTR:
   1389   case ISD::ROTL:               return visitRotate(N);
   1390   case ISD::BSWAP:              return visitBSWAP(N);
   1391   case ISD::CTLZ:               return visitCTLZ(N);
   1392   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
   1393   case ISD::CTTZ:               return visitCTTZ(N);
   1394   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
   1395   case ISD::CTPOP:              return visitCTPOP(N);
   1396   case ISD::SELECT:             return visitSELECT(N);
   1397   case ISD::VSELECT:            return visitVSELECT(N);
   1398   case ISD::SELECT_CC:          return visitSELECT_CC(N);
   1399   case ISD::SETCC:              return visitSETCC(N);
   1400   case ISD::SETCCE:             return visitSETCCE(N);
   1401   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
   1402   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
   1403   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
   1404   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
   1405   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
   1406   case ISD::TRUNCATE:           return visitTRUNCATE(N);
   1407   case ISD::BITCAST:            return visitBITCAST(N);
   1408   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
   1409   case ISD::FADD:               return visitFADD(N);
   1410   case ISD::FSUB:               return visitFSUB(N);
   1411   case ISD::FMUL:               return visitFMUL(N);
   1412   case ISD::FMA:                return visitFMA(N);
   1413   case ISD::FDIV:               return visitFDIV(N);
   1414   case ISD::FREM:               return visitFREM(N);
   1415   case ISD::FSQRT:              return visitFSQRT(N);
   1416   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
   1417   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
   1418   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
   1419   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
   1420   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
   1421   case ISD::FP_ROUND:           return visitFP_ROUND(N);
   1422   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
   1423   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
   1424   case ISD::FNEG:               return visitFNEG(N);
   1425   case ISD::FABS:               return visitFABS(N);
   1426   case ISD::FFLOOR:             return visitFFLOOR(N);
   1427   case ISD::FMINNUM:            return visitFMINNUM(N);
   1428   case ISD::FMAXNUM:            return visitFMAXNUM(N);
   1429   case ISD::FCEIL:              return visitFCEIL(N);
   1430   case ISD::FTRUNC:             return visitFTRUNC(N);
   1431   case ISD::BRCOND:             return visitBRCOND(N);
   1432   case ISD::BR_CC:              return visitBR_CC(N);
   1433   case ISD::LOAD:               return visitLOAD(N);
   1434   case ISD::STORE:              return visitSTORE(N);
   1435   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
   1436   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
   1437   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
   1438   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
   1439   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
   1440   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
   1441   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
   1442   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
   1443   case ISD::MGATHER:            return visitMGATHER(N);
   1444   case ISD::MLOAD:              return visitMLOAD(N);
   1445   case ISD::MSCATTER:           return visitMSCATTER(N);
   1446   case ISD::MSTORE:             return visitMSTORE(N);
   1447   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
   1448   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
   1449   }
   1450   return SDValue();
   1451 }
   1452 
   1453 SDValue DAGCombiner::combine(SDNode *N) {
   1454   SDValue RV = visit(N);
   1455 
   1456   // If nothing happened, try a target-specific DAG combine.
   1457   if (!RV.getNode()) {
   1458     assert(N->getOpcode() != ISD::DELETED_NODE &&
   1459            "Node was deleted but visit returned NULL!");
   1460 
   1461     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
   1462         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
   1463 
   1464       // Expose the DAG combiner to the target combiner impls.
   1465       TargetLowering::DAGCombinerInfo
   1466         DagCombineInfo(DAG, Level, false, this);
   1467 
   1468       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
   1469     }
   1470   }
   1471 
   1472   // If nothing happened still, try promoting the operation.
   1473   if (!RV.getNode()) {
   1474     switch (N->getOpcode()) {
   1475     default: break;
   1476     case ISD::ADD:
   1477     case ISD::SUB:
   1478     case ISD::MUL:
   1479     case ISD::AND:
   1480     case ISD::OR:
   1481     case ISD::XOR:
   1482       RV = PromoteIntBinOp(SDValue(N, 0));
   1483       break;
   1484     case ISD::SHL:
   1485     case ISD::SRA:
   1486     case ISD::SRL:
   1487       RV = PromoteIntShiftOp(SDValue(N, 0));
   1488       break;
   1489     case ISD::SIGN_EXTEND:
   1490     case ISD::ZERO_EXTEND:
   1491     case ISD::ANY_EXTEND:
   1492       RV = PromoteExtend(SDValue(N, 0));
   1493       break;
   1494     case ISD::LOAD:
   1495       if (PromoteLoad(SDValue(N, 0)))
   1496         RV = SDValue(N, 0);
   1497       break;
   1498     }
   1499   }
   1500 
   1501   // If N is a commutative binary node, try commuting it to enable more
   1502   // sdisel CSE.
   1503   if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
   1504       N->getNumValues() == 1) {
   1505     SDValue N0 = N->getOperand(0);
   1506     SDValue N1 = N->getOperand(1);
   1507 
   1508     // Constant operands are canonicalized to RHS.
   1509     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
   1510       SDValue Ops[] = {N1, N0};
   1511       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
   1512                                             N->getFlags());
   1513       if (CSENode)
   1514         return SDValue(CSENode, 0);
   1515     }
   1516   }
   1517 
   1518   return RV;
   1519 }
   1520 
   1521 /// Given a node, return its input chain if it has one, otherwise return a null
   1522 /// sd operand.
   1523 static SDValue getInputChainForNode(SDNode *N) {
   1524   if (unsigned NumOps = N->getNumOperands()) {
   1525     if (N->getOperand(0).getValueType() == MVT::Other)
   1526       return N->getOperand(0);
   1527     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
   1528       return N->getOperand(NumOps-1);
   1529     for (unsigned i = 1; i < NumOps-1; ++i)
   1530       if (N->getOperand(i).getValueType() == MVT::Other)
   1531         return N->getOperand(i);
   1532   }
   1533   return SDValue();
   1534 }
   1535 
   1536 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
          // Simplify a TokenFactor node. Two things are attempted:
          //   * a two-operand shortcut, when one operand's input chain is the
          //     other operand;
          //   * otherwise the operand list is rebuilt with single-use nested
          //     TokenFactors merged in, and EntryToken operands and duplicate
          //     operands dropped.
          // Returns an operand of N, the value produced by CombineTo, or a null
          // SDValue when nothing changed.

   1537   // If N has two operands, where one has an input chain equal to the other,
   1538   // the 'other' chain is redundant.
   1539   if (N->getNumOperands() == 2) {
   1540     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
   1541       return N->getOperand(0);
   1542     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
   1543       return N->getOperand(1);
   1544   }
   1545 
   1546   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
   1547   SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
   1548   SmallPtrSet<SDNode*, 16> SeenOps; // Nodes already pushed onto Ops.
   1549   bool Changed = false;             // If we should replace this token factor.
   1550 
   1551   // Start out with this token factor.
   1552   TFs.push_back(N);
   1553 
   1554   // Iterate through token factors.  The TFs grows when new token factors are
   1555   // encountered.
          // The loop is index-based and re-reads TFs.size() on every iteration
          // because TFs is appended to from inside the loop body.
   1556   for (unsigned i = 0; i < TFs.size(); ++i) {
   1557     SDNode *TF = TFs[i];
   1558 
   1559     // Check each of the operands.
   1560     for (const SDValue &Op : TF->op_values()) {
   1561 
   1562       switch (Op.getOpcode()) {
   1563       case ISD::EntryToken:
   1564         // Entry tokens don't need to be added to the list. They are
   1565         // redundant.
   1566         Changed = true;
   1567         break;
   1568 
   1569       case ISD::TokenFactor:
                // Only merge a nested token factor when this is its sole use and
                // it has not been queued already.
   1570         if (Op.hasOneUse() &&
   1571             std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
   1572           // Queue up for processing.
   1573           TFs.push_back(Op.getNode());
   1574           // Clean up in case the token factor is removed.
   1575           AddToWorklist(Op.getNode());
   1576           Changed = true;
   1577           break;
   1578         }
   1579         // Fall thru
                // (a token factor that is not merged is kept as an ordinary
                // operand, handled by the default case below)
   1580 
   1581       default:
   1582         // Only add if it isn't already in the list.
                // A repeated operand is dropped, which counts as a change.
   1583         if (SeenOps.insert(Op.getNode()).second)
   1584           Ops.push_back(Op);
   1585         else
   1586           Changed = true;
   1587         break;
   1588       }
   1589     }
   1590   }
   1591 
          // Stays a null SDValue unless a replacement is built below.
   1592   SDValue Result;
   1593 
   1594   // If we've changed things around then replace token factor.
   1595   if (Changed) {
   1596     if (Ops.empty()) {
   1597       // The entry token is the only possible outcome.
   1598       Result = DAG.getEntryNode();
   1599     } else {
   1600       // New and improved token factor.
   1601       Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
   1602     }
   1603 
   1604     // Add users to worklist if AA is enabled, since it may introduce
   1605     // a lot of new chained token factors while removing memory deps.
            // An explicit CombinerAA command-line setting takes precedence over
            // the subtarget's useAA() default.
   1606     bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
   1607       : DAG.getSubtarget().useAA();
   1608     return CombineTo(N, Result, UseAA /*add to worklist*/);
   1609   }
   1610 
   1611   return Result;
   1612 }
   1613 
   1614 /// MERGE_VALUES can always be eliminated.
   1615 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
   1616   WorklistRemover DeadNodes(*this);
   1617   // Replacing results may cause a different MERGE_VALUES to suddenly
   1618   // be CSE'd with N, and carry its uses with it. Iterate until no
   1619   // uses remain, to ensure that the node can be safely deleted.
   1620   // First add the users of this node to the work list so that they
   1621   // can be tried again once they have new operands.
   1622   AddUsersToWorklist(N);
   1623   do {
   1624     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
   1625       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
   1626   } while (!N->use_empty());
   1627   deleteAndRecombine(N);
   1628   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   1629 }
   1630 
   1631 /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a
   1632 /// ContantSDNode pointer else nullptr.
   1633 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
   1634   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
   1635   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
   1636 }
   1637 
   1638 SDValue DAGCombiner::visitADD(SDNode *N) {
          // Apply the ADD folds below in order and return the first replacement
          // that matches, or a null SDValue when none does. The order matters:
          // later folds rely on earlier ones having already returned (e.g. the
          // constant has been moved to the RHS before the RHS-constant folds).
   1639   SDValue N0 = N->getOperand(0);
   1640   SDValue N1 = N->getOperand(1);
   1641   EVT VT = N0.getValueType();
   1642 
   1643   // fold vector ops
   1644   if (VT.isVector()) {
   1645     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   1646       return FoldedVOp;
   1647 
   1648     // fold (add x, 0) -> x, vector edition
   1649     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   1650       return N0;
   1651     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   1652       return N1;
   1653   }
   1654 
   1655   // fold (add x, undef) -> undef
   1656   if (N0.getOpcode() == ISD::UNDEF)
   1657     return N0;
   1658   if (N1.getOpcode() == ISD::UNDEF)
   1659     return N1;
   1660   // fold (add c1, c2) -> c1+c2
          // N0C / N1C are null for opaque constants, so those are never folded
          // by the constant-based rules that test these pointers.
   1661   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   1662   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   1663   if (N0C && N1C)
   1664     return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C);
   1665   // canonicalize constant to RHS
   1666   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   1667      !isConstantIntBuildVectorOrConstantInt(N1))
   1668     return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
   1669   // fold (add x, 0) -> x
   1670   if (isNullConstant(N1))
   1671     return N0;
   1672   // fold (add Sym, c) -> Sym+c
          // Only before operation legalization, and only when the target says
          // folding an offset into this global address is legal.
   1673   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
   1674     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
   1675         GA->getOpcode() == ISD::GlobalAddress)
   1676       return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
   1677                                   GA->getOffset() +
   1678                                     (uint64_t)N1C->getSExtValue());
   1679   // fold ((c1-A)+c2) -> (c1+c2)-A
          // NOTE: the N0C declared in the condition below shadows the
          // function-level N0C; here it is c1, the LHS of the inner SUB.
   1680   if (N1C && N0.getOpcode() == ISD::SUB)
   1681     if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
   1682       SDLoc DL(N);
   1683       return DAG.getNode(ISD::SUB, DL, VT,
   1684                          DAG.getConstant(N1C->getAPIntValue()+
   1685                                          N0C->getAPIntValue(), DL, VT),
   1686                          N0.getOperand(1));
   1687     }
   1688   // reassociate add
   1689   if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
   1690     return RADD;
   1691   // fold ((0-A) + B) -> B-A
   1692   if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
   1693     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
   1694   // fold (A + (0-B)) -> A-B
   1695   if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
   1696     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
   1697   // fold (A+(B-A)) -> B
   1698   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
   1699     return N1.getOperand(0);
   1700   // fold ((B-A)+A) -> B
   1701   if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
   1702     return N0.getOperand(0);
   1703   // fold (A+(B-(A+C))) to (B-C)
   1704   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
   1705       N0 == N1.getOperand(1).getOperand(0))
   1706     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
   1707                        N1.getOperand(1).getOperand(1));
   1708   // fold (A+(B-(C+A))) to (B-C)
   1709   if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
   1710       N0 == N1.getOperand(1).getOperand(1))
   1711     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
   1712                        N1.getOperand(1).getOperand(0));
   1713   // fold (A+((B-A)+or-C)) to (B+or-C)
          // The outer opcode of N1 (ADD or SUB) is reused for the new node.
   1714   if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
   1715       N1.getOperand(0).getOpcode() == ISD::SUB &&
   1716       N0 == N1.getOperand(0).getOperand(1))
   1717     return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
   1718                        N1.getOperand(0).getOperand(0), N1.getOperand(1));
   1719 
   1720   // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
          // The test is isa<ConstantSDNode>, so an opaque constant qualifies
          // here as well.
   1721   if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
   1722     SDValue N00 = N0.getOperand(0);
   1723     SDValue N01 = N0.getOperand(1);
   1724     SDValue N10 = N1.getOperand(0);
   1725     SDValue N11 = N1.getOperand(1);
   1726 
   1727     if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
   1728       return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   1729                          DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
   1730                          DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
   1731   }
   1732 
          // Scalar only. On success N itself is returned rather than a new
          // node (presumably SimplifyDemandedBits has already performed the
          // replacement -- confirm against its definition).
   1733   if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
   1734     return SDValue(N, 0);
   1735 
   1736   // fold (a+b) -> (a|b) iff a and b share no bits.
          // Restricted to scalar integers, and to a legal OR once operations
          // have been legalized.
   1737   if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
   1738       VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
   1739     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
   1740 
   1741   // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
   1742   if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
   1743       isNullConstant(N1.getOperand(0).getOperand(0)))
   1744     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
   1745                        DAG.getNode(ISD::SHL, SDLoc(N), VT,
   1746                                    N1.getOperand(0).getOperand(1),
   1747                                    N1.getOperand(1)));
          // Same fold with the operands commuted:
          // (add shl(0 - y, n), x) -> sub(x, shl(y, n))
   1748   if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
   1749       isNullConstant(N0.getOperand(0).getOperand(0)))
   1750     return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
   1751                        DAG.getNode(ISD::SHL, SDLoc(N), VT,
   1752                                    N0.getOperand(0).getOperand(1),
   1753                                    N0.getOperand(1)));
   1754 
   1755   if (N1.getOpcode() == ISD::AND) {
   1756     SDValue AndOp0 = N1.getOperand(0);
   1757     unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
   1758     unsigned DestBits = VT.getScalarType().getSizeInBits();
   1759 
   1760     // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
   1761     // and similar xforms where the inner op is either ~0 or 0.
            // With every bit a sign bit, AndOp0 is 0 or ~0, so (AndOp0 & 1) is
            // 0 or 1, i.e. the negation of AndOp0; adding it equals subtracting
            // AndOp0.
   1762     if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
   1763       SDLoc DL(N);
   1764       return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
   1765     }
   1766   }
   1767 
   1768   // add (sext i1), X -> sub X, (zext i1)
          // Only done when SIGN_EXTEND is not legal for MVT::i1.
   1769   if (N0.getOpcode() == ISD::SIGN_EXTEND &&
   1770       N0.getOperand(0).getValueType() == MVT::i1 &&
   1771       !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
   1772     SDLoc DL(N);
   1773     SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
   1774     return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
   1775   }
   1776 
   1777   // add X, (sextinreg Y i1) -> sub X, (and Y 1)
          // Operand 1 of SIGN_EXTEND_INREG is the VTSDNode naming the type
          // being extended from.
   1778   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
   1779     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
   1780     if (TN->getVT() == MVT::i1) {
   1781       SDLoc DL(N);
   1782       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
   1783                                  DAG.getConstant(1, DL, VT));
   1784       return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
   1785     }
   1786   }
   1787 
          // No fold applied.
   1788   return SDValue();
   1789 }
   1790 
   1791 SDValue DAGCombiner::visitADDC(SDNode *N) {
   1792   SDValue N0 = N->getOperand(0);
   1793   SDValue N1 = N->getOperand(1);
   1794   EVT VT = N0.getValueType();
   1795 
   1796   // If the flag result is dead, turn this into an ADD.
   1797   if (!N->hasAnyUseOfValue(1))
   1798     return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
   1799                      DAG.getNode(ISD::CARRY_FALSE,
   1800                                  SDLoc(N), MVT::Glue));
   1801 
   1802   // canonicalize constant to RHS.
   1803   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1804   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1805   if (N0C && !N1C)
   1806     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
   1807 
   1808   // fold (addc x, 0) -> x + no carry out
   1809   if (isNullConstant(N1))
   1810     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
   1811                                         SDLoc(N), MVT::Glue));
   1812 
   1813   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
   1814   APInt LHSZero, LHSOne;
   1815   APInt RHSZero, RHSOne;
   1816   DAG.computeKnownBits(N0, LHSZero, LHSOne);
   1817 
   1818   if (LHSZero.getBoolValue()) {
   1819     DAG.computeKnownBits(N1, RHSZero, RHSOne);
   1820 
   1821     // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
   1822     // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
   1823     if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
   1824       return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
   1825                        DAG.getNode(ISD::CARRY_FALSE,
   1826                                    SDLoc(N), MVT::Glue));
   1827   }
   1828 
   1829   return SDValue();
   1830 }
   1831 
   1832 SDValue DAGCombiner::visitADDE(SDNode *N) {
   1833   SDValue N0 = N->getOperand(0);
   1834   SDValue N1 = N->getOperand(1);
   1835   SDValue CarryIn = N->getOperand(2);
   1836 
   1837   // canonicalize constant to RHS
   1838   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1839   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1840   if (N0C && !N1C)
   1841     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
   1842                        N1, N0, CarryIn);
   1843 
   1844   // fold (adde x, y, false) -> (addc x, y)
   1845   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   1846     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
   1847 
   1848   return SDValue();
   1849 }
   1850 
   1851 // Since it may not be valid to emit a fold to zero for vector initializers
   1852 // check if we can before folding.
   1853 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
   1854                              SelectionDAG &DAG,
   1855                              bool LegalOperations, bool LegalTypes) {
   1856   if (!VT.isVector())
   1857     return DAG.getConstant(0, DL, VT);
   1858   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   1859     return DAG.getConstant(0, DL, VT);
   1860   return SDValue();
   1861 }
   1862 
   1863 SDValue DAGCombiner::visitSUB(SDNode *N) {
          // Apply the SUB folds below in order and return the first replacement
          // that matches, or a null SDValue when none does.
   1864   SDValue N0 = N->getOperand(0);
   1865   SDValue N1 = N->getOperand(1);
   1866   EVT VT = N0.getValueType();
   1867 
   1868   // fold vector ops
   1869   if (VT.isVector()) {
   1870     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   1871       return FoldedVOp;
   1872 
   1873     // fold (sub x, 0) -> x, vector edition
   1874     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   1875       return N0;
   1876   }
   1877 
   1878   // fold (sub x, x) -> 0
   1879   // FIXME: Refactor this and xor and other similar operations together.
          // tryFoldToZero may return a null SDValue for vector types; that null
          // is returned to the caller as-is, i.e. "no fold".
   1880   if (N0 == N1)
   1881     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
   1882   // fold (sub c1, c2) -> c1-c2
          // N0C / N1C are null for opaque constants.
   1883   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   1884   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   1885   if (N0C && N1C)
   1886     return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
   1887   // fold (sub x, c) -> (add x, -c)
          // This returns unconditionally whenever N1C is set, so N1C is null in
          // everything that follows.
   1888   if (N1C) {
   1889     SDLoc DL(N);
   1890     return DAG.getNode(ISD::ADD, DL, VT, N0,
   1891                        DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
   1892   }
   1893   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
   1894   if (isAllOnesConstant(N0))
   1895     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
   1896   // fold A-(A-B) -> B
   1897   if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
   1898     return N1.getOperand(1);
   1899   // fold (A+B)-A -> B
   1900   if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
   1901     return N0.getOperand(1);
   1902   // fold (A+B)-B -> A
   1903   if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
   1904     return N0.getOperand(0);
   1905   // fold C2-(A+C1) -> (C2-C1)-A
          // N1C1 is C1: the RHS of N1 when N1 is an ADD with a constant RHS,
          // otherwise null. It uses a plain dyn_cast, so an opaque C1 is
          // accepted, whereas C2 (N0C) must be non-opaque.
   1906   ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
   1907     dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
   1908   if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
   1909     SDLoc DL(N);
   1910     SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
   1911                                    DL, VT);
   1912     return DAG.getNode(ISD::SUB, DL, VT, NewC,
   1913                        N1.getOperand(0));
   1914   }
   1915   // fold ((A+(B+or-C))-B) -> A+or-C
          // The inner opcode (ADD or SUB) is reused for the new node.
   1916   if (N0.getOpcode() == ISD::ADD &&
   1917       (N0.getOperand(1).getOpcode() == ISD::SUB ||
   1918        N0.getOperand(1).getOpcode() == ISD::ADD) &&
   1919       N0.getOperand(1).getOperand(0) == N1)
   1920     return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
   1921                        N0.getOperand(0), N0.getOperand(1).getOperand(1));
   1922   // fold ((A+(C+B))-B) -> A+C
   1923   if (N0.getOpcode() == ISD::ADD &&
   1924       N0.getOperand(1).getOpcode() == ISD::ADD &&
   1925       N0.getOperand(1).getOperand(1) == N1)
   1926     return DAG.getNode(ISD::ADD, SDLoc(N), VT,
   1927                        N0.getOperand(0), N0.getOperand(1).getOperand(0));
   1928   // fold ((A-(B-C))-C) -> A-B
   1929   if (N0.getOpcode() == ISD::SUB &&
   1930       N0.getOperand(1).getOpcode() == ISD::SUB &&
   1931       N0.getOperand(1).getOperand(1) == N1)
   1932     return DAG.getNode(ISD::SUB, SDLoc(N), VT,
   1933                        N0.getOperand(0), N0.getOperand(1).getOperand(0));
   1934 
   1935   // If either operand of a sub is undef, the result is undef
   1936   if (N0.getOpcode() == ISD::UNDEF)
   1937     return N0;
   1938   if (N1.getOpcode() == ISD::UNDEF)
   1939     return N1;
   1940 
   1941   // If the relocation model supports it, consider symbol offsets.
   1942   if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
   1943     if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
   1944       // fold (sub Sym, c) -> Sym-c
              // NOTE(review): N1C is always null by this point because the
              // (sub x, c) fold above returns whenever it is set, so this
              // branch cannot fire as written.
   1945       if (N1C && GA->getOpcode() == ISD::GlobalAddress)
   1946         return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
   1947                                     GA->getOffset() -
   1948                                       (uint64_t)N1C->getSExtValue());
   1949       // fold (sub Sym+c1, Sym+c2) -> c1-c2
   1950       if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
   1951         if (GA->getGlobal() == GB->getGlobal())
   1952           return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
   1953                                  SDLoc(N), VT);
   1954     }
   1955 
   1956   // sub X, (sextinreg Y i1) -> add X, (and Y 1)
          // Operand 1 of SIGN_EXTEND_INREG is the VTSDNode naming the type
          // being extended from.
   1957   if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
   1958     VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
   1959     if (TN->getVT() == MVT::i1) {
   1960       SDLoc DL(N);
   1961       SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
   1962                                  DAG.getConstant(1, DL, VT));
   1963       return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
   1964     }
   1965   }
   1966 
          // No fold applied.
   1967   return SDValue();
   1968 }
   1969 
   1970 SDValue DAGCombiner::visitSUBC(SDNode *N) {
   1971   SDValue N0 = N->getOperand(0);
   1972   SDValue N1 = N->getOperand(1);
   1973   EVT VT = N0.getValueType();
   1974   SDLoc DL(N);
   1975 
   1976   // If the flag result is dead, turn this into an SUB.
   1977   if (!N->hasAnyUseOfValue(1))
   1978     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
   1979                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1980 
   1981   // fold (subc x, x) -> 0 + no borrow
   1982   if (N0 == N1)
   1983     return CombineTo(N, DAG.getConstant(0, DL, VT),
   1984                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1985 
   1986   // fold (subc x, 0) -> x + no borrow
   1987   if (isNullConstant(N1))
   1988     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1989 
   1990   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
   1991   if (isAllOnesConstant(N0))
   1992     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
   1993                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1994 
   1995   return SDValue();
   1996 }
   1997 
   1998 SDValue DAGCombiner::visitSUBE(SDNode *N) {
   1999   SDValue N0 = N->getOperand(0);
   2000   SDValue N1 = N->getOperand(1);
   2001   SDValue CarryIn = N->getOperand(2);
   2002 
   2003   // fold (sube x, y, false) -> (subc x, y)
   2004   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   2005     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
   2006 
   2007   return SDValue();
   2008 }
   2009 
   2010 SDValue DAGCombiner::visitMUL(SDNode *N) {
   2011   SDValue N0 = N->getOperand(0);
   2012   SDValue N1 = N->getOperand(1);
   2013   EVT VT = N0.getValueType();
   2014 
   2015   // fold (mul x, undef) -> 0
   2016   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2017     return DAG.getConstant(0, SDLoc(N), VT);
   2018 
   2019   bool N0IsConst = false;
   2020   bool N1IsConst = false;
   2021   bool N1IsOpaqueConst = false;
   2022   bool N0IsOpaqueConst = false;
   2023   APInt ConstValue0, ConstValue1;
   2024   // fold vector ops
   2025   if (VT.isVector()) {
   2026     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2027       return FoldedVOp;
   2028 
   2029     N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
   2030     N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
   2031   } else {
   2032     N0IsConst = isa<ConstantSDNode>(N0);
   2033     if (N0IsConst) {
   2034       ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
   2035       N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
   2036     }
   2037     N1IsConst = isa<ConstantSDNode>(N1);
   2038     if (N1IsConst) {
   2039       ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
   2040       N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
   2041     }
   2042   }
   2043 
   2044   // fold (mul c1, c2) -> c1*c2
   2045   if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
   2046     return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
   2047                                       N0.getNode(), N1.getNode());
   2048 
   2049   // canonicalize constant to RHS (vector doesn't have to splat)
   2050   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   2051      !isConstantIntBuildVectorOrConstantInt(N1))
   2052     return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
   2053   // fold (mul x, 0) -> 0
   2054   if (N1IsConst && ConstValue1 == 0)
   2055     return N1;
   2056   // We require a splat of the entire scalar bit width for non-contiguous
   2057   // bit patterns.
   2058   bool IsFullSplat =
   2059     ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
   2060   // fold (mul x, 1) -> x
   2061   if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
   2062     return N0;
   2063   // fold (mul x, -1) -> 0-x
   2064   if (N1IsConst && ConstValue1.isAllOnesValue()) {
   2065     SDLoc DL(N);
   2066     return DAG.getNode(ISD::SUB, DL, VT,
   2067                        DAG.getConstant(0, DL, VT), N0);
   2068   }
   2069   // fold (mul x, (1 << c)) -> x << c
   2070   if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
   2071       IsFullSplat) {
   2072     SDLoc DL(N);
   2073     return DAG.getNode(ISD::SHL, DL, VT, N0,
   2074                        DAG.getConstant(ConstValue1.logBase2(), DL,
   2075                                        getShiftAmountTy(N0.getValueType())));
   2076   }
   2077   // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
   2078   if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
   2079       IsFullSplat) {
   2080     unsigned Log2Val = (-ConstValue1).logBase2();
   2081     SDLoc DL(N);
   2082     // FIXME: If the input is something that is easily negated (e.g. a
   2083     // single-use add), we should put the negate there.
   2084     return DAG.getNode(ISD::SUB, DL, VT,
   2085                        DAG.getConstant(0, DL, VT),
   2086                        DAG.getNode(ISD::SHL, DL, VT, N0,
   2087                             DAG.getConstant(Log2Val, DL,
   2088                                       getShiftAmountTy(N0.getValueType()))));
   2089   }
   2090 
   2091   APInt Val;
   2092   // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
   2093   if (N1IsConst && N0.getOpcode() == ISD::SHL &&
   2094       (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
   2095                      isa<ConstantSDNode>(N0.getOperand(1)))) {
   2096     SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
   2097                              N1, N0.getOperand(1));
   2098     AddToWorklist(C3.getNode());
   2099     return DAG.getNode(ISD::MUL, SDLoc(N), VT,
   2100                        N0.getOperand(0), C3);
   2101   }
   2102 
   2103   // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
   2104   // use.
   2105   {
   2106     SDValue Sh(nullptr,0), Y(nullptr,0);
   2107     // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
   2108     if (N0.getOpcode() == ISD::SHL &&
   2109         (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
   2110                        isa<ConstantSDNode>(N0.getOperand(1))) &&
   2111         N0.getNode()->hasOneUse()) {
   2112       Sh = N0; Y = N1;
   2113     } else if (N1.getOpcode() == ISD::SHL &&
   2114                isa<ConstantSDNode>(N1.getOperand(1)) &&
   2115                N1.getNode()->hasOneUse()) {
   2116       Sh = N1; Y = N0;
   2117     }
   2118 
   2119     if (Sh.getNode()) {
   2120       SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
   2121                                 Sh.getOperand(0), Y);
   2122       return DAG.getNode(ISD::SHL, SDLoc(N), VT,
   2123                          Mul, Sh.getOperand(1));
   2124     }
   2125   }
   2126 
   2127   // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
   2128   if (isConstantIntBuildVectorOrConstantInt(N1) &&
   2129       N0.getOpcode() == ISD::ADD &&
   2130       isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
   2131       isMulAddWithConstProfitable(N, N0, N1))
   2132       return DAG.getNode(ISD::ADD, SDLoc(N), VT,
   2133                          DAG.getNode(ISD::MUL, SDLoc(N0), VT,
   2134                                      N0.getOperand(0), N1),
   2135                          DAG.getNode(ISD::MUL, SDLoc(N1), VT,
   2136                                      N0.getOperand(1), N1));
   2137 
   2138   // reassociate mul
   2139   if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
   2140     return RMUL;
   2141 
   2142   return SDValue();
   2143 }
   2144 
   2145 /// Return true if divmod libcall is available.
   2146 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
   2147                                      const TargetLowering &TLI) {
   2148   RTLIB::Libcall LC;
   2149   switch (Node->getSimpleValueType(0).SimpleTy) {
   2150   default: return false; // No libcall for vector types.
   2151   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
   2152   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
   2153   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
   2154   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
   2155   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
   2156   }
   2157 
   2158   return TLI.getLibcallName(LC) != nullptr;
   2159 }
   2160 
   2161 /// Issue divrem if both quotient and remainder are needed.
        ///
        /// Node is a div or rem node. The other users of its first operand are
        /// searched for nodes with the same two operands whose opcode is Node's
        /// own opcode, the counterpart div/rem opcode, or the matching DIVREM
        /// opcode. When a partner is found, matching div users are replaced with
        /// result 0 of a DIVREM node and matching rem users with result 1.
        ///
        /// Returns the DIVREM value (result 0), or an empty SDValue when Node is
        /// dead, its type is not legal, DIVREM is neither legal/custom nor backed
        /// by a libcall, the relevant division opcode is legal or custom, or no
        /// partner node was found. Node itself is not replaced here; the caller
        /// picks the result it needs from the returned value.
   2162 SDValue DAGCombiner::useDivRem(SDNode *Node) {
   2163   if (Node->use_empty())
   2164     return SDValue(); // This is a dead node, leave it alone.
   2165 
   2166   EVT VT = Node->getValueType(0);
   2167   if (!TLI.isTypeLegal(VT))
   2168     return SDValue();
   2169 
   2170   unsigned Opcode = Node->getOpcode();
   2171   bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
   2172 
   2173   unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;
   2174   // If DIVREM is going to get expanded into a libcall,
   2175   // but there is no libcall available, then don't combine.
   2176   if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
   2177       !isDivRemLibcallAvailable(Node, isSigned, TLI))
   2178     return SDValue();
   2179 
   2180   // If div is legal, it's better to do the normal expansion
          // OtherOpcode is the counterpart of Node: the rem opcode when Node is a
          // div, the div opcode when Node is a rem. In both branches it is the
          // division opcode whose legality is tested.
   2181   unsigned OtherOpcode = 0;
   2182   if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
   2183     OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
   2184     if (TLI.isOperationLegalOrCustom(Opcode, VT))
   2185       return SDValue();
   2186   } else {
   2187     OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
   2188     if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
   2189       return SDValue();
   2190   }
   2191 
   2192   SDValue Op0 = Node->getOperand(0);
   2193   SDValue Op1 = Node->getOperand(1);
          // Stays empty until the first partner (counterpart or DIVREM) is seen.
   2194   SDValue combined;
   2195   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
   2196          UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
   2197     SDNode *User = *UI;
   2198     if (User == Node || User->use_empty())
   2199       continue;
   2200     // Convert the other matching node(s), too;
   2201     // otherwise, the DIVREM may get target-legalized into something
   2202     // target-specific that we won't be able to recognize.
   2203     unsigned UserOpc = User->getOpcode();
   2204     if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
   2205         User->getOperand(0) == Op0 &&
   2206         User->getOperand(1) == Op1) {
   2207       if (!combined) {
   2208         if (UserOpc == OtherOpcode) {
                  // First counterpart found: build a new two-result DIVREM node.
   2209           SDVTList VTs = DAG.getVTList(VT, VT);
   2210           combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
   2211         } else if (UserOpc == DivRemOpc) {
                  // An equivalent DIVREM already exists: reuse it.
   2212           combined = SDValue(User, 0);
   2213         } else {
                  // A duplicate of Node seen before any partner is left untouched.
   2214           assert(UserOpc == Opcode);
   2215           continue;
   2216         }
   2217       }
              // Quotient users take result 0, remainder users take result 1.
              // DIVREM users match neither test and are not replaced.
   2218       if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
   2219         CombineTo(User, combined);
   2220       else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
   2221         CombineTo(User, combined.getValue(1));
   2222     }
   2223   }
   2224   return combined;
   2225 }
   2226 
   2227 SDValue DAGCombiner::visitSDIV(SDNode *N) {
          // Combine an ISD::SDIV node. The folds below are tried in order and the
          // first one that applies supplies the return value; an empty SDValue
          // means no change was made.
   2228   SDValue N0 = N->getOperand(0);
   2229   SDValue N1 = N->getOperand(1);
   2230   EVT VT = N->getValueType(0);
   2231 
   2232   // fold vector ops
   2233   if (VT.isVector())
   2234     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2235       return FoldedVOp;
   2236 
   2237   SDLoc DL(N);
   2238 
   2239   // fold (sdiv c1, c2) -> c1/c2
          // The result of FoldConstantArithmetic is returned as-is, so the
          // remaining folds are not tried when both operands are non-opaque
          // constants.
   2240   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   2241   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   2242   if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
   2243     return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
   2244   // fold (sdiv X, 1) -> X
   2245   if (N1C && N1C->isOne())
   2246     return N0;
   2247   // fold (sdiv X, -1) -> 0-X
   2248   if (N1C && N1C->isAllOnesValue())
   2249     return DAG.getNode(ISD::SUB, DL, VT,
   2250                        DAG.getConstant(0, DL, VT), N0);
   2251 
   2252   // If we know the sign bits of both operands are zero, strength reduce to a
   2253   // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
   2254   if (!VT.isVector()) {
   2255     if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
   2256       return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
   2257   }
   2258 
   2259   // fold (sdiv X, pow2) -> simple ops after legalize
   2260   // FIXME: We check for the exact bit here because the generic lowering gives
   2261   // better results in that case. The target-specific lowering should learn how
   2262   // to handle exact sdivs efficiently.
   2263   if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
   2264       !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
   2265       (N1C->getAPIntValue().isPowerOf2() ||
   2266        (-N1C->getAPIntValue()).isPowerOf2())) {
   2267     // Target-specific implementation of sdiv x, pow2.
   2268     if (SDValue Res = BuildSDIVPow2(N))
   2269       return Res;
   2270 
            // Trailing zero count is the same for 2^k and -(2^k), so this gives
            // the shift amount for either sign of the divisor.
   2271     unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();
   2272 
   2273     // Splat the sign bit into the register
   2274     SDValue SGN =
   2275         DAG.getNode(ISD::SRA, DL, VT, N0,
   2276                     DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
   2277                                     getShiftAmountTy(N0.getValueType())));
   2278     AddToWorklist(SGN.getNode());
   2279 
   2280     // Add (N0 < 0) ? abs2 - 1 : 0;
            // Shifting the splatted sign right logically by (bits - lg2) leaves
            // the low lg2 bits set for negative N0 and zero otherwise.
   2281     SDValue SRL =
   2282         DAG.getNode(ISD::SRL, DL, VT, SGN,
   2283                     DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
   2284                                     getShiftAmountTy(SGN.getValueType())));
   2285     SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
   2286     AddToWorklist(SRL.getNode());
   2287     AddToWorklist(ADD.getNode());    // Divide by pow2
   2288     SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
   2289                   DAG.getConstant(lg2, DL,
   2290                                   getShiftAmountTy(ADD.getValueType())));
   2291 
   2292     // If we're dividing by a positive value, we're done.  Otherwise, we must
   2293     // negate the result.
   2294     if (N1C->getAPIntValue().isNonNegative())
   2295       return SRA;
   2296 
   2297     AddToWorklist(SRA.getNode());
   2298     return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
   2299   }
   2300 
   2301   // If integer divide is expensive and we satisfy the requirements, emit an
   2302   // alternate sequence.  Targets may check function attributes for size/speed
   2303   // trade-offs.
   2304   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
   2305   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
   2306     if (SDValue Op = BuildSDIV(N))
   2307       return Op;
   2308 
   2309   // sdiv, srem -> sdivrem
   2310   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
   2311   // Otherwise, we break the simplification logic in visitREM().
   2312   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
   2313     if (SDValue DivRem = useDivRem(N))
   2314         return DivRem;
   2315 
   2316   // undef / X -> 0
   2317   if (N0.getOpcode() == ISD::UNDEF)
   2318     return DAG.getConstant(0, DL, VT);
   2319   // X / undef -> undef
   2320   if (N1.getOpcode() == ISD::UNDEF)
   2321     return N1;
   2322 
   2323   return SDValue();
   2324 }
   2325 
   2326 SDValue DAGCombiner::visitUDIV(SDNode *N) {
   2327   SDValue N0 = N->getOperand(0);
   2328   SDValue N1 = N->getOperand(1);
   2329   EVT VT = N->getValueType(0);
   2330 
   2331   // fold vector ops
   2332   if (VT.isVector())
   2333     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2334       return FoldedVOp;
   2335 
   2336   SDLoc DL(N);
   2337 
   2338   // fold (udiv c1, c2) -> c1/c2
   2339   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   2340   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   2341   if (N0C && N1C)
   2342     if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
   2343                                                     N0C, N1C))
   2344       return Folded;
   2345   // fold (udiv x, (1 << c)) -> x >>u c
   2346   if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
   2347     return DAG.getNode(ISD::SRL, DL, VT, N0,
   2348                        DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
   2349                                        getShiftAmountTy(N0.getValueType())));
   2350 
   2351   // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
   2352   if (N1.getOpcode() == ISD::SHL) {
   2353     if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
   2354       if (SHC->getAPIntValue().isPowerOf2()) {
   2355         EVT ADDVT = N1.getOperand(1).getValueType();
   2356         SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
   2357                                   N1.getOperand(1),
   2358                                   DAG.getConstant(SHC->getAPIntValue()
   2359                                                                   .logBase2(),
   2360                                                   DL, ADDVT));
   2361         AddToWorklist(Add.getNode());
   2362         return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
   2363       }
   2364     }
   2365   }
   2366 
   2367   // fold (udiv x, c) -> alternate
   2368   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
   2369   if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
   2370     if (SDValue Op = BuildUDIV(N))
   2371       return Op;
   2372 
   2373   // sdiv, srem -> sdivrem
   2374   // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
   2375   // Otherwise, we break the simplification logic in visitREM().
   2376   if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
   2377     if (SDValue DivRem = useDivRem(N))
   2378         return DivRem;
   2379 
   2380   // undef / X -> 0
   2381   if (N0.getOpcode() == ISD::UNDEF)
   2382     return DAG.getConstant(0, DL, VT);
   2383   // X / undef -> undef
   2384   if (N1.getOpcode() == ISD::UNDEF)
   2385     return N1;
   2386 
   2387   return SDValue();
   2388 }
   2389 
   2390 // handles ISD::SREM and ISD::UREM
        // The folds below are tried in order and the first one that applies
        // supplies the return value; an empty SDValue means no change was made.
   2391 SDValue DAGCombiner::visitREM(SDNode *N) {
   2392   unsigned Opcode = N->getOpcode();
   2393   SDValue N0 = N->getOperand(0);
   2394   SDValue N1 = N->getOperand(1);
   2395   EVT VT = N->getValueType(0);
   2396   bool isSigned = (Opcode == ISD::SREM);
   2397   SDLoc DL(N);
   2398 
   2399   // fold (rem c1, c2) -> c1%c2
   2400   ConstantSDNode *N0C = isConstOrConstSplat(N0);
   2401   ConstantSDNode *N1C = isConstOrConstSplat(N1);
   2402   if (N0C && N1C)
   2403     if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
   2404       return Folded;
   2405 
   2406   if (isSigned) {
   2407     // If we know the sign bits of both operands are zero, strength reduce to a
   2408     // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
   2409     if (!VT.isVector()) {
   2410       if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
   2411         return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
   2412     }
   2413   } else {
   2414     // fold (urem x, pow2) -> (and x, pow2-1)
   2415     if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
   2416         N1C->getAPIntValue().isPowerOf2()) {
   2417       return DAG.getNode(ISD::AND, DL, VT, N0,
   2418                          DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
   2419     }
   2420     // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
   2421     if (N1.getOpcode() == ISD::SHL) {
   2422       if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
   2423         if (SHC->getAPIntValue().isPowerOf2()) {
                  // Adding the all-ones constant subtracts one from the divisor,
                  // which produces the mask for the AND.
   2424           SDValue Add =
   2425             DAG.getNode(ISD::ADD, DL, VT, N1,
   2426                  DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
   2427                                  VT));
   2428           AddToWorklist(Add.getNode());
   2429           return DAG.getNode(ISD::AND, DL, VT, N0, Add);
   2430         }
   2431       }
   2432     }
   2433   }
   2434 
   2435   AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
   2436 
   2437   // If X/C can be simplified by the division-by-constant logic, lower
   2438   // X%C to the equivalent of X-X/C*C.
   2439   // To avoid mangling nodes, this simplification requires that the combine()
   2440   // call for the speculative DIV must not cause a DIVREM conversion.  We guard
   2441   // against this by skipping the simplification if isIntDivCheap().  When
   2442   // div is not cheap, combine will not return a DIVREM.  Regardless,
   2443   // checking cheapness here makes sense since the simplification results in
   2444   // fatter code.
   2445   if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
   2446     unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
            // Build a temporary DIV of the same operands and combine it right
            // away. The rewrite happens only if combine() returned a different
            // node.
   2447     SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
   2448     AddToWorklist(Div.getNode());
   2449     SDValue OptimizedDiv = combine(Div.getNode());
   2450     if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
   2451       assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
   2452              (OptimizedDiv.getOpcode() != ISD::SDIVREM));
   2453       SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
   2454       SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
   2455       AddToWorklist(Mul.getNode());
   2456       return Sub;
   2457     }
   2458   }
   2459 
   2460   // sdiv, srem -> sdivrem
          // useDivRem hands back the DIVREM value; the remainder is its result 1.
   2461   if (SDValue DivRem = useDivRem(N))
   2462     return DivRem.getValue(1);
   2463 
   2464   // undef % X -> 0
   2465   if (N0.getOpcode() == ISD::UNDEF)
   2466     return DAG.getConstant(0, DL, VT);
   2467   // X % undef -> undef
   2468   if (N1.getOpcode() == ISD::UNDEF)
   2469     return N1;
   2470 
   2471   return SDValue();
   2472 }
   2473 
   2474 SDValue DAGCombiner::visitMULHS(SDNode *N) {
   2475   SDValue N0 = N->getOperand(0);
   2476   SDValue N1 = N->getOperand(1);
   2477   EVT VT = N->getValueType(0);
   2478   SDLoc DL(N);
   2479 
   2480   // fold (mulhs x, 0) -> 0
   2481   if (isNullConstant(N1))
   2482     return N1;
   2483   // fold (mulhs x, 1) -> (sra x, size(x)-1)
   2484   if (isOneConstant(N1)) {
   2485     SDLoc DL(N);
   2486     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
   2487                        DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
   2488                                        DL,
   2489                                        getShiftAmountTy(N0.getValueType())));
   2490   }
   2491   // fold (mulhs x, undef) -> 0
   2492   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2493     return DAG.getConstant(0, SDLoc(N), VT);
   2494 
   2495   // If the type twice as wide is legal, transform the mulhs to a wider multiply
   2496   // plus a shift.
   2497   if (VT.isSimple() && !VT.isVector()) {
   2498     MVT Simple = VT.getSimpleVT();
   2499     unsigned SimpleSize = Simple.getSizeInBits();
   2500     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2501     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2502       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
   2503       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
   2504       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   2505       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   2506             DAG.getConstant(SimpleSize, DL,
   2507                             getShiftAmountTy(N1.getValueType())));
   2508       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   2509     }
   2510   }
   2511 
   2512   return SDValue();
   2513 }
   2514 
   2515 SDValue DAGCombiner::visitMULHU(SDNode *N) {
   2516   SDValue N0 = N->getOperand(0);
   2517   SDValue N1 = N->getOperand(1);
   2518   EVT VT = N->getValueType(0);
   2519   SDLoc DL(N);
   2520 
   2521   // fold (mulhu x, 0) -> 0
   2522   if (isNullConstant(N1))
   2523     return N1;
   2524   // fold (mulhu x, 1) -> 0
   2525   if (isOneConstant(N1))
   2526     return DAG.getConstant(0, DL, N0.getValueType());
   2527   // fold (mulhu x, undef) -> 0
   2528   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2529     return DAG.getConstant(0, DL, VT);
   2530 
   2531   // If the type twice as wide is legal, transform the mulhu to a wider multiply
   2532   // plus a shift.
   2533   if (VT.isSimple() && !VT.isVector()) {
   2534     MVT Simple = VT.getSimpleVT();
   2535     unsigned SimpleSize = Simple.getSizeInBits();
   2536     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2537     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2538       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
   2539       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
   2540       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   2541       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   2542             DAG.getConstant(SimpleSize, DL,
   2543                             getShiftAmountTy(N1.getValueType())));
   2544       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   2545     }
   2546   }
   2547 
   2548   return SDValue();
   2549 }
   2550 
   2551 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp
   2552 /// give the opcodes for the two computations that are being performed. Return
   2553 /// true if a simplification was made.
   2554 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
   2555                                                 unsigned HiOp) {
   2556   // If the high half is not needed, just compute the low half.
   2557   bool HiExists = N->hasAnyUseOfValue(1);
   2558   if (!HiExists &&
   2559       (!LegalOperations ||
   2560        TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
   2561     SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
   2562     return CombineTo(N, Res, Res);
   2563   }
   2564 
   2565   // If the low half is not needed, just compute the high half.
   2566   bool LoExists = N->hasAnyUseOfValue(0);
   2567   if (!LoExists &&
   2568       (!LegalOperations ||
   2569        TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
   2570     SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
   2571     return CombineTo(N, Res, Res);
   2572   }
   2573 
   2574   // If both halves are used, return as it is.
   2575   if (LoExists && HiExists)
   2576     return SDValue();
   2577 
   2578   // If the two computed results can be simplified separately, separate them.
   2579   if (LoExists) {
   2580     SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
   2581     AddToWorklist(Lo.getNode());
   2582     SDValue LoOpt = combine(Lo.getNode());
   2583     if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
   2584         (!LegalOperations ||
   2585          TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
   2586       return CombineTo(N, LoOpt, LoOpt);
   2587   }
   2588 
   2589   if (HiExists) {
   2590     SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
   2591     AddToWorklist(Hi.getNode());
   2592     SDValue HiOpt = combine(Hi.getNode());
   2593     if (HiOpt.getNode() && HiOpt != Hi &&
   2594         (!LegalOperations ||
   2595          TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
   2596       return CombineTo(N, HiOpt, HiOpt);
   2597   }
   2598 
   2599   return SDValue();
   2600 }
   2601 
   2602 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
   2603   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
   2604     return Res;
   2605 
   2606   EVT VT = N->getValueType(0);
   2607   SDLoc DL(N);
   2608 
   2609   // If the type is twice as wide is legal, transform the mulhu to a wider
   2610   // multiply plus a shift.
   2611   if (VT.isSimple() && !VT.isVector()) {
   2612     MVT Simple = VT.getSimpleVT();
   2613     unsigned SimpleSize = Simple.getSizeInBits();
   2614     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2615     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2616       SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
   2617       SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
   2618       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
   2619       // Compute the high part as N1.
   2620       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
   2621             DAG.getConstant(SimpleSize, DL,
   2622                             getShiftAmountTy(Lo.getValueType())));
   2623       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
   2624       // Compute the low part as N0.
   2625       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
   2626       return CombineTo(N, Lo, Hi);
   2627     }
   2628   }
   2629 
   2630   return SDValue();
   2631 }
   2632 
   2633 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   2634   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
   2635     return Res;
   2636 
   2637   EVT VT = N->getValueType(0);
   2638   SDLoc DL(N);
   2639 
   2640   // If the type is twice as wide is legal, transform the mulhu to a wider
   2641   // multiply plus a shift.
   2642   if (VT.isSimple() && !VT.isVector()) {
   2643     MVT Simple = VT.getSimpleVT();
   2644     unsigned SimpleSize = Simple.getSizeInBits();
   2645     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2646     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2647       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
   2648       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
   2649       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
   2650       // Compute the high part as N1.
   2651       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
   2652             DAG.getConstant(SimpleSize, DL,
   2653                             getShiftAmountTy(Lo.getValueType())));
   2654       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
   2655       // Compute the low part as N0.
   2656       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
   2657       return CombineTo(N, Lo, Hi);
   2658     }
   2659   }
   2660 
   2661   return SDValue();
   2662 }
   2663 
   2664 SDValue DAGCombiner::visitSMULO(SDNode *N) {
   2665   // (smulo x, 2) -> (saddo x, x)
   2666   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   2667     if (C2->getAPIntValue() == 2)
   2668       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
   2669                          N->getOperand(0), N->getOperand(0));
   2670 
   2671   return SDValue();
   2672 }
   2673 
   2674 SDValue DAGCombiner::visitUMULO(SDNode *N) {
   2675   // (umulo x, 2) -> (uaddo x, x)
   2676   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   2677     if (C2->getAPIntValue() == 2)
   2678       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
   2679                          N->getOperand(0), N->getOperand(0));
   2680 
   2681   return SDValue();
   2682 }
   2683 
   2684 SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
   2685   SDValue N0 = N->getOperand(0);
   2686   SDValue N1 = N->getOperand(1);
   2687   EVT VT = N0.getValueType();
   2688 
   2689   // fold vector ops
   2690   if (VT.isVector())
   2691     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   2692       return FoldedVOp;
   2693 
   2694   // fold (add c1, c2) -> c1+c2
   2695   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   2696   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   2697   if (N0C && N1C)
   2698     return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);
   2699 
   2700   // canonicalize constant to RHS
   2701   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   2702      !isConstantIntBuildVectorOrConstantInt(N1))
   2703     return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);
   2704 
   2705   return SDValue();
   2706 }
   2707 
   2708 /// If this is a binary operator with two operands of the same opcode, try to
   2709 /// simplify it.
   2710 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   2711   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   2712   EVT VT = N0.getValueType();
   2713   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
   2714 
   2715   // Bail early if none of these transforms apply.
   2716   if (N0.getNode()->getNumOperands() == 0) return SDValue();
   2717 
   2718   // For each of OP in AND/OR/XOR:
   2719   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   2720   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   2721   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
   2722   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
   2723   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
   2724   //
   2725   // do not sink logical op inside of a vector extend, since it may combine
   2726   // into a vsetcc.
   2727   EVT Op0VT = N0.getOperand(0).getValueType();
   2728   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
   2729        N0.getOpcode() == ISD::SIGN_EXTEND ||
   2730        N0.getOpcode() == ISD::BSWAP ||
   2731        // Avoid infinite looping with PromoteIntBinOp.
   2732        (N0.getOpcode() == ISD::ANY_EXTEND &&
   2733         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
   2734        (N0.getOpcode() == ISD::TRUNCATE &&
   2735         (!TLI.isZExtFree(VT, Op0VT) ||
   2736          !TLI.isTruncateFree(Op0VT, VT)) &&
   2737         TLI.isTypeLegal(Op0VT))) &&
   2738       !VT.isVector() &&
   2739       Op0VT == N1.getOperand(0).getValueType() &&
   2740       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
   2741     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   2742                                  N0.getOperand(0).getValueType(),
   2743                                  N0.getOperand(0), N1.getOperand(0));
   2744     AddToWorklist(ORNode.getNode());
   2745     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
   2746   }
   2747 
   2748   // For each of OP in SHL/SRL/SRA/AND...
   2749   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
   2750   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
   2751   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
   2752   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
   2753        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
   2754       N0.getOperand(1) == N1.getOperand(1)) {
   2755     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   2756                                  N0.getOperand(0).getValueType(),
   2757                                  N0.getOperand(0), N1.getOperand(0));
   2758     AddToWorklist(ORNode.getNode());
   2759     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
   2760                        ORNode, N0.getOperand(1));
   2761   }
   2762 
   2763   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
   2764   // Only perform this optimization after type legalization and before
   2765   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
   2766   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
   2767   // we don't want to undo this promotion.
   2768   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
   2769   // on scalars.
   2770   if ((N0.getOpcode() == ISD::BITCAST ||
   2771        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
   2772       Level == AfterLegalizeTypes) {
   2773     SDValue In0 = N0.getOperand(0);
   2774     SDValue In1 = N1.getOperand(0);
   2775     EVT In0Ty = In0.getValueType();
   2776     EVT In1Ty = In1.getValueType();
   2777     SDLoc DL(N);
   2778     // If both incoming values are integers, and the original types are the
   2779     // same.
   2780     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
   2781       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
   2782       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
   2783       AddToWorklist(Op.getNode());
   2784       return BC;
   2785     }
   2786   }
   2787 
   2788   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
   2789   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
   2790   // If both shuffles use the same mask, and both shuffle within a single
   2791   // vector, then it is worthwhile to move the swizzle after the operation.
   2792   // The type-legalizer generates this pattern when loading illegal
   2793   // vector types from memory. In many cases this allows additional shuffle
   2794   // optimizations.
   2795   // There are other cases where moving the shuffle after the xor/and/or
   2796   // is profitable even if shuffles don't perform a swizzle.
   2797   // If both shuffles use the same mask, and both shuffles have the same first
   2798   // or second operand, then it might still be profitable to move the shuffle
   2799   // after the xor/and/or operation.
   2800   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
   2801     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
   2802     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
   2803 
   2804     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
   2805            "Inputs to shuffles are not the same type");
   2806 
   2807     // Check that both shuffles use the same mask. The masks are known to be of
   2808     // the same length because the result vector type is the same.
   2809     // Check also that shuffles have only one use to avoid introducing extra
   2810     // instructions.
   2811     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
   2812         SVN0->getMask().equals(SVN1->getMask())) {
   2813       SDValue ShOp = N0->getOperand(1);
   2814 
   2815       // Don't try to fold this node if it requires introducing a
   2816       // build vector of all zeros that might be illegal at this stage.
   2817       if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
   2818         if (!LegalTypes)
   2819           ShOp = DAG.getConstant(0, SDLoc(N), VT);
   2820         else
   2821           ShOp = SDValue();
   2822       }
   2823 
   2824       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
   2825       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
   2826       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
   2827       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
   2828         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   2829                                       N0->getOperand(0), N1->getOperand(0));
   2830         AddToWorklist(NewNode.getNode());
   2831         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
   2832                                     &SVN0->getMask()[0]);
   2833       }
   2834 
   2835       // Don't try to fold this node if it requires introducing a
   2836       // build vector of all zeros that might be illegal at this stage.
   2837       ShOp = N0->getOperand(0);
   2838       if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
   2839         if (!LegalTypes)
   2840           ShOp = DAG.getConstant(0, SDLoc(N), VT);
   2841         else
   2842           ShOp = SDValue();
   2843       }
   2844 
   2845       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
   2846       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
   2847       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
   2848       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
   2849         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   2850                                       N0->getOperand(1), N1->getOperand(1));
   2851         AddToWorklist(NewNode.getNode());
   2852         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
   2853                                     &SVN0->getMask()[0]);
   2854       }
   2855     }
   2856   }
   2857 
   2858   return SDValue();
   2859 }
   2860 
   2861 /// This contains all DAGCombine rules which reduce two values combined by
   2862 /// an And operation to a single value. This makes them reusable in the context
   2863 /// of visitSELECT(). Rules involving constants are not included as
   2864 /// visitSELECT() already handles those cases.
   2865 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
   2866                                   SDNode *LocReference) {
   2867   EVT VT = N1.getValueType();
   2868 
   2869   // fold (and x, undef) -> 0
   2870   if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
   2871     return DAG.getConstant(0, SDLoc(LocReference), VT);
   2872   // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
   2873   SDValue LL, LR, RL, RR, CC0, CC1;
   2874   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
   2875     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
   2876     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
   2877 
   2878     if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
   2879         LL.getValueType().isInteger()) {
   2880       // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
   2881       if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
   2882         SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
   2883                                      LR.getValueType(), LL, RL);
   2884         AddToWorklist(ORNode.getNode());
   2885         return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
   2886       }
   2887       if (isAllOnesConstant(LR)) {
   2888         // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
   2889         if (Op1 == ISD::SETEQ) {
   2890           SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
   2891                                         LR.getValueType(), LL, RL);
   2892           AddToWorklist(ANDNode.getNode());
   2893           return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
   2894         }
   2895         // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
   2896         if (Op1 == ISD::SETGT) {
   2897           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
   2898                                        LR.getValueType(), LL, RL);
   2899           AddToWorklist(ORNode.getNode());
   2900           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
   2901         }
   2902       }
   2903     }
   2904     // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
   2905     if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
   2906         Op0 == Op1 && LL.getValueType().isInteger() &&
   2907       Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
   2908                             (isAllOnesConstant(LR) && isNullConstant(RR)))) {
   2909       SDLoc DL(N0);
   2910       SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
   2911                                     LL, DAG.getConstant(1, DL,
   2912                                                         LL.getValueType()));
   2913       AddToWorklist(ADDNode.getNode());
   2914       return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
   2915                           DAG.getConstant(2, DL, LL.getValueType()),
   2916                           ISD::SETUGE);
   2917     }
   2918     // canonicalize equivalent to ll == rl
   2919     if (LL == RR && LR == RL) {
   2920       Op1 = ISD::getSetCCSwappedOperands(Op1);
   2921       std::swap(RL, RR);
   2922     }
   2923     if (LL == RL && LR == RR) {
   2924       bool isInteger = LL.getValueType().isInteger();
   2925       ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
   2926       if (Result != ISD::SETCC_INVALID &&
   2927           (!LegalOperations ||
   2928            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
   2929             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
   2930         EVT CCVT = getSetCCResultType(LL.getValueType());
   2931         if (N0.getValueType() == CCVT ||
   2932             (!LegalOperations && N0.getValueType() == MVT::i1))
   2933           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
   2934                               LL, LR, Result);
   2935       }
   2936     }
   2937   }
   2938 
   2939   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
   2940       VT.getSizeInBits() <= 64) {
   2941     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   2942       APInt ADDC = ADDI->getAPIntValue();
   2943       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   2944         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
   2945         // immediate for an add, but it is legal if its top c2 bits are set,
   2946         // transform the ADD so the immediate doesn't need to be materialized
   2947         // in a register.
   2948         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
   2949           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
   2950                                              SRLI->getZExtValue());
   2951           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
   2952             ADDC |= Mask;
   2953             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   2954               SDLoc DL(N0);
   2955               SDValue NewAdd =
   2956                 DAG.getNode(ISD::ADD, DL, VT,
   2957                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
   2958               CombineTo(N0.getNode(), NewAdd);
   2959               // Return N so it doesn't get rechecked!
   2960               return SDValue(LocReference, 0);
   2961             }
   2962           }
   2963         }
   2964       }
   2965     }
   2966   }
   2967 
   2968   return SDValue();
   2969 }
   2970 
   2971 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
   2972                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
   2973                                    bool &NarrowLoad) {
   2974   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
   2975 
   2976   if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
   2977     return false;
   2978 
   2979   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
   2980   LoadedVT = LoadN->getMemoryVT();
   2981 
   2982   if (ExtVT == LoadedVT &&
   2983       (!LegalOperations ||
   2984        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
   2985     // ZEXTLOAD will match without needing to change the size of the value being
   2986     // loaded.
   2987     NarrowLoad = false;
   2988     return true;
   2989   }
   2990 
   2991   // Do not change the width of a volatile load.
   2992   if (LoadN->isVolatile())
   2993     return false;
   2994 
   2995   // Do not generate loads of non-round integer types since these can
   2996   // be expensive (and would be wrong if the type is not byte sized).
   2997   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
   2998     return false;
   2999 
   3000   if (LegalOperations &&
   3001       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
   3002     return false;
   3003 
   3004   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
   3005     return false;
   3006 
   3007   NarrowLoad = true;
   3008   return true;
   3009 }
   3010 
   3011 SDValue DAGCombiner::visitAND(SDNode *N) {
   3012   SDValue N0 = N->getOperand(0);
   3013   SDValue N1 = N->getOperand(1);
   3014   EVT VT = N1.getValueType();
   3015 
   3016   // fold vector ops
   3017   if (VT.isVector()) {
   3018     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   3019       return FoldedVOp;
   3020 
   3021     // fold (and x, 0) -> 0, vector edition
   3022     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   3023       // do not return N0, because undef node may exist in N0
   3024       return DAG.getConstant(
   3025           APInt::getNullValue(
   3026               N0.getValueType().getScalarType().getSizeInBits()),
   3027           SDLoc(N), N0.getValueType());
   3028     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   3029       // do not return N1, because undef node may exist in N1
   3030       return DAG.getConstant(
   3031           APInt::getNullValue(
   3032               N1.getValueType().getScalarType().getSizeInBits()),
   3033           SDLoc(N), N1.getValueType());
   3034 
   3035     // fold (and x, -1) -> x, vector edition
   3036     if (ISD::isBuildVectorAllOnes(N0.getNode()))
   3037       return N1;
   3038     if (ISD::isBuildVectorAllOnes(N1.getNode()))
   3039       return N0;
   3040   }
   3041 
   3042   // fold (and c1, c2) -> c1&c2
   3043   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   3044   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   3045   if (N0C && N1C && !N1C->isOpaque())
   3046     return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
   3047   // canonicalize constant to RHS
   3048   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   3049      !isConstantIntBuildVectorOrConstantInt(N1))
   3050     return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
   3051   // fold (and x, -1) -> x
   3052   if (isAllOnesConstant(N1))
   3053     return N0;
   3054   // if (and x, c) is known to be zero, return 0
   3055   unsigned BitWidth = VT.getScalarType().getSizeInBits();
   3056   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
   3057                                    APInt::getAllOnesValue(BitWidth)))
   3058     return DAG.getConstant(0, SDLoc(N), VT);
   3059   // reassociate and
   3060   if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
   3061     return RAND;
   3062   // fold (and (or x, C), D) -> D if (C & D) == D
   3063   if (N1C && N0.getOpcode() == ISD::OR)
   3064     if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
   3065       if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
   3066         return N1;
   3067   // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
   3068   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
   3069     SDValue N0Op0 = N0.getOperand(0);
   3070     APInt Mask = ~N1C->getAPIntValue();
   3071     Mask = Mask.trunc(N0Op0.getValueSizeInBits());
   3072     if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
   3073       SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
   3074                                  N0.getValueType(), N0Op0);
   3075 
   3076       // Replace uses of the AND with uses of the Zero extend node.
   3077       CombineTo(N, Zext);
   3078 
   3079       // We actually want to replace all uses of the any_extend with the
   3080       // zero_extend, to avoid duplicating things.  This will later cause this
   3081       // AND to be folded.
   3082       CombineTo(N0.getNode(), Zext);
   3083       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3084     }
   3085   }
   3086   // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
   3087   // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
   3088   // already be zero by virtue of the width of the base type of the load.
   3089   //
   3090   // the 'X' node here can either be nothing or an extract_vector_elt to catch
   3091   // more cases.
   3092   if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
   3093        N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
   3094        N0.getOperand(0).getOpcode() == ISD::LOAD) ||
   3095       N0.getOpcode() == ISD::LOAD) {
   3096     LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
   3097                                          N0 : N0.getOperand(0) );
   3098 
   3099     // Get the constant (if applicable) the zero'th operand is being ANDed with.
   3100     // This can be a pure constant or a vector splat, in which case we treat the
   3101     // vector as a scalar and use the splat value.
   3102     APInt Constant = APInt::getNullValue(1);
   3103     if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
   3104       Constant = C->getAPIntValue();
   3105     } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
   3106       APInt SplatValue, SplatUndef;
   3107       unsigned SplatBitSize;
   3108       bool HasAnyUndefs;
   3109       bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
   3110                                              SplatBitSize, HasAnyUndefs);
   3111       if (IsSplat) {
   3112         // Undef bits can contribute to a possible optimisation if set, so
   3113         // set them.
   3114         SplatValue |= SplatUndef;
   3115 
   3116         // The splat value may be something like "0x00FFFFFF", which means 0 for
   3117         // the first vector value and FF for the rest, repeating. We need a mask
   3118         // that will apply equally to all members of the vector, so AND all the
   3119         // lanes of the constant together.
   3120         EVT VT = Vector->getValueType(0);
   3121         unsigned BitWidth = VT.getVectorElementType().getSizeInBits();
   3122 
   3123         // If the splat value has been compressed to a bitlength lower
   3124         // than the size of the vector lane, we need to re-expand it to
   3125         // the lane size.
   3126         if (BitWidth > SplatBitSize)
   3127           for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
   3128                SplatBitSize < BitWidth;
   3129                SplatBitSize = SplatBitSize * 2)
   3130             SplatValue |= SplatValue.shl(SplatBitSize);
   3131 
   3132         // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
   3133         // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
   3134         if (SplatBitSize % BitWidth == 0) {
   3135           Constant = APInt::getAllOnesValue(BitWidth);
   3136           for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
   3137             Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
   3138         }
   3139       }
   3140     }
   3141 
   3142     // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
   3143     // actually legal and isn't going to get expanded, else this is a false
   3144     // optimisation.
   3145     bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
   3146                                                     Load->getValueType(0),
   3147                                                     Load->getMemoryVT());
   3148 
   3149     // Resize the constant to the same size as the original memory access before
   3150     // extension. If it is still the AllOnesValue then this AND is completely
   3151     // unneeded.
   3152     Constant =
   3153       Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());
   3154 
   3155     bool B;
   3156     switch (Load->getExtensionType()) {
   3157     default: B = false; break;
   3158     case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
   3159     case ISD::ZEXTLOAD:
   3160     case ISD::NON_EXTLOAD: B = true; break;
   3161     }
   3162 
   3163     if (B && Constant.isAllOnesValue()) {
   3164       // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
   3165       // preserve semantics once we get rid of the AND.
   3166       SDValue NewLoad(Load, 0);
   3167       if (Load->getExtensionType() == ISD::EXTLOAD) {
   3168         NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
   3169                               Load->getValueType(0), SDLoc(Load),
   3170                               Load->getChain(), Load->getBasePtr(),
   3171                               Load->getOffset(), Load->getMemoryVT(),
   3172                               Load->getMemOperand());
   3173         // Replace uses of the EXTLOAD with the new ZEXTLOAD.
   3174         if (Load->getNumValues() == 3) {
   3175           // PRE/POST_INC loads have 3 values.
   3176           SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
   3177                            NewLoad.getValue(2) };
   3178           CombineTo(Load, To, 3, true);
   3179         } else {
   3180           CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
   3181         }
   3182       }
   3183 
   3184       // Fold the AND away, taking care not to fold to the old load node if we
   3185       // replaced it.
   3186       CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);
   3187 
   3188       return SDValue(N, 0); // Return N so it doesn't get rechecked!
   3189     }
   3190   }
   3191 
   3192   // fold (and (load x), 255) -> (zextload x, i8)
   3193   // fold (and (extload x, i16), 255) -> (zextload x, i8)
   3194   // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
   3195   if (N1C && (N0.getOpcode() == ISD::LOAD ||
   3196               (N0.getOpcode() == ISD::ANY_EXTEND &&
   3197                N0.getOperand(0).getOpcode() == ISD::LOAD))) {
   3198     bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
   3199     LoadSDNode *LN0 = HasAnyExt
   3200       ? cast<LoadSDNode>(N0.getOperand(0))
   3201       : cast<LoadSDNode>(N0);
   3202     if (LN0->getExtensionType() != ISD::SEXTLOAD &&
   3203         LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
   3204       auto NarrowLoad = false;
   3205       EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
   3206       EVT ExtVT, LoadedVT;
   3207       if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
   3208                            NarrowLoad)) {
   3209         if (!NarrowLoad) {
   3210           SDValue NewLoad =
   3211             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
   3212                            LN0->getChain(), LN0->getBasePtr(), ExtVT,
   3213                            LN0->getMemOperand());
   3214           AddToWorklist(N);
   3215           CombineTo(LN0, NewLoad, NewLoad.getValue(1));
   3216           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3217         } else {
   3218           EVT PtrType = LN0->getOperand(1).getValueType();
   3219 
   3220           unsigned Alignment = LN0->getAlignment();
   3221           SDValue NewPtr = LN0->getBasePtr();
   3222 
   3223           // For big endian targets, we need to add an offset to the pointer
   3224           // to load the correct bytes.  For little endian systems, we merely
   3225           // need to read fewer bytes from the same pointer.
   3226           if (DAG.getDataLayout().isBigEndian()) {
   3227             unsigned LVTStoreBytes = LoadedVT.getStoreSize();
   3228             unsigned EVTStoreBytes = ExtVT.getStoreSize();
   3229             unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
   3230             SDLoc DL(LN0);
   3231             NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
   3232                                  NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
   3233             Alignment = MinAlign(Alignment, PtrOff);
   3234           }
   3235 
   3236           AddToWorklist(NewPtr.getNode());
   3237 
   3238           SDValue Load =
   3239             DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
   3240                            LN0->getChain(), NewPtr,
   3241                            LN0->getPointerInfo(),
   3242                            ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
   3243                            LN0->isInvariant(), Alignment, LN0->getAAInfo());
   3244           AddToWorklist(N);
   3245           CombineTo(LN0, Load, Load.getValue(1));
   3246           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3247         }
   3248       }
   3249     }
   3250   }
   3251 
   3252   if (SDValue Combined = visitANDLike(N0, N1, N))
   3253     return Combined;
   3254 
   3255   // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
   3256   if (N0.getOpcode() == N1.getOpcode())
   3257     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
   3258       return Tmp;
   3259 
   3260   // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
   3261   // fold (and (sra)) -> (and (srl)) when possible.
   3262   if (!VT.isVector() &&
   3263       SimplifyDemandedBits(SDValue(N, 0)))
   3264     return SDValue(N, 0);
   3265 
   3266   // fold (zext_inreg (extload x)) -> (zextload x)
   3267   if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
   3268     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   3269     EVT MemVT = LN0->getMemoryVT();
   3270     // If we zero all the possible extended bits, then we can turn this into
   3271     // a zextload if we are running before legalize or the operation is legal.
   3272     unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
   3273     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
   3274                            BitWidth - MemVT.getScalarType().getSizeInBits())) &&
   3275         ((!LegalOperations && !LN0->isVolatile()) ||
   3276          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
   3277       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
   3278                                        LN0->getChain(), LN0->getBasePtr(),
   3279                                        MemVT, LN0->getMemOperand());
   3280       AddToWorklist(N);
   3281       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   3282       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3283     }
   3284   }
   3285   // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
   3286   if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   3287       N0.hasOneUse()) {
   3288     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   3289     EVT MemVT = LN0->getMemoryVT();
   3290     // If we zero all the possible extended bits, then we can turn this into
   3291     // a zextload if we are running before legalize or the operation is legal.
   3292     unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
   3293     if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
   3294                            BitWidth - MemVT.getScalarType().getSizeInBits())) &&
   3295         ((!LegalOperations && !LN0->isVolatile()) ||
   3296          TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
   3297       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
   3298                                        LN0->getChain(), LN0->getBasePtr(),
   3299                                        MemVT, LN0->getMemOperand());
   3300       AddToWorklist(N);
   3301       CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   3302       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   3303     }
   3304   }
   3305   // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
   3306   if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
   3307     SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
   3308                                        N0.getOperand(1), false);
   3309     if (BSwap.getNode())
   3310       return BSwap;
   3311   }
   3312 
   3313   return SDValue();
   3314 }
   3315 
   3316 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
   3317 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
   3318                                         bool DemandHighBits) {
   3319   if (!LegalOperations)
   3320     return SDValue();
   3321 
   3322   EVT VT = N->getValueType(0);
   3323   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
   3324     return SDValue();
   3325   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
   3326     return SDValue();
   3327 
   3328   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
   3329   bool LookPassAnd0 = false;
   3330   bool LookPassAnd1 = false;
   3331   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
   3332       std::swap(N0, N1);
   3333   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
   3334       std::swap(N0, N1);
   3335   if (N0.getOpcode() == ISD::AND) {
   3336     if (!N0.getNode()->hasOneUse())
   3337       return SDValue();
   3338     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3339     if (!N01C || N01C->getZExtValue() != 0xFF00)
   3340       return SDValue();
   3341     N0 = N0.getOperand(0);
   3342     LookPassAnd0 = true;
   3343   }
   3344 
   3345   if (N1.getOpcode() == ISD::AND) {
   3346     if (!N1.getNode()->hasOneUse())
   3347       return SDValue();
   3348     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   3349     if (!N11C || N11C->getZExtValue() != 0xFF)
   3350       return SDValue();
   3351     N1 = N1.getOperand(0);
   3352     LookPassAnd1 = true;
   3353   }
   3354 
   3355   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
   3356     std::swap(N0, N1);
   3357   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
   3358     return SDValue();
   3359   if (!N0.getNode()->hasOneUse() ||
   3360       !N1.getNode()->hasOneUse())
   3361     return SDValue();
   3362 
   3363   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3364   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   3365   if (!N01C || !N11C)
   3366     return SDValue();
   3367   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
   3368     return SDValue();
   3369 
   3370   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
   3371   SDValue N00 = N0->getOperand(0);
   3372   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
   3373     if (!N00.getNode()->hasOneUse())
   3374       return SDValue();
   3375     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
   3376     if (!N001C || N001C->getZExtValue() != 0xFF)
   3377       return SDValue();
   3378     N00 = N00.getOperand(0);
   3379     LookPassAnd0 = true;
   3380   }
   3381 
   3382   SDValue N10 = N1->getOperand(0);
   3383   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
   3384     if (!N10.getNode()->hasOneUse())
   3385       return SDValue();
   3386     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
   3387     if (!N101C || N101C->getZExtValue() != 0xFF00)
   3388       return SDValue();
   3389     N10 = N10.getOperand(0);
   3390     LookPassAnd1 = true;
   3391   }
   3392 
   3393   if (N00 != N10)
   3394     return SDValue();
   3395 
   3396   // Make sure everything beyond the low halfword gets set to zero since the SRL
   3397   // 16 will clear the top bits.
   3398   unsigned OpSizeInBits = VT.getSizeInBits();
   3399   if (DemandHighBits && OpSizeInBits > 16) {
   3400     // If the left-shift isn't masked out then the only way this is a bswap is
   3401     // if all bits beyond the low 8 are 0. In that case the entire pattern
   3402     // reduces to a left shift anyway: leave it for other parts of the combiner.
   3403     if (!LookPassAnd0)
   3404       return SDValue();
   3405 
   3406     // However, if the right shift isn't masked out then it might be because
   3407     // it's not needed. See if we can spot that too.
   3408     if (!LookPassAnd1 &&
   3409         !DAG.MaskedValueIsZero(
   3410             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
   3411       return SDValue();
   3412   }
   3413 
   3414   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
   3415   if (OpSizeInBits > 16) {
   3416     SDLoc DL(N);
   3417     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
   3418                       DAG.getConstant(OpSizeInBits - 16, DL,
   3419                                       getShiftAmountTy(VT)));
   3420   }
   3421   return Res;
   3422 }
   3423 
   3424 /// Return true if the specified node is an element that makes up a 32-bit
   3425 /// packed halfword byteswap.
   3426 /// ((x & 0x000000ff) << 8) |
   3427 /// ((x & 0x0000ff00) >> 8) |
   3428 /// ((x & 0x00ff0000) << 8) |
   3429 /// ((x & 0xff000000) >> 8)
   3430 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
   3431   if (!N.getNode()->hasOneUse())
   3432     return false;
   3433 
   3434   unsigned Opc = N.getOpcode();
   3435   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
   3436     return false;
   3437 
   3438   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3439   if (!N1C)
   3440     return false;
   3441 
   3442   unsigned Num;
   3443   switch (N1C->getZExtValue()) {
   3444   default:
   3445     return false;
   3446   case 0xFF:       Num = 0; break;
   3447   case 0xFF00:     Num = 1; break;
   3448   case 0xFF0000:   Num = 2; break;
   3449   case 0xFF000000: Num = 3; break;
   3450   }
   3451 
   3452   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
   3453   SDValue N0 = N.getOperand(0);
   3454   if (Opc == ISD::AND) {
   3455     if (Num == 0 || Num == 2) {
   3456       // (x >> 8) & 0xff
   3457       // (x >> 8) & 0xff0000
   3458       if (N0.getOpcode() != ISD::SRL)
   3459         return false;
   3460       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3461       if (!C || C->getZExtValue() != 8)
   3462         return false;
   3463     } else {
   3464       // (x << 8) & 0xff00
   3465       // (x << 8) & 0xff000000
   3466       if (N0.getOpcode() != ISD::SHL)
   3467         return false;
   3468       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3469       if (!C || C->getZExtValue() != 8)
   3470         return false;
   3471     }
   3472   } else if (Opc == ISD::SHL) {
   3473     // (x & 0xff) << 8
   3474     // (x & 0xff0000) << 8
   3475     if (Num != 0 && Num != 2)
   3476       return false;
   3477     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3478     if (!C || C->getZExtValue() != 8)
   3479       return false;
   3480   } else { // Opc == ISD::SRL
   3481     // (x & 0xff00) >> 8
   3482     // (x & 0xff000000) >> 8
   3483     if (Num != 1 && Num != 3)
   3484       return false;
   3485     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3486     if (!C || C->getZExtValue() != 8)
   3487       return false;
   3488   }
   3489 
   3490   if (Parts[Num])
   3491     return false;
   3492 
   3493   Parts[Num] = N0.getOperand(0).getNode();
   3494   return true;
   3495 }
   3496 
   3497 /// Match a 32-bit packed halfword bswap. That is
   3498 /// ((x & 0x000000ff) << 8) |
   3499 /// ((x & 0x0000ff00) >> 8) |
   3500 /// ((x & 0x00ff0000) << 8) |
   3501 /// ((x & 0xff000000) >> 8)
   3502 /// => (rotl (bswap x), 16)
   3503 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   3504   if (!LegalOperations)
   3505     return SDValue();
   3506 
   3507   EVT VT = N->getValueType(0);
   3508   if (VT != MVT::i32)
   3509     return SDValue();
   3510   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
   3511     return SDValue();
   3512 
   3513   // Look for either
   3514   // (or (or (and), (and)), (or (and), (and)))
   3515   // (or (or (or (and), (and)), (and)), (and))
   3516   if (N0.getOpcode() != ISD::OR)
   3517     return SDValue();
   3518   SDValue N00 = N0.getOperand(0);
   3519   SDValue N01 = N0.getOperand(1);
   3520   SDNode *Parts[4] = {};
   3521 
   3522   if (N1.getOpcode() == ISD::OR &&
   3523       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
   3524     // (or (or (and), (and)), (or (and), (and)))
   3525     SDValue N000 = N00.getOperand(0);
   3526     if (!isBSwapHWordElement(N000, Parts))
   3527       return SDValue();
   3528 
   3529     SDValue N001 = N00.getOperand(1);
   3530     if (!isBSwapHWordElement(N001, Parts))
   3531       return SDValue();
   3532     SDValue N010 = N01.getOperand(0);
   3533     if (!isBSwapHWordElement(N010, Parts))
   3534       return SDValue();
   3535     SDValue N011 = N01.getOperand(1);
   3536     if (!isBSwapHWordElement(N011, Parts))
   3537       return SDValue();
   3538   } else {
   3539     // (or (or (or (and), (and)), (and)), (and))
   3540     if (!isBSwapHWordElement(N1, Parts))
   3541       return SDValue();
   3542     if (!isBSwapHWordElement(N01, Parts))
   3543       return SDValue();
   3544     if (N00.getOpcode() != ISD::OR)
   3545       return SDValue();
   3546     SDValue N000 = N00.getOperand(0);
   3547     if (!isBSwapHWordElement(N000, Parts))
   3548       return SDValue();
   3549     SDValue N001 = N00.getOperand(1);
   3550     if (!isBSwapHWordElement(N001, Parts))
   3551       return SDValue();
   3552   }
   3553 
   3554   // Make sure the parts are all coming from the same node.
   3555   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
   3556     return SDValue();
   3557 
   3558   SDLoc DL(N);
   3559   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
   3560                               SDValue(Parts[0], 0));
   3561 
   3562   // Result of the bswap should be rotated by 16. If it's not legal, then
   3563   // do  (x << 16) | (x >> 16).
   3564   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
   3565   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
   3566     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
   3567   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
   3568     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
   3569   return DAG.getNode(ISD::OR, DL, VT,
   3570                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
   3571                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
   3572 }
   3573 
   3574 /// This contains all DAGCombine rules which reduce two values combined by
   3575 /// an Or operation to a single value \see visitANDLike().
   3576 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
   3577   EVT VT = N1.getValueType();
   3578   // fold (or x, undef) -> -1
   3579   if (!LegalOperations &&
   3580       (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
   3581     EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
   3582     return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
   3583                            SDLoc(LocReference), VT);
   3584   }
   3585   // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
   3586   SDValue LL, LR, RL, RR, CC0, CC1;
   3587   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
   3588     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
   3589     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
   3590 
   3591     if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
   3592       // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
   3593       // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
   3594       if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
   3595         SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
   3596                                      LR.getValueType(), LL, RL);
   3597         AddToWorklist(ORNode.getNode());
   3598         return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
   3599       }
   3600       // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
   3601       // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
   3602       if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
   3603         SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
   3604                                       LR.getValueType(), LL, RL);
   3605         AddToWorklist(ANDNode.getNode());
   3606         return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
   3607       }
   3608     }
   3609     // canonicalize equivalent to ll == rl
   3610     if (LL == RR && LR == RL) {
   3611       Op1 = ISD::getSetCCSwappedOperands(Op1);
   3612       std::swap(RL, RR);
   3613     }
   3614     if (LL == RL && LR == RR) {
   3615       bool isInteger = LL.getValueType().isInteger();
   3616       ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
   3617       if (Result != ISD::SETCC_INVALID &&
   3618           (!LegalOperations ||
   3619            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
   3620             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
   3621         EVT CCVT = getSetCCResultType(LL.getValueType());
   3622         if (N0.getValueType() == CCVT ||
   3623             (!LegalOperations && N0.getValueType() == MVT::i1))
   3624           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
   3625                               LL, LR, Result);
   3626       }
   3627     }
   3628   }
   3629 
   3630   // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
   3631   if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
   3632       // Don't increase # computations.
   3633       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
   3634     // We can only do this xform if we know that bits from X that are set in C2
   3635     // but not in C1 are already zero.  Likewise for Y.
   3636     if (const ConstantSDNode *N0O1C =
   3637         getAsNonOpaqueConstant(N0.getOperand(1))) {
   3638       if (const ConstantSDNode *N1O1C =
   3639           getAsNonOpaqueConstant(N1.getOperand(1))) {
   3640         // We can only do this xform if we know that bits from X that are set in
   3641         // C2 but not in C1 are already zero.  Likewise for Y.
   3642         const APInt &LHSMask = N0O1C->getAPIntValue();
   3643         const APInt &RHSMask = N1O1C->getAPIntValue();
   3644 
   3645         if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
   3646             DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
   3647           SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
   3648                                   N0.getOperand(0), N1.getOperand(0));
   3649           SDLoc DL(LocReference);
   3650           return DAG.getNode(ISD::AND, DL, VT, X,
   3651                              DAG.getConstant(LHSMask | RHSMask, DL, VT));
   3652         }
   3653       }
   3654     }
   3655   }
   3656 
   3657   // (or (and X, M), (and X, N)) -> (and X, (or M, N))
   3658   if (N0.getOpcode() == ISD::AND &&
   3659       N1.getOpcode() == ISD::AND &&
   3660       N0.getOperand(0) == N1.getOperand(0) &&
   3661       // Don't increase # computations.
   3662       (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
   3663     SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
   3664                             N0.getOperand(1), N1.getOperand(1));
   3665     return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
   3666   }
   3667 
   3668   return SDValue();
   3669 }
   3670 
   3671 SDValue DAGCombiner::visitOR(SDNode *N) {
   3672   SDValue N0 = N->getOperand(0);
   3673   SDValue N1 = N->getOperand(1);
   3674   EVT VT = N1.getValueType();
   3675 
   3676   // fold vector ops
   3677   if (VT.isVector()) {
   3678     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   3679       return FoldedVOp;
   3680 
   3681     // fold (or x, 0) -> x, vector edition
   3682     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   3683       return N1;
   3684     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   3685       return N0;
   3686 
   3687     // fold (or x, -1) -> -1, vector edition
   3688     if (ISD::isBuildVectorAllOnes(N0.getNode()))
   3689       // do not return N0, because undef node may exist in N0
   3690       return DAG.getConstant(
   3691           APInt::getAllOnesValue(
   3692               N0.getValueType().getScalarType().getSizeInBits()),
   3693           SDLoc(N), N0.getValueType());
   3694     if (ISD::isBuildVectorAllOnes(N1.getNode()))
   3695       // do not return N1, because undef node may exist in N1
   3696       return DAG.getConstant(
   3697           APInt::getAllOnesValue(
   3698               N1.getValueType().getScalarType().getSizeInBits()),
   3699           SDLoc(N), N1.getValueType());
   3700 
   3701     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
   3702     // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
   3703     // Do this only if the resulting shuffle is legal.
   3704     if (isa<ShuffleVectorSDNode>(N0) &&
   3705         isa<ShuffleVectorSDNode>(N1) &&
   3706         // Avoid folding a node with illegal type.
   3707         TLI.isTypeLegal(VT) &&
   3708         N0->getOperand(1) == N1->getOperand(1) &&
   3709         ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
   3710       bool CanFold = true;
   3711       unsigned NumElts = VT.getVectorNumElements();
   3712       const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
   3713       const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
   3714       // We construct two shuffle masks:
   3715       // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
   3716       // and N1 as the second operand.
   3717       // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
   3718       // and N0 as the second operand.
   3719       // We do this because OR is commutable and therefore there might be
   3720       // two ways to fold this node into a shuffle.
   3721       SmallVector<int,4> Mask1;
   3722       SmallVector<int,4> Mask2;
   3723 
   3724       for (unsigned i = 0; i != NumElts && CanFold; ++i) {
   3725         int M0 = SV0->getMaskElt(i);
   3726         int M1 = SV1->getMaskElt(i);
   3727 
   3728         // Both shuffle indexes are undef. Propagate Undef.
   3729         if (M0 < 0 && M1 < 0) {
   3730           Mask1.push_back(M0);
   3731           Mask2.push_back(M0);
   3732           continue;
   3733         }
   3734 
   3735         if (M0 < 0 || M1 < 0 ||
   3736             (M0 < (int)NumElts && M1 < (int)NumElts) ||
   3737             (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
   3738           CanFold = false;
   3739           break;
   3740         }
   3741 
   3742         Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
   3743         Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
   3744       }
   3745 
   3746       if (CanFold) {
   3747         // Fold this sequence only if the resulting shuffle is 'legal'.
   3748         if (TLI.isShuffleMaskLegal(Mask1, VT))
   3749           return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
   3750                                       N1->getOperand(0), &Mask1[0]);
   3751         if (TLI.isShuffleMaskLegal(Mask2, VT))
   3752           return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
   3753                                       N0->getOperand(0), &Mask2[0]);
   3754       }
   3755     }
   3756   }
   3757 
   3758   // fold (or c1, c2) -> c1|c2
   3759   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   3760   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   3761   if (N0C && N1C && !N1C->isOpaque())
   3762     return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
   3763   // canonicalize constant to RHS
   3764   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   3765      !isConstantIntBuildVectorOrConstantInt(N1))
   3766     return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
   3767   // fold (or x, 0) -> x
   3768   if (isNullConstant(N1))
   3769     return N0;
   3770   // fold (or x, -1) -> -1
   3771   if (isAllOnesConstant(N1))
   3772     return N1;
   3773   // fold (or x, c) -> c iff (x & ~c) == 0
   3774   if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
   3775     return N1;
   3776 
   3777   if (SDValue Combined = visitORLike(N0, N1, N))
   3778     return Combined;
   3779 
   3780   // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
   3781   if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
   3782     return BSwap;
   3783   if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
   3784     return BSwap;
   3785 
   3786   // reassociate or
   3787   if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
   3788     return ROR;
   3789   // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
   3790   // iff (c1 & c2) == 0.
   3791   if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
   3792              isa<ConstantSDNode>(N0.getOperand(1))) {
   3793     ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
   3794     if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
   3795       if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
   3796                                                    N1C, C1))
   3797         return DAG.getNode(
   3798             ISD::AND, SDLoc(N), VT,
   3799             DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
   3800       return SDValue();
   3801     }
   3802   }
   3803   // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
   3804   if (N0.getOpcode() == N1.getOpcode())
   3805     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
   3806       return Tmp;
   3807 
   3808   // See if this is some rotate idiom.
   3809   if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
   3810     return SDValue(Rot, 0);
   3811 
   3812   // Simplify the operands using demanded-bits information.
   3813   if (!VT.isVector() &&
   3814       SimplifyDemandedBits(SDValue(N, 0)))
   3815     return SDValue(N, 0);
   3816 
   3817   return SDValue();
   3818 }
   3819 
   3820 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
   3821 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
   3822   if (Op.getOpcode() == ISD::AND) {
   3823     if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
   3824       Mask = Op.getOperand(1);
   3825       Op = Op.getOperand(0);
   3826     } else {
   3827       return false;
   3828     }
   3829   }
   3830 
   3831   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
   3832     Shift = Op;
   3833     return true;
   3834   }
   3835 
   3836   return false;
   3837 }
   3838 
   3839 // Return true if we can prove that, whenever Neg and Pos are both in the
   3840 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
   3841 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
   3842 //
   3843 //     (or (shift1 X, Neg), (shift2 X, Pos))
   3844 //
   3845 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
   3846 // in direction shift1 by Neg.  The range [0, EltSize) means that we only need
   3847 // to consider shift amounts with defined behavior.
   3848 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
   3849   // If EltSize is a power of 2 then:
   3850   //
   3851   //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
   3852   //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
   3853   //
   3854   // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
   3855   // for the stronger condition:
   3856   //
   3857   //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
   3858   //
   3859   // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
   3860   // we can just replace Neg with Neg' for the rest of the function.
   3861   //
   3862   // In other cases we check for the even stronger condition:
   3863   //
   3864   //     Neg == EltSize - Pos                                    [B]
   3865   //
   3866   // for all Neg and Pos.  Note that the (or ...) then invokes undefined
   3867   // behavior if Pos == 0 (and consequently Neg == EltSize).
   3868   //
   3869   // We could actually use [A] whenever EltSize is a power of 2, but the
   3870   // only extra cases that it would match are those uninteresting ones
   3871   // where Neg and Pos are never in range at the same time.  E.g. for
   3872   // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
   3873   // as well as (sub 32, Pos), but:
   3874   //
   3875   //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
   3876   //
   3877   // always invokes undefined behavior for 32-bit X.
   3878   //
   3879   // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
   3880   unsigned MaskLoBits = 0;
   3881   if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
   3882     if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
   3883       if (NegC->getAPIntValue() == EltSize - 1) {
   3884         Neg = Neg.getOperand(0);
   3885         MaskLoBits = Log2_64(EltSize);
   3886       }
   3887     }
   3888   }
   3889 
   3890   // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
   3891   if (Neg.getOpcode() != ISD::SUB)
   3892     return false;
   3893   ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
   3894   if (!NegC)
   3895     return false;
   3896   SDValue NegOp1 = Neg.getOperand(1);
   3897 
   3898   // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
   3899   // Pos'.  The truncation is redundant for the purpose of the equality.
   3900   if (MaskLoBits && Pos.getOpcode() == ISD::AND)
   3901     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
   3902       if (PosC->getAPIntValue() == EltSize - 1)
   3903         Pos = Pos.getOperand(0);
   3904 
   3905   // The condition we need is now:
   3906   //
   3907   //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
   3908   //
   3909   // If NegOp1 == Pos then we need:
   3910   //
   3911   //              EltSize & Mask == NegC & Mask
   3912   //
   3913   // (because "x & Mask" is a truncation and distributes through subtraction).
   3914   APInt Width;
   3915   if (Pos == NegOp1)
   3916     Width = NegC->getAPIntValue();
   3917 
   3918   // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
   3919   // Then the condition we want to prove becomes:
   3920   //
   3921   //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
   3922   //
   3923   // which, again because "x & Mask" is a truncation, becomes:
   3924   //
   3925   //                NegC & Mask == (EltSize - PosC) & Mask
   3926   //             EltSize & Mask == (NegC + PosC) & Mask
   3927   else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
   3928     if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
   3929       Width = PosC->getAPIntValue() + NegC->getAPIntValue();
   3930     else
   3931       return false;
   3932   } else
   3933     return false;
   3934 
   3935   // Now we just need to check that EltSize & Mask == Width & Mask.
   3936   if (MaskLoBits)
   3937     // EltSize & Mask is 0 since Mask is EltSize - 1.
   3938     return Width.getLoBits(MaskLoBits) == 0;
   3939   return Width == EltSize;
   3940 }
   3941 
   3942 // A subroutine of MatchRotate used once we have found an OR of two opposite
   3943 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
   3944 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
   3945 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
   3946 // Neg with outer conversions stripped away.
   3947 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   3948                                        SDValue Neg, SDValue InnerPos,
   3949                                        SDValue InnerNeg, unsigned PosOpcode,
   3950                                        unsigned NegOpcode, SDLoc DL) {
   3951   // fold (or (shl x, (*ext y)),
   3952   //          (srl x, (*ext (sub 32, y)))) ->
   3953   //   (rotl x, y) or (rotr x, (sub 32, y))
   3954   //
   3955   // fold (or (shl x, (*ext (sub 32, y))),
   3956   //          (srl x, (*ext y))) ->
   3957   //   (rotr x, y) or (rotl x, (sub 32, y))
   3958   EVT VT = Shifted.getValueType();
   3959   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
   3960     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
   3961     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
   3962                        HasPos ? Pos : Neg).getNode();
   3963   }
   3964 
   3965   return nullptr;
   3966 }
   3967 
   3968 // MatchRotate - Handle an 'or' of two operands.  If this is one of the many
   3969 // idioms for rotate, and if the target supports rotation instructions, generate
   3970 // a rot[lr].
   3971 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
   3972   // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
   3973   EVT VT = LHS.getValueType();
   3974   if (!TLI.isTypeLegal(VT)) return nullptr;
   3975 
   3976   // The target must have at least one rotate flavor.
   3977   bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
   3978   bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
   3979   if (!HasROTL && !HasROTR) return nullptr;
   3980 
   3981   // Match "(X shl/srl V1) & V2" where V2 may not be present.
   3982   SDValue LHSShift;   // The shift.
   3983   SDValue LHSMask;    // AND value if any.
   3984   if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
   3985     return nullptr; // Not part of a rotate.
   3986 
   3987   SDValue RHSShift;   // The shift.
   3988   SDValue RHSMask;    // AND value if any.
   3989   if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
   3990     return nullptr; // Not part of a rotate.
   3991 
   3992   if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
   3993     return nullptr;   // Not shifting the same value.
   3994 
   3995   if (LHSShift.getOpcode() == RHSShift.getOpcode())
   3996     return nullptr;   // Shifts must disagree.
   3997 
   3998   // Canonicalize shl to left side in a shl/srl pair.
   3999   if (RHSShift.getOpcode() == ISD::SHL) {
   4000     std::swap(LHS, RHS);
   4001     std::swap(LHSShift, RHSShift);
   4002     std::swap(LHSMask, RHSMask);
   4003   }
   4004 
   4005   unsigned EltSizeInBits = VT.getScalarSizeInBits();
   4006   SDValue LHSShiftArg = LHSShift.getOperand(0);
   4007   SDValue LHSShiftAmt = LHSShift.getOperand(1);
   4008   SDValue RHSShiftArg = RHSShift.getOperand(0);
   4009   SDValue RHSShiftAmt = RHSShift.getOperand(1);
   4010 
   4011   // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
   4012   // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
   4013   if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
   4014     uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
   4015     uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
   4016     if ((LShVal + RShVal) != EltSizeInBits)
   4017       return nullptr;
   4018 
   4019     SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
   4020                               LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);
   4021 
   4022     // If there is an AND of either shifted operand, apply it to the result.
   4023     if (LHSMask.getNode() || RHSMask.getNode()) {
   4024       APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
   4025       SDValue Mask = DAG.getConstant(AllBits, DL, VT);
   4026 
   4027       if (LHSMask.getNode()) {
   4028         APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
   4029         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
   4030                            DAG.getNode(ISD::OR, DL, VT, LHSMask,
   4031                                        DAG.getConstant(RHSBits, DL, VT)));
   4032       }
   4033       if (RHSMask.getNode()) {
   4034         APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
   4035         Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
   4036                            DAG.getNode(ISD::OR, DL, VT, RHSMask,
   4037                                        DAG.getConstant(LHSBits, DL, VT)));
   4038       }
   4039 
   4040       Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
   4041     }
   4042 
   4043     return Rot.getNode();
   4044   }
   4045 
   4046   // If there is a mask here, and we have a variable shift, we can't be sure
   4047   // that we're masking out the right stuff.
   4048   if (LHSMask.getNode() || RHSMask.getNode())
   4049     return nullptr;
   4050 
   4051   // If the shift amount is sign/zext/any-extended just peel it off.
   4052   SDValue LExtOp0 = LHSShiftAmt;
   4053   SDValue RExtOp0 = RHSShiftAmt;
   4054   if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
   4055        LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
   4056        LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
   4057        LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
   4058       (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
   4059        RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
   4060        RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
   4061        RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
   4062     LExtOp0 = LHSShiftAmt.getOperand(0);
   4063     RExtOp0 = RHSShiftAmt.getOperand(0);
   4064   }
   4065 
   4066   SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
   4067                                    LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
   4068   if (TryL)
   4069     return TryL;
   4070 
   4071   SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
   4072                                    RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
   4073   if (TryR)
   4074     return TryR;
   4075 
   4076   return nullptr;
   4077 }
   4078 
   4079 SDValue DAGCombiner::visitXOR(SDNode *N) {
   4080   SDValue N0 = N->getOperand(0);
   4081   SDValue N1 = N->getOperand(1);
   4082   EVT VT = N0.getValueType();
   4083 
   4084   // fold vector ops
   4085   if (VT.isVector()) {
   4086     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   4087       return FoldedVOp;
   4088 
   4089     // fold (xor x, 0) -> x, vector edition
   4090     if (ISD::isBuildVectorAllZeros(N0.getNode()))
   4091       return N1;
   4092     if (ISD::isBuildVectorAllZeros(N1.getNode()))
   4093       return N0;
   4094   }
   4095 
   4096   // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
   4097   if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
   4098     return DAG.getConstant(0, SDLoc(N), VT);
   4099   // fold (xor x, undef) -> undef
   4100   if (N0.getOpcode() == ISD::UNDEF)
   4101     return N0;
   4102   if (N1.getOpcode() == ISD::UNDEF)
   4103     return N1;
   4104   // fold (xor c1, c2) -> c1^c2
   4105   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   4106   ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
   4107   if (N0C && N1C)
   4108     return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
   4109   // canonicalize constant to RHS
   4110   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   4111      !isConstantIntBuildVectorOrConstantInt(N1))
   4112     return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
   4113   // fold (xor x, 0) -> x
   4114   if (isNullConstant(N1))
   4115     return N0;
   4116   // reassociate xor
   4117   if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
   4118     return RXOR;
   4119 
   4120   // fold !(x cc y) -> (x !cc y)
   4121   SDValue LHS, RHS, CC;
   4122   if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
   4123     bool isInt = LHS.getValueType().isInteger();
   4124     ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
   4125                                                isInt);
   4126 
   4127     if (!LegalOperations ||
   4128         TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
   4129       switch (N0.getOpcode()) {
   4130       default:
   4131         llvm_unreachable("Unhandled SetCC Equivalent!");
   4132       case ISD::SETCC:
   4133         return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
   4134       case ISD::SELECT_CC:
   4135         return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
   4136                                N0.getOperand(3), NotCC);
   4137       }
   4138     }
   4139   }
   4140 
   4141   // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
   4142   if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
   4143       N0.getNode()->hasOneUse() &&
   4144       isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
   4145     SDValue V = N0.getOperand(0);
   4146     SDLoc DL(N0);
   4147     V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
   4148                     DAG.getConstant(1, DL, V.getValueType()));
   4149     AddToWorklist(V.getNode());
   4150     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
   4151   }
   4152 
   4153   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
   4154   if (isOneConstant(N1) && VT == MVT::i1 &&
   4155       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
   4156     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   4157     if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
   4158       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
   4159       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
   4160       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
   4161       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
   4162       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
   4163     }
   4164   }
   4165   // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
   4166   if (isAllOnesConstant(N1) &&
   4167       (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
   4168     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   4169     if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
   4170       unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
   4171       LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
   4172       RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
   4173       AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
   4174       return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
   4175     }
   4176   }
   4177   // fold (xor (and x, y), y) -> (and (not x), y)
   4178   if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
   4179       N0->getOperand(1) == N1) {
   4180     SDValue X = N0->getOperand(0);
   4181     SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
   4182     AddToWorklist(NotX.getNode());
   4183     return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
   4184   }
   4185   // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
   4186   if (N1C && N0.getOpcode() == ISD::XOR) {
   4187     if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
   4188       SDLoc DL(N);
   4189       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
   4190                          DAG.getConstant(N1C->getAPIntValue() ^
   4191                                          N00C->getAPIntValue(), DL, VT));
   4192     }
   4193     if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
   4194       SDLoc DL(N);
   4195       return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
   4196                          DAG.getConstant(N1C->getAPIntValue() ^
   4197                                          N01C->getAPIntValue(), DL, VT));
   4198     }
   4199   }
   4200   // fold (xor x, x) -> 0
   4201   if (N0 == N1)
   4202     return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
   4203 
   4204   // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
   4205   // Here is a concrete example of this equivalence:
   4206   // i16   x ==  14
   4207   // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
   4208   // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
   4209   //
   4210   // =>
   4211   //
   4212   // i16     ~1      == 0b1111111111111110
   4213   // i16 rol(~1, 14) == 0b1011111111111111
   4214   //
   4215   // Some additional tips to help conceptualize this transform:
   4216   // - Try to see the operation as placing a single zero in a value of all ones.
   4217   // - There exists no value for x which would allow the result to contain zero.
   4218   // - Values of x larger than the bitwidth are undefined and do not require a
   4219   //   consistent result.
   4220   // - Pushing the zero left requires shifting one bits in from the right.
   4221   // A rotate left of ~1 is a nice way of achieving the desired result.
   4222   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
   4223       && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
   4224     SDLoc DL(N);
   4225     return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
   4226                        N0.getOperand(1));
   4227   }
   4228 
   4229   // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
   4230   if (N0.getOpcode() == N1.getOpcode())
   4231     if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
   4232       return Tmp;
   4233 
   4234   // Simplify the expression using non-local knowledge.
   4235   if (!VT.isVector() &&
   4236       SimplifyDemandedBits(SDValue(N, 0)))
   4237     return SDValue(N, 0);
   4238 
   4239   return SDValue();
   4240 }
   4241 
   4242 /// Handle transforms common to the three shifts, when the shift amount is a
   4243 /// constant.
        ///
        /// For N = (shift (binop X, C1), C2) this builds
        /// (binop (shift X, C2), (shift C1, C2)), where binop is OR, XOR or AND for
        /// any shift opcode, or ADD when N is an SHL. An empty SDValue is returned
        /// whenever one of the guards below rejects the pattern.
        ///
        /// NOTE(review): \p Amt is not read anywhere in this body; the shift amount
        /// is always taken from N->getOperand(1).
   4244 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   4245   SDNode *LHS = N->getOperand(0).getNode();
          // Only proceed when the shifted node has a single use.
   4246   if (!LHS->hasOneUse()) return SDValue();
   4247 
   4248   // We want to pull some binops through shifts, so that we have (and (shift))
   4249   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   4250   // thing happens with address calculations, so it's important to canonicalize
   4251   // it.
   4252   bool HighBitSet = false;  // Can we transform this if the high bit is set?
   4253 
          // Select the binop kinds we handle and record, for the SRA check further
          // down, which sign-bit value of the binop constant is acceptable.
   4254   switch (LHS->getOpcode()) {
   4255   default: return SDValue();
   4256   case ISD::OR:
   4257   case ISD::XOR:
   4258     HighBitSet = false; // We can only transform sra if the high bit is clear.
   4259     break;
   4260   case ISD::AND:
   4261     HighBitSet = true;  // We can only transform sra if the high bit is set.
   4262     break;
   4263   case ISD::ADD:
   4264     if (N->getOpcode() != ISD::SHL)
   4265       return SDValue(); // only shl(add) not sr[al](add).
   4266     HighBitSet = false; // We can only transform sra if the high bit is clear.
   4267     break;
   4268   }
   4269 
   4270   // We require the RHS of the binop to be a constant and not opaque as well.
   4271   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
   4272   if (!BinOpCst) return SDValue();
   4273 
   4274   // FIXME: disable this unless the input to the binop is a shift by a constant.
   4275   // If it is not a shift, it pessimizes some common cases like:
   4276   //
   4277   //    void foo(int *X, int i) { X[i & 1235] = 1; }
   4278   //    int bar(int *X, int i) { return X[i & 255]; }
          //
          // As written, the check below already bails unless the binop's first
          // operand is an SHL/SRA/SRL whose second operand is a ConstantSDNode.
   4279   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
   4280   if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
   4281        BinOpLHSVal->getOpcode() != ISD::SRA &&
   4282        BinOpLHSVal->getOpcode() != ISD::SRL) ||
   4283       !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
   4284     return SDValue();
   4285 
   4286   EVT VT = N->getValueType(0);
   4287 
   4288   // If this is a signed shift right, and the high bit is modified by the
   4289   // logical operation, do not perform the transformation. The highBitSet
   4290   // boolean indicates the value of the high bit of the constant which would
   4291   // cause it to be modified for this operation.
   4292   if (N->getOpcode() == ISD::SRA) {
   4293     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
   4294     if (BinOpRHSSignSet != HighBitSet)
   4295       return SDValue();
   4296   }
   4297 
          // Give the target a chance to veto the commute.
   4298   if (!TLI.isDesirableToCommuteWithShift(LHS))
   4299     return SDValue();
   4300 
   4301   // Fold the constants, shifting the binop RHS by the shift amount.
          // Both operands are constants here; the assert below checks that getNode
          // returned a ConstantSDNode for them.
   4302   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
   4303                                N->getValueType(0),
   4304                                LHS->getOperand(1), N->getOperand(1));
   4305   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
   4306 
   4307   // Create the new shift.
   4308   SDValue NewShift = DAG.getNode(N->getOpcode(),
   4309                                  SDLoc(LHS->getOperand(0)),
   4310                                  VT, LHS->getOperand(0), N->getOperand(1));
   4311 
   4312   // Create the new binop.
   4313   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
   4314 }
   4315 
   4316 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
          // Push a truncate through an AND with a constant mask. N must be a
          // TRUNCATE whose operand is an AND (both asserted below). On success the
          // replacement (and (truncate N00), TruncC) is returned; otherwise an
          // empty SDValue is returned and nothing is created.
   4317   assert(N->getOpcode() == ISD::TRUNCATE);
   4318   assert(N->getOperand(0).getOpcode() == ISD::AND);
   4319 
   4320   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
          // Only done when both the truncate and the AND it consumes have one use.
   4321   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
   4322     SDValue N01 = N->getOperand(0).getOperand(1);
   4323 
            // The mask must be a constant (isConstOrConstSplat, defined elsewhere,
            // yields the ConstantSDNode or null) and must not be opaque.
   4324     if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
   4325       if (!N01C->isOpaque()) {
   4326         EVT TruncVT = N->getValueType(0);
   4327         SDValue N00 = N->getOperand(0).getOperand(0);
                // Narrow the mask constant to the scalar width of the result type.
   4328         APInt TruncC = N01C->getAPIntValue();
   4329         TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
   4330         SDLoc DL(N);
   4331 
   4332         return DAG.getNode(ISD::AND, DL, TruncVT,
   4333                            DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
   4334                            DAG.getConstant(TruncC, DL, TruncVT));
   4335       }
   4336     }
   4337   }
   4338 
   4339   return SDValue();
   4340 }
   4341 
   4342 SDValue DAGCombiner::visitRotate(SDNode *N) {
          // Combine for a rotate node; the rebuilt node reuses N's own opcode.
          // The only fold here rewrites the rotate amount (operand 1); an empty
          // SDValue is returned when it does not apply.
   4343   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
   4344   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
   4345       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
            // The opcode checks above satisfy the helper's two asserts; it may
            // still return an empty value, in which case N is left as is.
   4346     SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
   4347     if (NewOp1.getNode())
   4348       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
   4349                          N->getOperand(0), NewOp1);
   4350   }
   4351   return SDValue();
   4352 }
   4353 
   4354 SDValue DAGCombiner::visitSHL(SDNode *N) {
          // Combine for a shift-left node: operand 0 (N0) is shifted by operand 1
          // (N1). The folds below are tried in order and the first match returns.
          // The result is either a replacement value, N itself (after
          // SimplifyDemandedBits reports a change), or an empty SDValue when
          // nothing applies.
   4355   SDValue N0 = N->getOperand(0);
   4356   SDValue N1 = N->getOperand(1);
   4357   EVT VT = N0.getValueType();
   4358   unsigned OpSizeInBits = VT.getScalarSizeInBits();
   4359 
   4360   // fold vector ops
          // N1C is the shift amount when N1 is a scalar ConstantSDNode; in the
          // vector case it may be replaced by a splat constant below.
   4361   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   4362   if (VT.isVector()) {
   4363     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   4364       return FoldedVOp;
   4365 
   4366     BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
   4367     // If setcc produces all-one true value then:
   4368     // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
   4369     if (N1CV && N1CV->isConstant()) {
   4370       if (N0.getOpcode() == ISD::AND) {
   4371         SDValue N00 = N0->getOperand(0);
   4372         SDValue N01 = N0->getOperand(1);
   4373         BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);
   4374 
   4375         if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
   4376             TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
   4377                 TargetLowering::ZeroOrNegativeOneBooleanContent) {
                  // Only rewrite if the two constant vectors actually fold.
   4378           if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
   4379                                                      N01CV, N1CV))
   4380             return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
   4381         }
   4382       } else {
                // N0 is not an AND: take the splat shift amount (if any) as N1C.
                // Note that N1C is left untouched when N0 is an AND.
   4383         N1C = isConstOrConstSplat(N1);
   4384       }
   4385     }
   4386   }
   4387 
   4388   // fold (shl c1, c2) -> c1<<c2
   4389   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   4390   if (N0C && N1C && !N1C->isOpaque())
   4391     return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
   4392   // fold (shl 0, x) -> 0
   4393   if (isNullConstant(N0))
   4394     return N0;
   4395   // fold (shl x, c >= size(x)) -> undef
   4396   if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
   4397     return DAG.getUNDEF(VT);
   4398   // fold (shl x, 0) -> x
   4399   if (N1C && N1C->isNullValue())
   4400     return N0;
   4401   // fold (shl undef, x) -> 0
   4402   if (N0.getOpcode() == ISD::UNDEF)
   4403     return DAG.getConstant(0, SDLoc(N), VT);
   4404   // if (shl x, c) is known to be zero, return 0
          // The query uses an all-ones mask, i.e. every result bit must be known
          // zero. Unlike most folds here this one does not require N1C.
   4405   if (DAG.MaskedValueIsZero(SDValue(N, 0),
   4406                             APInt::getAllOnesValue(OpSizeInBits)))
   4407     return DAG.getConstant(0, SDLoc(N), VT);
   4408   // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
   4409   if (N1.getOpcode() == ISD::TRUNCATE &&
   4410       N1.getOperand(0).getOpcode() == ISD::AND) {
   4411     SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
   4412     if (NewOp1.getNode())
   4413       return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
   4414   }
   4415 
          // Returning N itself signals that the node was updated in place.
   4416   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   4417     return SDValue(N, 0);
   4418 
   4419   // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
   4420   if (N1C && N0.getOpcode() == ISD::SHL) {
   4421     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   4422       uint64_t c1 = N0C1->getZExtValue();
   4423       uint64_t c2 = N1C->getZExtValue();
   4424       SDLoc DL(N);
              // A combined amount of at least the bit width shifts every bit out.
   4425       if (c1 + c2 >= OpSizeInBits)
   4426         return DAG.getConstant(0, DL, VT);
   4427       return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
   4428                          DAG.getConstant(c1 + c2, DL, N1.getValueType()));
   4429     }
   4430   }
   4431 
   4432   // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
   4433   // For this to be valid, the second form must not preserve any of the bits
   4434   // that are shifted out by the inner shift in the first form.  This means
   4435   // the outer shift size must be >= the number of bits added by the ext.
   4436   // As a corollary, we don't care what kind of ext it is.
          // Note: the code below builds (shl (ext x), (add c1, c2)), i.e. the
          // extension is applied to x first and the shift is done in VT.
   4437   if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
   4438               N0.getOpcode() == ISD::ANY_EXTEND ||
   4439               N0.getOpcode() == ISD::SIGN_EXTEND) &&
   4440       N0.getOperand(0).getOpcode() == ISD::SHL) {
   4441     SDValue N0Op0 = N0.getOperand(0);
   4442     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
   4443       uint64_t c1 = N0Op0C1->getZExtValue();
   4444       uint64_t c2 = N1C->getZExtValue();
   4445       EVT InnerShiftVT = N0Op0.getValueType();
   4446       uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
              // OpSizeInBits - InnerShiftSize is the number of bits the ext added.
   4447       if (c2 >= OpSizeInBits - InnerShiftSize) {
   4448         SDLoc DL(N0);
   4449         if (c1 + c2 >= OpSizeInBits)
   4450           return DAG.getConstant(0, DL, VT);
   4451         return DAG.getNode(ISD::SHL, DL, VT,
   4452                            DAG.getNode(N0.getOpcode(), DL, VT,
   4453                                        N0Op0->getOperand(0)),
   4454                            DAG.getConstant(c1 + c2, DL, N1.getValueType()));
   4455       }
   4456     }
   4457   }
   4458 
   4459   // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
   4460   // Only fold this if the inner zext has no other uses to avoid increasing
   4461   // the total number of instructions.
   4462   if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
   4463       N0.getOperand(0).getOpcode() == ISD::SRL) {
   4464     SDValue N0Op0 = N0.getOperand(0);
   4465     if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
   4466       uint64_t c1 = N0Op0C1->getZExtValue();
   4467       if (c1 < VT.getScalarSizeInBits()) {
   4468         uint64_t c2 = N1C->getZExtValue();
                // Both shift amounts must be the same constant value.
   4469         if (c1 == c2) {
   4470           SDValue NewOp0 = N0.getOperand(0);
                  // The new narrow shl reuses the inner srl's shift-amount type.
   4471           EVT CountVT = NewOp0.getOperand(1).getValueType();
   4472           SDLoc DL(N);
   4473           SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
   4474                                        NewOp0,
   4475                                        DAG.getConstant(c2, DL, CountVT));
   4476           AddToWorklist(NewSHL.getNode());
   4477           return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
   4478         }
   4479       }
   4480     }
   4481   }
   4482 
   4483   // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
   4484   // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
          // In the second case the code emits a right shift by (C1 - C2).
          // Applies only when the inner right shift (N0) carries the exact flag.
   4485   if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
   4486       cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
   4487     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   4488       uint64_t C1 = N0C1->getZExtValue();
   4489       uint64_t C2 = N1C->getZExtValue();
   4490       SDLoc DL(N);
   4491       if (C1 <= C2)
   4492         return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
   4493                            DAG.getConstant(C2 - C1, DL, N1.getValueType()));
   4494       return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
   4495                          DAG.getConstant(C1 - C2, DL, N1.getValueType()));
   4496     }
   4497   }
   4498 
   4499   // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
   4500   //                               (and (srl x, (sub c1, c2)), MASK)
   4501   // Only fold this if the inner shift has no other uses -- if it does, folding
   4502   // this will increase the total number of instructions.
   4503   if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
   4504     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
   4505       uint64_t c1 = N0C1->getZExtValue();
   4506       if (c1 < OpSizeInBits) {
   4507         uint64_t c2 = N1C->getZExtValue();
                // Start with the bits of x that the srl keeps (the top
                // OpSizeInBits - c1 bits), then move the mask by the same net
                // shift that is applied to x.
   4508         APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
   4509         SDValue Shift;
   4510         if (c2 > c1) {
                  // Net effect is a left shift by c2 - c1.
   4511           Mask = Mask.shl(c2 - c1);
   4512           SDLoc DL(N);
   4513           Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
   4514                               DAG.getConstant(c2 - c1, DL, N1.getValueType()));
   4515         } else {
                  // Net effect is a logical right shift by c1 - c2 (zero when the
                  // two amounts are equal).
   4516           Mask = Mask.lshr(c1 - c2);
   4517           SDLoc DL(N);
   4518           Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
   4519                               DAG.getConstant(c1 - c2, DL, N1.getValueType()));
   4520         }
   4521         SDLoc DL(N0);
   4522         return DAG.getNode(ISD::AND, DL, VT, Shift,
   4523                            DAG.getConstant(Mask, DL, VT));
   4524       }
   4525     }
   4526   }
   4527   // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
          // The mask is built directly as the top (BitSize - c1) bits set.
   4528   if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
   4529     unsigned BitSize = VT.getScalarSizeInBits();
   4530     SDLoc DL(N);
   4531     SDValue HiBitsMask =
   4532       DAG.getConstant(APInt::getHighBitsSet(BitSize,
   4533                                             BitSize - N1C->getZExtValue()),
   4534                       DL, VT);
   4535     return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
   4536                        HiBitsMask);
   4537   }
   4538 
   4539   // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
   4540   // Variant of version done on multiply, except mul by a power of 2 is turned
   4541   // into a shift.
          // Val only receives the splat value from isConstantSplatVector; it is
          // not read afterwards.
   4542   APInt Val;
   4543   if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
   4544       (isa<ConstantSDNode>(N0.getOperand(1)) ||
   4545        isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
   4546     SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
   4547     SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
   4548     return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
   4549   }
   4550 
   4551   // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
   4552   if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
   4553     if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
              // Skip the rewrite when the constants cannot be folded.
   4554       if (SDValue Folded =
   4555               DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
   4556         return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
   4557     }
   4558   }
   4559 
          // Finally try the transforms shared by all shifts with a constant,
          // non-opaque amount.
   4560   if (N1C && !N1C->isOpaque())
   4561     if (SDValue NewSHL = visitShiftByConstant(N, N1C))
   4562       return NewSHL;
   4563 
   4564   return SDValue();
   4565 }
   4566 
   4567 SDValue DAGCombiner::visitSRA(SDNode *N) {
          // Combine for an arithmetic shift-right node: operand 0 (N0) is shifted
          // by operand 1 (N1). The folds below are tried in order and the first
          // match returns. The result is either a replacement value, N itself
          // (after SimplifyDemandedBits reports a change), or an empty SDValue.
   4568   SDValue N0 = N->getOperand(0);
   4569   SDValue N1 = N->getOperand(1);
   4570   EVT VT = N0.getValueType();
   4571   unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
   4572 
   4573   // fold vector ops
          // N1C is the constant shift amount: the scalar constant, or for vectors
          // whatever isConstOrConstSplat returns (null if there is none).
   4574   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   4575   if (VT.isVector()) {
   4576     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   4577       return FoldedVOp;
   4578 
   4579     N1C = isConstOrConstSplat(N1);
   4580   }
   4581 
   4582   // fold (sra c1, c2) -> c1 >>s c2
   4583   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   4584   if (N0C && N1C && !N1C->isOpaque())
   4585     return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
   4586   // fold (sra 0, x) -> 0
   4587   if (isNullConstant(N0))
   4588     return N0;
   4589   // fold (sra -1, x) -> -1
   4590   if (isAllOnesConstant(N0))
   4591     return N0;
   4592   // fold (sra x, c >= size(x)) -> undef
   4593   if (N1C && N1C->getZExtValue() >= OpSizeInBits)
   4594     return DAG.getUNDEF(VT);
   4595   // fold (sra x, 0) -> x
   4596   if (N1C && N1C->isNullValue())
   4597     return N0;
   4598   // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
   4599   // sext_inreg.
   4600   if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
            // LowBits is the number of low bits of x that survive shl-then-sra.
   4601     unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
   4602     EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
   4603     if (VT.isVector())
   4604       ExtVT = EVT::getVectorVT(*DAG.getContext(),
   4605                                ExtVT, VT.getVectorNumElements());
            // Before operation legalization any ExtVT is accepted; afterwards the
            // target must report SIGN_EXTEND_INREG as legal for ExtVT.
   4606     if ((!LegalOperations ||
   4607          TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
   4608       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
   4609                          N0.getOperand(0), DAG.getValueType(ExtVT));
   4610   }
   4611 
   4612   // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
   4613   if (N1C && N0.getOpcode() == ISD::SRA) {
   4614     if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
              // Clamp the combined amount to OpSizeInBits - 1, the largest amount
              // that is still below the bit width.
   4615       unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
   4616       if (Sum >= OpSizeInBits)
   4617         Sum = OpSizeInBits - 1;
   4618       SDLoc DL(N);
   4619       return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
   4620                          DAG.getConstant(Sum, DL, N1.getValueType()));
   4621     }
   4622   }
   4623 
   4624   // fold (sra (shl X, m), (sub result_size, n))
   4625   // -> (sign_extend (trunc (srl X, (sub (sub result_size, n), m)))) for
   4626   // result_size - n > m.
   4627   // If truncate is free for the target sext(shl) is likely to result in better
   4628   // code.
   4629   if (N0.getOpcode() == ISD::SHL && N1C) {
   4630     // Get the two constants of the shifts: N01C = m, N1C = result_size - n.
   4631     const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
   4632     if (N01C) {
   4633       LLVMContext &Ctx = *DAG.getContext();
   4634       // Determine what the truncate's result bitsize and type would be.
   4635       EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());
   4636 
   4637       if (VT.isVector())
   4638         TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());
   4639 
   4640       // Determine the residual right-shift amount.
              // The unsigned difference is stored in a signed int so that an inner
              // amount larger than the outer one fails the "> 0" test below.
   4641       signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
   4642 
   4643       // If the shift is not a no-op (in which case this should be just a sign
   4644       // extend already), the truncated to type is legal, sign_extend is legal
   4645       // on that type, and the truncate to that type is both legal and free,
   4646       // perform the transform.
   4647       if ((ShiftAmt > 0) &&
   4648           TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
   4649           TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
   4650           TLI.isTruncateFree(VT, TruncVT)) {
   4651 
   4652         SDLoc DL(N);
   4653         SDValue Amt = DAG.getConstant(ShiftAmt, DL,
   4654             getShiftAmountTy(N0.getOperand(0).getValueType()));
                // Logical shift right of X by the residual amount, then narrow and
                // sign-extend back to the result type.
   4655         SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
   4656                                     N0.getOperand(0), Amt);
   4657         SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
   4658                                     Shift);
   4659         return DAG.getNode(ISD::SIGN_EXTEND, DL,
   4660                            N->getValueType(0), Trunc);
   4661       }
   4662     }
   4663   }
   4664 
   4665   // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
   4666   if (N1.getOpcode() == ISD::TRUNCATE &&
   4667       N1.getOperand(0).getOpcode() == ISD::AND) {
   4668     SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
   4669     if (NewOp1.getNode())
   4670       return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
   4671   }
   4672 
   4673   // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
   4674   //      if c1 is equal to the number of bits the trunc removes
          // The inner shift may be either SRL or SRA; both it and its shift-amount
          // operand must have a single use.
   4675   if (N0.getOpcode() == ISD::TRUNCATE &&
   4676       (N0.getOperand(0).getOpcode() == ISD::SRL ||
   4677        N0.getOperand(0).getOpcode() == ISD::SRA) &&
   4678       N0.getOperand(0).hasOneUse() &&
   4679       N0.getOperand(0).getOperand(1).hasOneUse() &&
   4680       N1C) {
   4681     SDValue N0Op0 = N0.getOperand(0);
   4682     if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
   4683       unsigned LargeShiftVal = LargeShift->getZExtValue();
   4684       EVT LargeVT = N0Op0.getValueType();
   4685 
              // The wide shift amount must equal the number of bits the truncate
              // drops.
   4686       if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
   4687         SDLoc DL(N);
   4688         SDValue Amt =
   4689           DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
   4690                           getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
   4691         SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
   4692                                   N0Op0.getOperand(0), Amt);
   4693         return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
   4694       }
   4695     }
   4696   }
   4697 
   4698   // Simplify, based on bits shifted out of the LHS.
   4699   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   4700     return SDValue(N, 0);
   4701 
   4702 
   4703   // If the sign bit is known to be zero, switch this to a SRL.
   4704   if (DAG.SignBitIsZero(N0))
   4705     return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);
   4706 
          // Finally try the transforms shared by all shifts with a constant,
          // non-opaque amount.
   4707   if (N1C && !N1C->isOpaque())
   4708     if (SDValue NewSRA = visitShiftByConstant(N, N1C))
   4709       return NewSRA;
   4710 
   4711   return SDValue();
   4712 }
   4713 
   4714 SDValue DAGCombiner::visitSRL(SDNode *N) {
   4715   SDValue N0 = N->getOperand(0);
   4716   SDValue N1 = N->getOperand(1);
   4717   EVT VT = N0.getValueType();
   4718   unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
   4719 
   4720   // fold vector ops
   4721   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   4722   if (VT.isVector()) {
   4723     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   4724       return FoldedVOp;
   4725 
   4726     N1C = isConstOrConstSplat(N1);
   4727   }
   4728 
   4729   // fold (srl c1, c2) -> c1 >>u c2
   4730   ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
   4731   if (N0C && N1C && !N1C->isOpaque())
   4732     return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
   4733   // fold (srl 0, x) -> 0
   4734   if (isNullConstant(N0))
   4735     return N0;
   4736   // fold (srl x, c >= size(x)) -> undef
   4737   if (N1C && N1C->getZExtValue() >= OpSizeInBits)
   4738     return DAG.getUNDEF(VT);
   4739   // fold (srl x, 0) -> x
   4740   if (N1C && N1C->isNullValue())
   4741     return N0;
   4742   // if (srl x, c) is known to be zero, return 0
   4743   if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
   4744                                    APInt::getAllOnesValue(OpSizeInBits)))
   4745     return DAG.getConstant(0, SDLoc(N), VT);
   4746 
   4747   // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
   4748   if (N1C && N0.getOpcode() == ISD::SRL) {
   4749     if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
   4750       uint64_t c1 = N01C->getZExtValue();
   4751       uint64_t c2 = N1C->getZExtValue();
   4752       SDLoc DL(N);
   4753       if (c1 + c2 >= OpSizeInBits)
   4754         return DAG.getConstant(0, DL, VT);
   4755       return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
   4756                          DAG.getConstant(c1 + c2, DL, N1.getValueType()));
   4757     }
   4758   }
   4759 
   4760   // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
   4761   if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
   4762       N0.getOperand(0).getOpcode() == ISD::SRL &&
   4763       isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
   4764     uint64_t c1 =
   4765       cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
   4766     uint64_t c2 = N1C->getZExtValue();
   4767     EVT InnerShiftVT = N0.getOperand(0).getValueType();
   4768     EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
   4769     uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
   4770     // This is only valid if the OpSizeInBits + c1 = size of inner shift.
   4771     if (c1 + OpSizeInBits == InnerShiftSize) {
   4772       SDLoc DL(N0);
   4773       if (c1 + c2 >= InnerShiftSize)
   4774         return DAG.getConstant(0, DL, VT);
   4775       return DAG.getNode(ISD::TRUNCATE, DL, VT,
   4776                          DAG.getNode(ISD::SRL, DL, InnerShiftVT,
   4777                                      N0.getOperand(0)->getOperand(0),
   4778                                      DAG.getConstant(c1 + c2, DL,
   4779                                                      ShiftCountVT)));
   4780     }
   4781   }
   4782 
   4783   // fold (srl (shl x, c), c) -> (and x, cst2)
   4784   if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
   4785     unsigned BitSize = N0.getScalarValueSizeInBits();
   4786     if (BitSize <= 64) {
   4787       uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
   4788       SDLoc DL(N);
   4789       return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
   4790                          DAG.getConstant(~0ULL >> ShAmt, DL, VT));
   4791     }
   4792   }
   4793 
   4794   // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
   4795   if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
   4796     // Shifting in all undef bits?
   4797     EVT SmallVT = N0.getOperand(0).getValueType();
   4798     unsigned BitSize = SmallVT.getScalarSizeInBits();
   4799     if (N1C->getZExtValue() >= BitSize)
   4800       return DAG.getUNDEF(VT);
   4801 
   4802     if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
   4803       uint64_t ShiftAmt = N1C->getZExtValue();
   4804       SDLoc DL0(N0);
   4805       SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
   4806                                        N0.getOperand(0),
   4807                           DAG.getConstant(ShiftAmt, DL0,
   4808                                           getShiftAmountTy(SmallVT)));
   4809       AddToWorklist(SmallShift.getNode());
   4810       APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
   4811       SDLoc DL(N);
   4812       return DAG.getNode(ISD::AND, DL, VT,
   4813                          DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
   4814                          DAG.getConstant(Mask, DL, VT));
   4815     }
   4816   }
   4817 
   4818   // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
   4819   // bit, which is unmodified by sra.
   4820   if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
   4821     if (N0.getOpcode() == ISD::SRA)
   4822       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
   4823   }
   4824 
   4825   // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
   4826   if (N1C && N0.getOpcode() == ISD::CTLZ &&
   4827       N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
   4828     APInt KnownZero, KnownOne;
   4829     DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);
   4830 
   4831     // If any of the input bits are KnownOne, then the input couldn't be all
   4832     // zeros, thus the result of the srl will always be zero.
   4833     if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);
   4834 
   4835     // If all of the bits input to the ctlz node are known to be zero, then
   4836     // the result of the ctlz is "32" and the result of the shift is one.
   4837     APInt UnknownBits = ~KnownZero;
   4838     if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);
   4839 
   4840     // Otherwise, check to see if there is exactly one bit input to the ctlz.
   4841     if ((UnknownBits & (UnknownBits - 1)) == 0) {
   4842       // Okay, we know that only the single bit specified by UnknownBits
   4843       // could be set on input to the CTLZ node. If this bit is set, the SRL
   4844       // will return 0; if it is clear, it returns 1. Change the CTLZ/SRL pair
   4845       // to an SRL/XOR pair, which is likely to simplify more.
   4846       unsigned ShAmt = UnknownBits.countTrailingZeros();
   4847       SDValue Op = N0.getOperand(0);
   4848 
   4849       if (ShAmt) {
   4850         SDLoc DL(N0);
   4851         Op = DAG.getNode(ISD::SRL, DL, VT, Op,
   4852                   DAG.getConstant(ShAmt, DL,
   4853                                   getShiftAmountTy(Op.getValueType())));
   4854         AddToWorklist(Op.getNode());
   4855       }
   4856 
   4857       SDLoc DL(N);
   4858       return DAG.getNode(ISD::XOR, DL, VT,
   4859                          Op, DAG.getConstant(1, DL, VT));
   4860     }
   4861   }
   4862 
   4863   // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
   4864   if (N1.getOpcode() == ISD::TRUNCATE &&
   4865       N1.getOperand(0).getOpcode() == ISD::AND) {
   4866     if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
   4867       return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
   4868   }
   4869 
   4870   // fold operands of srl based on knowledge that the low bits are not
   4871   // demanded.
   4872   if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
   4873     return SDValue(N, 0);
   4874 
   4875   if (N1C && !N1C->isOpaque())
   4876     if (SDValue NewSRL = visitShiftByConstant(N, N1C))
   4877       return NewSRL;
   4878 
   4879   // Attempt to convert a srl of a load into a narrower zero-extending load.
   4880   if (SDValue NarrowLoad = ReduceLoadWidth(N))
   4881     return NarrowLoad;
   4882 
   4883   // Here is a common situation. We want to optimize:
   4884   //
   4885   //   %a = ...
   4886   //   %b = and i32 %a, 2
   4887   //   %c = srl i32 %b, 1
   4888   //   brcond i32 %c ...
   4889   //
   4890   // into
   4891   //
   4892   //   %a = ...
   4893   //   %b = and %a, 2
   4894   //   %c = setcc eq %b, 0
   4895   //   brcond %c ...
   4896   //
   4897   // However, after the source operand of SRL is optimized into AND, the SRL
   4898   // itself may not be optimized further. Look for it and add the BRCOND into
   4899   // the worklist.
   4900   if (N->hasOneUse()) {
   4901     SDNode *Use = *N->use_begin();
   4902     if (Use->getOpcode() == ISD::BRCOND)
   4903       AddToWorklist(Use);
   4904     else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
   4905       // Also look past the truncate.
   4906       Use = *Use->use_begin();
   4907       if (Use->getOpcode() == ISD::BRCOND)
   4908         AddToWorklist(Use);
   4909     }
   4910   }
   4911 
   4912   return SDValue();
   4913 }
   4914 
   4915 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
   4916   SDValue N0 = N->getOperand(0);
   4917   EVT VT = N->getValueType(0);
   4918 
   4919   // fold (bswap c1) -> c2
   4920   if (isConstantIntBuildVectorOrConstantInt(N0))
   4921     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
   4922   // fold (bswap (bswap x)) -> x
   4923   if (N0.getOpcode() == ISD::BSWAP)
   4924     return N0->getOperand(0);
   4925   return SDValue();
   4926 }
   4927 
   4928 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
   4929   SDValue N0 = N->getOperand(0);
   4930   EVT VT = N->getValueType(0);
   4931 
   4932   // fold (ctlz c1) -> c2
   4933   if (isConstantIntBuildVectorOrConstantInt(N0))
   4934     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
   4935   return SDValue();
   4936 }
   4937 
   4938 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
   4939   SDValue N0 = N->getOperand(0);
   4940   EVT VT = N->getValueType(0);
   4941 
   4942   // fold (ctlz_zero_undef c1) -> c2
   4943   if (isConstantIntBuildVectorOrConstantInt(N0))
   4944     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   4945   return SDValue();
   4946 }
   4947 
   4948 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
   4949   SDValue N0 = N->getOperand(0);
   4950   EVT VT = N->getValueType(0);
   4951 
   4952   // fold (cttz c1) -> c2
   4953   if (isConstantIntBuildVectorOrConstantInt(N0))
   4954     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
   4955   return SDValue();
   4956 }
   4957 
   4958 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
   4959   SDValue N0 = N->getOperand(0);
   4960   EVT VT = N->getValueType(0);
   4961 
   4962   // fold (cttz_zero_undef c1) -> c2
   4963   if (isConstantIntBuildVectorOrConstantInt(N0))
   4964     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   4965   return SDValue();
   4966 }
   4967 
   4968 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
   4969   SDValue N0 = N->getOperand(0);
   4970   EVT VT = N->getValueType(0);
   4971 
   4972   // fold (ctpop c1) -> c2
   4973   if (isConstantIntBuildVectorOrConstantInt(N0))
   4974     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
   4975   return SDValue();
   4976 }
   4977 
   4978 
   4979 /// \brief Generate Min/Max node
   4980 static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS,
   4981                                    SDValue True, SDValue False,
   4982                                    ISD::CondCode CC, const TargetLowering &TLI,
   4983                                    SelectionDAG &DAG) {
   4984   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
   4985     return SDValue();
   4986 
   4987   switch (CC) {
   4988   case ISD::SETOLT:
   4989   case ISD::SETOLE:
   4990   case ISD::SETLT:
   4991   case ISD::SETLE:
   4992   case ISD::SETULT:
   4993   case ISD::SETULE: {
   4994     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
   4995     if (TLI.isOperationLegal(Opcode, VT))
   4996       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
   4997     return SDValue();
   4998   }
   4999   case ISD::SETOGT:
   5000   case ISD::SETOGE:
   5001   case ISD::SETGT:
   5002   case ISD::SETGE:
   5003   case ISD::SETUGT:
   5004   case ISD::SETUGE: {
   5005     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
   5006     if (TLI.isOperationLegal(Opcode, VT))
   5007       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
   5008     return SDValue();
   5009   }
   5010   default:
   5011     return SDValue();
   5012   }
   5013 }
   5014 
   5015 SDValue DAGCombiner::visitSELECT(SDNode *N) {
   5016   SDValue N0 = N->getOperand(0);
   5017   SDValue N1 = N->getOperand(1);
   5018   SDValue N2 = N->getOperand(2);
   5019   EVT VT = N->getValueType(0);
   5020   EVT VT0 = N0.getValueType();
   5021 
   5022   // fold (select C, X, X) -> X
   5023   if (N1 == N2)
   5024     return N1;
   5025   if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
   5026     // fold (select true, X, Y) -> X
   5027     // fold (select false, X, Y) -> Y
   5028     return !N0C->isNullValue() ? N1 : N2;
   5029   }
   5030   // fold (select C, 1, X) -> (or C, X)
   5031   if (VT == MVT::i1 && isOneConstant(N1))
   5032     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
   5033   // fold (select C, 0, 1) -> (xor C, 1)
   5034   // We can't do this reliably if integer based booleans have different contents
   5035   // to floating point based booleans. This is because we can't tell whether we
   5036   // have an integer-based boolean or a floating-point-based boolean unless we
   5037   // can find the SETCC that produced it and inspect its operands. This is
   5038   // fairly easy if C is the SETCC node, but it can potentially be
   5039   // undiscoverable (or not reasonably discoverable). For example, it could be
   5040   // in another basic block or it could require searching a complicated
   5041   // expression.
   5042   if (VT.isInteger() &&
   5043       (VT0 == MVT::i1 || (VT0.isInteger() &&
   5044                           TLI.getBooleanContents(false, false) ==
   5045                               TLI.getBooleanContents(false, true) &&
   5046                           TLI.getBooleanContents(false, false) ==
   5047                               TargetLowering::ZeroOrOneBooleanContent)) &&
   5048       isNullConstant(N1) && isOneConstant(N2)) {
   5049     SDValue XORNode;
   5050     if (VT == VT0) {
   5051       SDLoc DL(N);
   5052       return DAG.getNode(ISD::XOR, DL, VT0,
   5053                          N0, DAG.getConstant(1, DL, VT0));
   5054     }
   5055     SDLoc DL0(N0);
   5056     XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
   5057                           N0, DAG.getConstant(1, DL0, VT0));
   5058     AddToWorklist(XORNode.getNode());
   5059     if (VT.bitsGT(VT0))
   5060       return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
   5061     return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
   5062   }
   5063   // fold (select C, 0, X) -> (and (not C), X)
   5064   if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
   5065     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
   5066     AddToWorklist(NOTNode.getNode());
   5067     return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
   5068   }
   5069   // fold (select C, X, 1) -> (or (not C), X)
   5070   if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
   5071     SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
   5072     AddToWorklist(NOTNode.getNode());
   5073     return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
   5074   }
   5075   // fold (select C, X, 0) -> (and C, X)
   5076   if (VT == MVT::i1 && isNullConstant(N2))
   5077     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
   5078   // fold (select X, X, Y) -> (or X, Y)
   5079   // fold (select X, 1, Y) -> (or X, Y)
   5080   if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
   5081     return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
   5082   // fold (select X, Y, X) -> (and X, Y)
   5083   // fold (select X, Y, 0) -> (and X, Y)
   5084   if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
   5085     return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
   5086 
   5087   // If we can fold this based on the true/false value, do so.
   5088   if (SimplifySelectOps(N, N1, N2))
   5089     return SDValue(N, 0);  // Don't revisit N.
   5090 
   5091   if (VT0 == MVT::i1) {
   5092     // The code in this block deals with the following 2 equivalences:
   5093     //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
   5094     //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
   5095     // The target can specify its prefered form with the
   5096     // shouldNormalizeToSelectSequence() callback. However we always transform
   5097     // to the right anyway if we find the inner select exists in the DAG anyway
   5098     // and we always transform to the left side if we know that we can further
   5099     // optimize the combination of the conditions.
   5100     bool normalizeToSequence
   5101       = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
   5102     // select (and Cond0, Cond1), X, Y
   5103     //   -> select Cond0, (select Cond1, X, Y), Y
   5104     if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
   5105       SDValue Cond0 = N0->getOperand(0);
   5106       SDValue Cond1 = N0->getOperand(1);
   5107       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
   5108                                         N1.getValueType(), Cond1, N1, N2);
   5109       if (normalizeToSequence || !InnerSelect.use_empty())
   5110         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
   5111                            InnerSelect, N2);
   5112     }
   5113     // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
   5114     if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
   5115       SDValue Cond0 = N0->getOperand(0);
   5116       SDValue Cond1 = N0->getOperand(1);
   5117       SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
   5118                                         N1.getValueType(), Cond1, N1, N2);
   5119       if (normalizeToSequence || !InnerSelect.use_empty())
   5120         return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
   5121                            InnerSelect);
   5122     }
   5123 
   5124     // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
   5125     if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
   5126       SDValue N1_0 = N1->getOperand(0);
   5127       SDValue N1_1 = N1->getOperand(1);
   5128       SDValue N1_2 = N1->getOperand(2);
   5129       if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
   5130         // Create the actual and node if we can generate good code for it.
   5131         if (!normalizeToSequence) {
   5132           SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
   5133                                     N0, N1_0);
   5134           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
   5135                              N1_1, N2);
   5136         }
   5137         // Otherwise see if we can optimize the "and" to a better pattern.
   5138         if (SDValue Combined = visitANDLike(N0, N1_0, N))
   5139           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
   5140                              N1_1, N2);
   5141       }
   5142     }
   5143     // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
   5144     if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
   5145       SDValue N2_0 = N2->getOperand(0);
   5146       SDValue N2_1 = N2->getOperand(1);
   5147       SDValue N2_2 = N2->getOperand(2);
   5148       if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
   5149         // Create the actual or node if we can generate good code for it.
   5150         if (!normalizeToSequence) {
   5151           SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
   5152                                    N0, N2_0);
   5153           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
   5154                              N1, N2_2);
   5155         }
   5156         // Otherwise see if we can optimize to a better pattern.
   5157         if (SDValue Combined = visitORLike(N0, N2_0, N))
   5158           return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
   5159                              N1, N2_2);
   5160       }
   5161     }
   5162   }
   5163 
   5164   // fold selects based on a setcc into other things, such as min/max/abs
   5165   if (N0.getOpcode() == ISD::SETCC) {
   5166     // select x, y (fcmp lt x, y) -> fminnum x, y
   5167     // select x, y (fcmp gt x, y) -> fmaxnum x, y
   5168     //
   5169     // This is OK if we don't care about what happens if either operand is a
   5170     // NaN.
   5171     //
   5172 
   5173     // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
   5174     // no signed zeros as well as no nans.
   5175     const TargetOptions &Options = DAG.getTarget().Options;
   5176     if (Options.UnsafeFPMath &&
   5177         VT.isFloatingPoint() && N0.hasOneUse() &&
   5178         DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
   5179       ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
   5180 
   5181       if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
   5182                                                 N0.getOperand(1), N1, N2, CC,
   5183                                                 TLI, DAG))
   5184         return FMinMax;
   5185     }
   5186 
   5187     if ((!LegalOperations &&
   5188          TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
   5189         TLI.isOperationLegal(ISD::SELECT_CC, VT))
   5190       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
   5191                          N0.getOperand(0), N0.getOperand(1),
   5192                          N1, N2, N0.getOperand(2));
   5193     return SimplifySelect(SDLoc(N), N0, N1, N2);
   5194   }
   5195 
   5196   return SDValue();
   5197 }
   5198 
   5199 static
   5200 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
   5201   SDLoc DL(N);
   5202   EVT LoVT, HiVT;
   5203   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
   5204 
   5205   // Split the inputs.
   5206   SDValue Lo, Hi, LL, LH, RL, RH;
   5207   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
   5208   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
   5209 
   5210   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
   5211   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
   5212 
   5213   return std::make_pair(Lo, Hi);
   5214 }
   5215 
   5216 // This function assumes all the vselect's arguments are CONCAT_VECTOR
   5217 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
   5218 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
   5219   SDLoc dl(N);
   5220   SDValue Cond = N->getOperand(0);
   5221   SDValue LHS = N->getOperand(1);
   5222   SDValue RHS = N->getOperand(2);
   5223   EVT VT = N->getValueType(0);
   5224   int NumElems = VT.getVectorNumElements();
   5225   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
   5226          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
   5227          Cond.getOpcode() == ISD::BUILD_VECTOR);
   5228 
   5229   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
   5230   // binary ones here.
   5231   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
   5232     return SDValue();
   5233 
   5234   // We're sure we have an even number of elements due to the
   5235   // concat_vectors we have as arguments to vselect.
   5236   // Skip BV elements until we find one that's not an UNDEF
   5237   // After we find an UNDEF element, keep looping until we get to half the
   5238   // length of the BV and see if all the non-undef nodes are the same.
   5239   ConstantSDNode *BottomHalf = nullptr;
   5240   for (int i = 0; i < NumElems / 2; ++i) {
   5241     if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
   5242       continue;
   5243 
   5244     if (BottomHalf == nullptr)
   5245       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
   5246     else if (Cond->getOperand(i).getNode() != BottomHalf)
   5247       return SDValue();
   5248   }
   5249 
   5250   // Do the same for the second half of the BuildVector
   5251   ConstantSDNode *TopHalf = nullptr;
   5252   for (int i = NumElems / 2; i < NumElems; ++i) {
   5253     if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
   5254       continue;
   5255 
   5256     if (TopHalf == nullptr)
   5257       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
   5258     else if (Cond->getOperand(i).getNode() != TopHalf)
   5259       return SDValue();
   5260   }
   5261 
   5262   assert(TopHalf && BottomHalf &&
   5263          "One half of the selector was all UNDEFs and the other was all the "
   5264          "same value. This should have been addressed before this function.");
   5265   return DAG.getNode(
   5266       ISD::CONCAT_VECTORS, dl, VT,
   5267       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
   5268       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
   5269 }
   5270 
   5271 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   5272 
   5273   if (Level >= AfterLegalizeTypes)
   5274     return SDValue();
   5275 
   5276   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   5277   SDValue Mask = MSC->getMask();
   5278   SDValue Data  = MSC->getValue();
   5279   SDLoc DL(N);
   5280 
   5281   // If the MSCATTER data type requires splitting and the mask is provided by a
   5282   // SETCC, then split both nodes and its operands before legalization. This
   5283   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   5284   // and enables future optimizations (e.g. min/max pattern matching on X86).
   5285   if (Mask.getOpcode() != ISD::SETCC)
   5286     return SDValue();
   5287 
   5288   // Check if any splitting is required.
   5289   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
   5290       TargetLowering::TypeSplitVector)
   5291     return SDValue();
   5292   SDValue MaskLo, MaskHi, Lo, Hi;
   5293   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
   5294 
   5295   EVT LoVT, HiVT;
   5296   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
   5297 
   5298   SDValue Chain = MSC->getChain();
   5299 
   5300   EVT MemoryVT = MSC->getMemoryVT();
   5301   unsigned Alignment = MSC->getOriginalAlignment();
   5302 
   5303   EVT LoMemVT, HiMemVT;
   5304   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
   5305 
   5306   SDValue DataLo, DataHi;
   5307   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
   5308 
   5309   SDValue BasePtr = MSC->getBasePtr();
   5310   SDValue IndexLo, IndexHi;
   5311   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
   5312 
   5313   MachineMemOperand *MMO = DAG.getMachineFunction().
   5314     getMachineMemOperand(MSC->getPointerInfo(),
   5315                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
   5316                           Alignment, MSC->getAAInfo(), MSC->getRanges());
   5317 
   5318   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
   5319   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
   5320                             DL, OpsLo, MMO);
   5321 
   5322   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
   5323   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
   5324                             DL, OpsHi, MMO);
   5325 
   5326   AddToWorklist(Lo.getNode());
   5327   AddToWorklist(Hi.getNode());
   5328 
   5329   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
   5330 }
   5331 
   5332 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   5333 
   5334   if (Level >= AfterLegalizeTypes)
   5335     return SDValue();
   5336 
   5337   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
   5338   SDValue Mask = MST->getMask();
   5339   SDValue Data  = MST->getValue();
   5340   SDLoc DL(N);
   5341 
   5342   // If the MSTORE data type requires splitting and the mask is provided by a
   5343   // SETCC, then split both nodes and its operands before legalization. This
   5344   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   5345   // and enables future optimizations (e.g. min/max pattern matching on X86).
   5346   if (Mask.getOpcode() == ISD::SETCC) {
   5347 
   5348     // Check if any splitting is required.
   5349     if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
   5350         TargetLowering::TypeSplitVector)
   5351       return SDValue();
   5352 
   5353     SDValue MaskLo, MaskHi, Lo, Hi;
   5354     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
   5355 
   5356     EVT LoVT, HiVT;
   5357     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
   5358 
   5359     SDValue Chain = MST->getChain();
   5360     SDValue Ptr   = MST->getBasePtr();
   5361 
   5362     EVT MemoryVT = MST->getMemoryVT();
   5363     unsigned Alignment = MST->getOriginalAlignment();
   5364 
   5365     // if Alignment is equal to the vector size,
   5366     // take the half of it for the second part
   5367     unsigned SecondHalfAlignment =
   5368       (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
   5369          Alignment/2 : Alignment;
   5370 
   5371     EVT LoMemVT, HiMemVT;
   5372     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
   5373 
   5374     SDValue DataLo, DataHi;
   5375     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
   5376 
   5377     MachineMemOperand *MMO = DAG.getMachineFunction().
   5378       getMachineMemOperand(MST->getPointerInfo(),
   5379                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
   5380                            Alignment, MST->getAAInfo(), MST->getRanges());
   5381 
   5382     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
   5383                             MST->isTruncatingStore());
   5384 
   5385     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
   5386     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   5387                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
   5388 
   5389     MMO = DAG.getMachineFunction().
   5390       getMachineMemOperand(MST->getPointerInfo(),
   5391                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
   5392                            SecondHalfAlignment, MST->getAAInfo(),
   5393                            MST->getRanges());
   5394 
   5395     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
   5396                             MST->isTruncatingStore());
   5397 
   5398     AddToWorklist(Lo.getNode());
   5399     AddToWorklist(Hi.getNode());
   5400 
   5401     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
   5402   }
   5403   return SDValue();
   5404 }
   5405 
   5406 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
   5407 
   5408   if (Level >= AfterLegalizeTypes)
   5409     return SDValue();
   5410 
   5411   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
   5412   SDValue Mask = MGT->getMask();
   5413   SDLoc DL(N);
   5414 
   5415   // If the MGATHER result requires splitting and the mask is provided by a
   5416   // SETCC, then split both nodes and its operands before legalization. This
   5417   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   5418   // and enables future optimizations (e.g. min/max pattern matching on X86).
   5419 
   5420   if (Mask.getOpcode() != ISD::SETCC)
   5421     return SDValue();
   5422 
   5423   EVT VT = N->getValueType(0);
   5424 
   5425   // Check if any splitting is required.
   5426   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
   5427       TargetLowering::TypeSplitVector)
   5428     return SDValue();
   5429 
   5430   SDValue MaskLo, MaskHi, Lo, Hi;
   5431   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
   5432 
   5433   SDValue Src0 = MGT->getValue();
   5434   SDValue Src0Lo, Src0Hi;
   5435   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
   5436 
   5437   EVT LoVT, HiVT;
   5438   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
   5439 
   5440   SDValue Chain = MGT->getChain();
   5441   EVT MemoryVT = MGT->getMemoryVT();
   5442   unsigned Alignment = MGT->getOriginalAlignment();
   5443 
   5444   EVT LoMemVT, HiMemVT;
   5445   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
   5446 
   5447   SDValue BasePtr = MGT->getBasePtr();
   5448   SDValue Index = MGT->getIndex();
   5449   SDValue IndexLo, IndexHi;
   5450   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
   5451 
   5452   MachineMemOperand *MMO = DAG.getMachineFunction().
   5453     getMachineMemOperand(MGT->getPointerInfo(),
   5454                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
   5455                           Alignment, MGT->getAAInfo(), MGT->getRanges());
   5456 
   5457   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
   5458   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
   5459                             MMO);
   5460 
   5461   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
   5462   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
   5463                             MMO);
   5464 
   5465   AddToWorklist(Lo.getNode());
   5466   AddToWorklist(Hi.getNode());
   5467 
   5468   // Build a factor node to remember that this load is independent of the
   5469   // other one.
   5470   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
   5471                       Hi.getValue(1));
   5472 
   5473   // Legalized the chain result - switch anything that used the old chain to
   5474   // use the new one.
   5475   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
   5476 
   5477   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
   5478 
   5479   SDValue RetOps[] = { GatherRes, Chain };
   5480   return DAG.getMergeValues(RetOps, DL);
   5481 }
   5482 
   5483 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
   5484 
   5485   if (Level >= AfterLegalizeTypes)
   5486     return SDValue();
   5487 
   5488   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
   5489   SDValue Mask = MLD->getMask();
   5490   SDLoc DL(N);
   5491 
   5492   // If the MLOAD result requires splitting and the mask is provided by a
   5493   // SETCC, then split both nodes and its operands before legalization. This
   5494   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   5495   // and enables future optimizations (e.g. min/max pattern matching on X86).
   5496 
   5497   if (Mask.getOpcode() == ISD::SETCC) {
   5498     EVT VT = N->getValueType(0);
   5499 
   5500     // Check if any splitting is required.
   5501     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
   5502         TargetLowering::TypeSplitVector)
   5503       return SDValue();
   5504 
   5505     SDValue MaskLo, MaskHi, Lo, Hi;
   5506     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
   5507 
   5508     SDValue Src0 = MLD->getSrc0();
   5509     SDValue Src0Lo, Src0Hi;
   5510     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
   5511 
   5512     EVT LoVT, HiVT;
   5513     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
   5514 
   5515     SDValue Chain = MLD->getChain();
   5516     SDValue Ptr   = MLD->getBasePtr();
   5517     EVT MemoryVT = MLD->getMemoryVT();
   5518     unsigned Alignment = MLD->getOriginalAlignment();
   5519 
   5520     // if Alignment is equal to the vector size,
   5521     // take the half of it for the second part
   5522     unsigned SecondHalfAlignment =
   5523       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
   5524          Alignment/2 : Alignment;
   5525 
   5526     EVT LoMemVT, HiMemVT;
   5527     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
   5528 
   5529     MachineMemOperand *MMO = DAG.getMachineFunction().
   5530     getMachineMemOperand(MLD->getPointerInfo(),
   5531                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
   5532                          Alignment, MLD->getAAInfo(), MLD->getRanges());
   5533 
   5534     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
   5535                            ISD::NON_EXTLOAD);
   5536 
   5537     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
   5538     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   5539                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
   5540 
   5541     MMO = DAG.getMachineFunction().
   5542     getMachineMemOperand(MLD->getPointerInfo(),
   5543                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
   5544                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
   5545 
   5546     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
   5547                            ISD::NON_EXTLOAD);
   5548 
   5549     AddToWorklist(Lo.getNode());
   5550     AddToWorklist(Hi.getNode());
   5551 
   5552     // Build a factor node to remember that this load is independent of the
   5553     // other one.
   5554     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
   5555                         Hi.getValue(1));
   5556 
   5557     // Legalized the chain result - switch anything that used the old chain to
   5558     // use the new one.
   5559     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
   5560 
   5561     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
   5562 
   5563     SDValue RetOps[] = { LoadRes, Chain };
   5564     return DAG.getMergeValues(RetOps, DL);
   5565   }
   5566   return SDValue();
   5567 }
   5568 
   5569 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   5570   SDValue N0 = N->getOperand(0);
   5571   SDValue N1 = N->getOperand(1);
   5572   SDValue N2 = N->getOperand(2);
   5573   SDLoc DL(N);
   5574 
   5575   // Canonicalize integer abs.
   5576   // vselect (setg[te] X,  0),  X, -X ->
   5577   // vselect (setgt    X, -1),  X, -X ->
   5578   // vselect (setl[te] X,  0), -X,  X ->
   5579   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
   5580   if (N0.getOpcode() == ISD::SETCC) {
   5581     SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
   5582     ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
   5583     bool isAbs = false;
   5584     bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());
   5585 
   5586     if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
   5587          (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
   5588         N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
   5589       isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
   5590     else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
   5591              N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
   5592       isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
   5593 
   5594     if (isAbs) {
   5595       EVT VT = LHS.getValueType();
   5596       SDValue Shift = DAG.getNode(
   5597           ISD::SRA, DL, VT, LHS,
   5598           DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
   5599       SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
   5600       AddToWorklist(Shift.getNode());
   5601       AddToWorklist(Add.getNode());
   5602       return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
   5603     }
   5604   }
   5605 
   5606   if (SimplifySelectOps(N, N1, N2))
   5607     return SDValue(N, 0);  // Don't revisit N.
   5608 
   5609   // If the VSELECT result requires splitting and the mask is provided by a
   5610   // SETCC, then split both nodes and its operands before legalization. This
   5611   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   5612   // and enables future optimizations (e.g. min/max pattern matching on X86).
   5613   if (N0.getOpcode() == ISD::SETCC) {
   5614     EVT VT = N->getValueType(0);
   5615 
   5616     // Check if any splitting is required.
   5617     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
   5618         TargetLowering::TypeSplitVector)
   5619       return SDValue();
   5620 
   5621     SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
   5622     std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
   5623     std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
   5624     std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
   5625 
   5626     Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
   5627     Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
   5628 
   5629     // Add the new VSELECT nodes to the work list in case they need to be split
   5630     // again.
   5631     AddToWorklist(Lo.getNode());
   5632     AddToWorklist(Hi.getNode());
   5633 
   5634     return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
   5635   }
   5636 
   5637   // Fold (vselect (build_vector all_ones), N1, N2) -> N1
   5638   if (ISD::isBuildVectorAllOnes(N0.getNode()))
   5639     return N1;
   5640   // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
   5641   if (ISD::isBuildVectorAllZeros(N0.getNode()))
   5642     return N2;
   5643 
   5644   // The ConvertSelectToConcatVector function is assuming both the above
   5645   // checks for (vselect (build_vector all{ones,zeros) ...) have been made
   5646   // and addressed.
   5647   if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
   5648       N2.getOpcode() == ISD::CONCAT_VECTORS &&
   5649       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
   5650     if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
   5651       return CV;
   5652   }
   5653 
   5654   return SDValue();
   5655 }
   5656 
   5657 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
   5658   SDValue N0 = N->getOperand(0);
   5659   SDValue N1 = N->getOperand(1);
   5660   SDValue N2 = N->getOperand(2);
   5661   SDValue N3 = N->getOperand(3);
   5662   SDValue N4 = N->getOperand(4);
   5663   ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();
   5664 
   5665   // fold select_cc lhs, rhs, x, x, cc -> x
   5666   if (N2 == N3)
   5667     return N2;
   5668 
   5669   // Determine if the condition we're dealing with is constant
   5670   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
   5671                               N0, N1, CC, SDLoc(N), false);
   5672   if (SCC.getNode()) {
   5673     AddToWorklist(SCC.getNode());
   5674 
   5675     if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
   5676       if (!SCCC->isNullValue())
   5677         return N2;    // cond always true -> true val
   5678       else
   5679         return N3;    // cond always false -> false val
   5680     } else if (SCC->getOpcode() == ISD::UNDEF) {
   5681       // When the condition is UNDEF, just return the first operand. This is
   5682       // coherent the DAG creation, no setcc node is created in this case
   5683       return N2;
   5684     } else if (SCC.getOpcode() == ISD::SETCC) {
   5685       // Fold to a simpler select_cc
   5686       return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
   5687                          SCC.getOperand(0), SCC.getOperand(1), N2, N3,
   5688                          SCC.getOperand(2));
   5689     }
   5690   }
   5691 
   5692   // If we can fold this based on the true/false value, do so.
   5693   if (SimplifySelectOps(N, N2, N3))
   5694     return SDValue(N, 0);  // Don't revisit N.
   5695 
   5696   // fold select_cc into other things, such as min/max/abs
   5697   return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
   5698 }
   5699 
   5700 SDValue DAGCombiner::visitSETCC(SDNode *N) {
   5701   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
   5702                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
   5703                        SDLoc(N));
   5704 }
   5705 
   5706 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
   5707   SDValue LHS = N->getOperand(0);
   5708   SDValue RHS = N->getOperand(1);
   5709   SDValue Carry = N->getOperand(2);
   5710   SDValue Cond = N->getOperand(3);
   5711 
   5712   // If Carry is false, fold to a regular SETCC.
   5713   if (Carry.getOpcode() == ISD::CARRY_FALSE)
   5714     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
   5715 
   5716   return SDValue();
   5717 }
   5718 
   5719 /// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
   5720 /// a build_vector of constants.
   5721 /// This function is called by the DAGCombiner when visiting sext/zext/aext
   5722 /// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
   5723 /// Vector extends are not folded if operations are legal; this is to
   5724 /// avoid introducing illegal build_vector dag nodes.
   5725 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
   5726                                          SelectionDAG &DAG, bool LegalTypes,
   5727                                          bool LegalOperations) {
   5728   unsigned Opcode = N->getOpcode();
   5729   SDValue N0 = N->getOperand(0);
   5730   EVT VT = N->getValueType(0);
   5731 
   5732   assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
   5733          Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
   5734          && "Expected EXTEND dag node in input!");
   5735 
   5736   // fold (sext c1) -> c1
   5737   // fold (zext c1) -> c1
   5738   // fold (aext c1) -> c1
   5739   if (isa<ConstantSDNode>(N0))
   5740     return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();
   5741 
   5742   // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
   5743   // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
   5744   // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
   5745   EVT SVT = VT.getScalarType();
   5746   if (!(VT.isVector() &&
   5747       (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
   5748       ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
   5749     return nullptr;
   5750 
   5751   // We can fold this node into a build_vector.
   5752   unsigned VTBits = SVT.getSizeInBits();
   5753   unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
   5754   SmallVector<SDValue, 8> Elts;
   5755   unsigned NumElts = VT.getVectorNumElements();
   5756   SDLoc DL(N);
   5757 
   5758   for (unsigned i=0; i != NumElts; ++i) {
   5759     SDValue Op = N0->getOperand(i);
   5760     if (Op->getOpcode() == ISD::UNDEF) {
   5761       Elts.push_back(DAG.getUNDEF(SVT));
   5762       continue;
   5763     }
   5764 
   5765     SDLoc DL(Op);
   5766     // Get the constant value and if needed trunc it to the size of the type.
   5767     // Nodes like build_vector might have constants wider than the scalar type.
   5768     APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
   5769     if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
   5770       Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
   5771     else
   5772       Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
   5773   }
   5774 
   5775   return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode();
   5776 }
   5777 
   5778 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
   5779 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
   5780 // transformation. Returns true if extension are possible and the above
   5781 // mentioned transformation is profitable.
   5782 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
   5783                                     unsigned ExtOpc,
   5784                                     SmallVectorImpl<SDNode *> &ExtendNodes,
   5785                                     const TargetLowering &TLI) {
   5786   bool HasCopyToRegUses = false;
   5787   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
   5788   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
   5789                             UE = N0.getNode()->use_end();
   5790        UI != UE; ++UI) {
   5791     SDNode *User = *UI;
   5792     if (User == N)
   5793       continue;
   5794     if (UI.getUse().getResNo() != N0.getResNo())
   5795       continue;
   5796     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
   5797     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
   5798       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
   5799       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
   5800         // Sign bits will be lost after a zext.
   5801         return false;
   5802       bool Add = false;
   5803       for (unsigned i = 0; i != 2; ++i) {
   5804         SDValue UseOp = User->getOperand(i);
   5805         if (UseOp == N0)
   5806           continue;
   5807         if (!isa<ConstantSDNode>(UseOp))
   5808           return false;
   5809         Add = true;
   5810       }
   5811       if (Add)
   5812         ExtendNodes.push_back(User);
   5813       continue;
   5814     }
   5815     // If truncates aren't free and there are users we can't
   5816     // extend, it isn't worthwhile.
   5817     if (!isTruncFree)
   5818       return false;
   5819     // Remember if this value is live-out.
   5820     if (User->getOpcode() == ISD::CopyToReg)
   5821       HasCopyToRegUses = true;
   5822   }
   5823 
   5824   if (HasCopyToRegUses) {
   5825     bool BothLiveOut = false;
   5826     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
   5827          UI != UE; ++UI) {
   5828       SDUse &Use = UI.getUse();
   5829       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
   5830         BothLiveOut = true;
   5831         break;
   5832       }
   5833     }
   5834     if (BothLiveOut)
   5835       // Both unextended and extended values are live out. There had better be
   5836       // a good reason for the transformation.
   5837       return ExtendNodes.size();
   5838   }
   5839   return true;
   5840 }
   5841 
   5842 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
   5843                                   SDValue Trunc, SDValue ExtLoad, SDLoc DL,
   5844                                   ISD::NodeType ExtType) {
   5845   // Extend SetCC uses if necessary.
   5846   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
   5847     SDNode *SetCC = SetCCs[i];
   5848     SmallVector<SDValue, 4> Ops;
   5849 
   5850     for (unsigned j = 0; j != 2; ++j) {
   5851       SDValue SOp = SetCC->getOperand(j);
   5852       if (SOp == Trunc)
   5853         Ops.push_back(ExtLoad);
   5854       else
   5855         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
   5856     }
   5857 
   5858     Ops.push_back(SetCC->getOperand(2));
   5859     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
   5860   }
   5861 }
   5862 
   5863 // FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
   5864 SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
   5865   SDValue N0 = N->getOperand(0);
   5866   EVT DstVT = N->getValueType(0);
   5867   EVT SrcVT = N0.getValueType();
   5868 
   5869   assert((N->getOpcode() == ISD::SIGN_EXTEND ||
   5870           N->getOpcode() == ISD::ZERO_EXTEND) &&
   5871          "Unexpected node type (not an extend)!");
   5872 
   5873   // fold (sext (load x)) to multiple smaller sextloads; same for zext.
   5874   // For example, on a target with legal v4i32, but illegal v8i32, turn:
   5875   //   (v8i32 (sext (v8i16 (load x))))
   5876   // into:
   5877   //   (v8i32 (concat_vectors (v4i32 (sextload x)),
   5878   //                          (v4i32 (sextload (x + 16)))))
   5879   // Where uses of the original load, i.e.:
   5880   //   (v8i16 (load x))
   5881   // are replaced with:
   5882   //   (v8i16 (truncate
   5883   //     (v8i32 (concat_vectors (v4i32 (sextload x)),
   5884   //                            (v4i32 (sextload (x + 16)))))))
   5885   //
   5886   // This combine is only applicable to illegal, but splittable, vectors.
   5887   // All legal types, and illegal non-vector types, are handled elsewhere.
   5888   // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
   5889   //
   5890   if (N0->getOpcode() != ISD::LOAD)
   5891     return SDValue();
   5892 
   5893   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   5894 
   5895   if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
   5896       !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
   5897       !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
   5898     return SDValue();
   5899 
   5900   SmallVector<SDNode *, 4> SetCCs;
   5901   if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
   5902     return SDValue();
   5903 
   5904   ISD::LoadExtType ExtType =
   5905       N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;
   5906 
   5907   // Try to split the vector types to get down to legal types.
   5908   EVT SplitSrcVT = SrcVT;
   5909   EVT SplitDstVT = DstVT;
   5910   while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
   5911          SplitSrcVT.getVectorNumElements() > 1) {
   5912     SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
   5913     SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
   5914   }
   5915 
   5916   if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
   5917     return SDValue();
   5918 
   5919   SDLoc DL(N);
   5920   const unsigned NumSplits =
   5921       DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
   5922   const unsigned Stride = SplitSrcVT.getStoreSize();
   5923   SmallVector<SDValue, 4> Loads;
   5924   SmallVector<SDValue, 4> Chains;
   5925 
   5926   SDValue BasePtr = LN0->getBasePtr();
   5927   for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
   5928     const unsigned Offset = Idx * Stride;
   5929     const unsigned Align = MinAlign(LN0->getAlignment(), Offset);
   5930 
   5931     SDValue SplitLoad = DAG.getExtLoad(
   5932         ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
   5933         LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
   5934         LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(),
   5935         Align, LN0->getAAInfo());
   5936 
   5937     BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
   5938                           DAG.getConstant(Stride, DL, BasePtr.getValueType()));
   5939 
   5940     Loads.push_back(SplitLoad.getValue(0));
   5941     Chains.push_back(SplitLoad.getValue(1));
   5942   }
   5943 
   5944   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
   5945   SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);
   5946 
   5947   CombineTo(N, NewValue);
   5948 
   5949   // Replace uses of the original load (before extension)
   5950   // with a truncate of the concatenated sextloaded vectors.
   5951   SDValue Trunc =
   5952       DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
   5953   CombineTo(N0.getNode(), Trunc, NewChain);
   5954   ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
   5955                   (ISD::NodeType)N->getOpcode());
   5956   return SDValue(N, 0); // Return N so it doesn't get rechecked!
   5957 }
   5958 
   5959 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   5960   SDValue N0 = N->getOperand(0);
   5961   EVT VT = N->getValueType(0);
   5962 
   5963   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   5964                                               LegalOperations))
   5965     return SDValue(Res, 0);
   5966 
   5967   // fold (sext (sext x)) -> (sext x)
   5968   // fold (sext (aext x)) -> (sext x)
   5969   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
   5970     return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
   5971                        N0.getOperand(0));
   5972 
   5973   if (N0.getOpcode() == ISD::TRUNCATE) {
   5974     // fold (sext (truncate (load x))) -> (sext (smaller load x))
   5975     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
   5976     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
   5977       SDNode* oye = N0.getNode()->getOperand(0).getNode();
   5978       if (NarrowLoad.getNode() != N0.getNode()) {
   5979         CombineTo(N0.getNode(), NarrowLoad);
   5980         // CombineTo deleted the truncate, if needed, but not what's under it.
   5981         AddToWorklist(oye);
   5982       }
   5983       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   5984     }
   5985 
   5986     // See if the value being truncated is already sign extended.  If so, just
   5987     // eliminate the trunc/sext pair.
   5988     SDValue Op = N0.getOperand(0);
   5989     unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
   5990     unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
   5991     unsigned DestBits = VT.getScalarType().getSizeInBits();
   5992     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
   5993 
   5994     if (OpBits == DestBits) {
   5995       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
   5996       // bits, it is already ready.
   5997       if (NumSignBits > DestBits-MidBits)
   5998         return Op;
   5999     } else if (OpBits < DestBits) {
   6000       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
   6001       // bits, just sext from i32.
   6002       if (NumSignBits > OpBits-MidBits)
   6003         return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
   6004     } else {
   6005       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
   6006       // bits, just truncate to i32.
   6007       if (NumSignBits > OpBits-MidBits)
   6008         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
   6009     }
   6010 
   6011     // fold (sext (truncate x)) -> (sextinreg x).
   6012     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
   6013                                                  N0.getValueType())) {
   6014       if (OpBits < DestBits)
   6015         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
   6016       else if (OpBits > DestBits)
   6017         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
   6018       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
   6019                          DAG.getValueType(N0.getValueType()));
   6020     }
   6021   }
   6022 
   6023   // fold (sext (load x)) -> (sext (truncate (sextload x)))
   6024   // Only generate vector extloads when 1) they're legal, and 2) they are
   6025   // deemed desirable by the target.
   6026   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   6027       ((!LegalOperations && !VT.isVector() &&
   6028         !cast<LoadSDNode>(N0)->isVolatile()) ||
   6029        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
   6030     bool DoXform = true;
   6031     SmallVector<SDNode*, 4> SetCCs;
   6032     if (!N0.hasOneUse())
   6033       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
   6034     if (VT.isVector())
   6035       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
   6036     if (DoXform) {
   6037       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6038       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   6039                                        LN0->getChain(),
   6040                                        LN0->getBasePtr(), N0.getValueType(),
   6041                                        LN0->getMemOperand());
   6042       CombineTo(N, ExtLoad);
   6043       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   6044                                   N0.getValueType(), ExtLoad);
   6045       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
   6046       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   6047                       ISD::SIGN_EXTEND);
   6048       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6049     }
   6050   }
   6051 
   6052   // fold (sext (load x)) to multiple smaller sextloads.
   6053   // Only on illegal but splittable vectors.
   6054   if (SDValue ExtLoad = CombineExtLoad(N))
   6055     return ExtLoad;
   6056 
   6057   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
   6058   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
   6059   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
   6060       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
   6061     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6062     EVT MemVT = LN0->getMemoryVT();
   6063     if ((!LegalOperations && !LN0->isVolatile()) ||
   6064         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
   6065       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   6066                                        LN0->getChain(),
   6067                                        LN0->getBasePtr(), MemVT,
   6068                                        LN0->getMemOperand());
   6069       CombineTo(N, ExtLoad);
   6070       CombineTo(N0.getNode(),
   6071                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   6072                             N0.getValueType(), ExtLoad),
   6073                 ExtLoad.getValue(1));
   6074       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6075     }
   6076   }
   6077 
   6078   // fold (sext (and/or/xor (load x), cst)) ->
   6079   //      (and/or/xor (sextload x), (sext cst))
   6080   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
   6081        N0.getOpcode() == ISD::XOR) &&
   6082       isa<LoadSDNode>(N0.getOperand(0)) &&
   6083       N0.getOperand(1).getOpcode() == ISD::Constant &&
   6084       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
   6085       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
   6086     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
   6087     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
   6088       bool DoXform = true;
   6089       SmallVector<SDNode*, 4> SetCCs;
   6090       if (!N0.hasOneUse())
   6091         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
   6092                                           SetCCs, TLI);
   6093       if (DoXform) {
   6094         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
   6095                                          LN0->getChain(), LN0->getBasePtr(),
   6096                                          LN0->getMemoryVT(),
   6097                                          LN0->getMemOperand());
   6098         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   6099         Mask = Mask.sext(VT.getSizeInBits());
   6100         SDLoc DL(N);
   6101         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
   6102                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
   6103         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
   6104                                     SDLoc(N0.getOperand(0)),
   6105                                     N0.getOperand(0).getValueType(), ExtLoad);
   6106         CombineTo(N, And);
   6107         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
   6108         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
   6109                         ISD::SIGN_EXTEND);
   6110         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6111       }
   6112     }
   6113   }
   6114 
   6115   if (N0.getOpcode() == ISD::SETCC) {
   6116     EVT N0VT = N0.getOperand(0).getValueType();
   6117     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
   6118     // Only do this before legalize for now.
   6119     if (VT.isVector() && !LegalOperations &&
   6120         TLI.getBooleanContents(N0VT) ==
   6121             TargetLowering::ZeroOrNegativeOneBooleanContent) {
   6122       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
   6123       // of the same size as the compared operands. Only optimize sext(setcc())
   6124       // if this is the case.
   6125       EVT SVT = getSetCCResultType(N0VT);
   6126 
   6127       // We know that the # elements of the results is the same as the
   6128       // # elements of the compare (and the # elements of the compare result
   6129       // for that matter).  Check to see that they are the same size.  If so,
   6130       // we know that the element size of the sext'd result matches the
   6131       // element size of the compare operands.
   6132       if (VT.getSizeInBits() == SVT.getSizeInBits())
   6133         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
   6134                              N0.getOperand(1),
   6135                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
   6136 
   6137       // If the desired elements are smaller or larger than the source
   6138       // elements we can use a matching integer vector type and then
   6139       // truncate/sign extend
   6140       EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
   6141       if (SVT == MatchingVectorType) {
   6142         SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
   6143                                N0.getOperand(0), N0.getOperand(1),
   6144                                cast<CondCodeSDNode>(N0.getOperand(2))->get());
   6145         return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
   6146       }
   6147     }
   6148 
   6149     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
   6150     unsigned ElementWidth = VT.getScalarType().getSizeInBits();
   6151     SDLoc DL(N);
   6152     SDValue NegOne =
   6153       DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
   6154     SDValue SCC =
   6155       SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
   6156                        NegOne, DAG.getConstant(0, DL, VT),
   6157                        cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
   6158     if (SCC.getNode()) return SCC;
   6159 
   6160     if (!VT.isVector()) {
   6161       EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
   6162       if (!LegalOperations ||
   6163           TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
   6164         SDLoc DL(N);
   6165         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
   6166         SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
   6167                                      N0.getOperand(0), N0.getOperand(1), CC);
   6168         return DAG.getSelect(DL, VT, SetCC,
   6169                              NegOne, DAG.getConstant(0, DL, VT));
   6170       }
   6171     }
   6172   }
   6173 
   6174   // fold (sext x) -> (zext x) if the sign bit is known zero.
   6175   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
   6176       DAG.SignBitIsZero(N0))
   6177     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
   6178 
   6179   return SDValue();
   6180 }
   6181 
   6182 // isTruncateOf - If N is a truncate of some other value, return true, record
   6183 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
   6184 // This function computes KnownZero to avoid a duplicated call to
   6185 // computeKnownBits in the caller.
   6186 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
   6187                          APInt &KnownZero) {
   6188   APInt KnownOne;
   6189   if (N->getOpcode() == ISD::TRUNCATE) {
   6190     Op = N->getOperand(0);
   6191     DAG.computeKnownBits(Op, KnownZero, KnownOne);
   6192     return true;
   6193   }
   6194 
   6195   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
   6196       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
   6197     return false;
   6198 
   6199   SDValue Op0 = N->getOperand(0);
   6200   SDValue Op1 = N->getOperand(1);
   6201   assert(Op0.getValueType() == Op1.getValueType());
   6202 
   6203   if (isNullConstant(Op0))
   6204     Op = Op1;
   6205   else if (isNullConstant(Op1))
   6206     Op = Op0;
   6207   else
   6208     return false;
   6209 
   6210   DAG.computeKnownBits(Op, KnownZero, KnownOne);
   6211 
   6212   if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
   6213     return false;
   6214 
   6215   return true;
   6216 }
   6217 
   6218 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   6219   SDValue N0 = N->getOperand(0);
   6220   EVT VT = N->getValueType(0);
   6221 
   6222   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   6223                                               LegalOperations))
   6224     return SDValue(Res, 0);
   6225 
   6226   // fold (zext (zext x)) -> (zext x)
   6227   // fold (zext (aext x)) -> (zext x)
   6228   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
   6229     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
   6230                        N0.getOperand(0));
   6231 
   6232   // fold (zext (truncate x)) -> (zext x) or
   6233   //      (zext (truncate x)) -> (truncate x)
   6234   // This is valid when the truncated bits of x are already zero.
   6235   // FIXME: We should extend this to work for vectors too.
   6236   SDValue Op;
   6237   APInt KnownZero;
   6238   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
   6239     APInt TruncatedBits =
   6240       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
   6241       APInt(Op.getValueSizeInBits(), 0) :
   6242       APInt::getBitsSet(Op.getValueSizeInBits(),
   6243                         N0.getValueSizeInBits(),
   6244                         std::min(Op.getValueSizeInBits(),
   6245                                  VT.getSizeInBits()));
   6246     if (TruncatedBits == (KnownZero & TruncatedBits)) {
   6247       if (VT.bitsGT(Op.getValueType()))
   6248         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
   6249       if (VT.bitsLT(Op.getValueType()))
   6250         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
   6251 
   6252       return Op;
   6253     }
   6254   }
   6255 
   6256   // fold (zext (truncate (load x))) -> (zext (smaller load x))
   6257   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
   6258   if (N0.getOpcode() == ISD::TRUNCATE) {
   6259     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
   6260       SDNode* oye = N0.getNode()->getOperand(0).getNode();
   6261       if (NarrowLoad.getNode() != N0.getNode()) {
   6262         CombineTo(N0.getNode(), NarrowLoad);
   6263         // CombineTo deleted the truncate, if needed, but not what's under it.
   6264         AddToWorklist(oye);
   6265       }
   6266       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6267     }
   6268   }
   6269 
   6270   // fold (zext (truncate x)) -> (and x, mask)
   6271   if (N0.getOpcode() == ISD::TRUNCATE) {
   6272     // fold (zext (truncate (load x))) -> (zext (smaller load x))
   6273     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
   6274     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
   6275       SDNode *oye = N0.getNode()->getOperand(0).getNode();
   6276       if (NarrowLoad.getNode() != N0.getNode()) {
   6277         CombineTo(N0.getNode(), NarrowLoad);
   6278         // CombineTo deleted the truncate, if needed, but not what's under it.
   6279         AddToWorklist(oye);
   6280       }
   6281       return SDValue(N, 0); // Return N so it doesn't get rechecked!
   6282     }
   6283 
   6284     EVT SrcVT = N0.getOperand(0).getValueType();
   6285     EVT MinVT = N0.getValueType();
   6286 
   6287     // Try to mask before the extension to avoid having to generate a larger mask,
   6288     // possibly over several sub-vectors.
   6289     if (SrcVT.bitsLT(VT)) {
   6290       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
   6291                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
   6292         SDValue Op = N0.getOperand(0);
   6293         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
   6294         AddToWorklist(Op.getNode());
   6295         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
   6296       }
   6297     }
   6298 
   6299     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
   6300       SDValue Op = N0.getOperand(0);
   6301       if (SrcVT.bitsLT(VT)) {
   6302         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
   6303         AddToWorklist(Op.getNode());
   6304       } else if (SrcVT.bitsGT(VT)) {
   6305         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
   6306         AddToWorklist(Op.getNode());
   6307       }
   6308       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
   6309     }
   6310   }
   6311 
   6312   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
   6313   // if either of the casts is not free.
   6314   if (N0.getOpcode() == ISD::AND &&
   6315       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
   6316       N0.getOperand(1).getOpcode() == ISD::Constant &&
   6317       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
   6318                            N0.getValueType()) ||
   6319        !TLI.isZExtFree(N0.getValueType(), VT))) {
   6320     SDValue X = N0.getOperand(0).getOperand(0);
   6321     if (X.getValueType().bitsLT(VT)) {
   6322       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
   6323     } else if (X.getValueType().bitsGT(VT)) {
   6324       X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
   6325     }
   6326     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   6327     Mask = Mask.zext(VT.getSizeInBits());
   6328     SDLoc DL(N);
   6329     return DAG.getNode(ISD::AND, DL, VT,
   6330                        X, DAG.getConstant(Mask, DL, VT));
   6331   }
   6332 
   6333   // fold (zext (load x)) -> (zext (truncate (zextload x)))
   6334   // Only generate vector extloads when 1) they're legal, and 2) they are
   6335   // deemed desirable by the target.
   6336   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   6337       ((!LegalOperations && !VT.isVector() &&
   6338         !cast<LoadSDNode>(N0)->isVolatile()) ||
   6339        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
   6340     bool DoXform = true;
   6341     SmallVector<SDNode*, 4> SetCCs;
   6342     if (!N0.hasOneUse())
   6343       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
   6344     if (VT.isVector())
   6345       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
   6346     if (DoXform) {
   6347       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6348       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
   6349                                        LN0->getChain(),
   6350                                        LN0->getBasePtr(), N0.getValueType(),
   6351                                        LN0->getMemOperand());
   6352       CombineTo(N, ExtLoad);
   6353       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   6354                                   N0.getValueType(), ExtLoad);
   6355       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
   6356 
   6357       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   6358                       ISD::ZERO_EXTEND);
   6359       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6360     }
   6361   }
   6362 
   6363   // fold (zext (load x)) to multiple smaller zextloads.
   6364   // Only on illegal but splittable vectors.
   6365   if (SDValue ExtLoad = CombineExtLoad(N))
   6366     return ExtLoad;
   6367 
   6368   // fold (zext (and/or/xor (load x), cst)) ->
   6369   //      (and/or/xor (zextload x), (zext cst))
   6370   // Unless (and (load x) cst) will match as a zextload already and has
   6371   // additional users.
   6372   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
   6373        N0.getOpcode() == ISD::XOR) &&
   6374       isa<LoadSDNode>(N0.getOperand(0)) &&
   6375       N0.getOperand(1).getOpcode() == ISD::Constant &&
   6376       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
   6377       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
   6378     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
   6379     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
   6380       bool DoXform = true;
   6381       SmallVector<SDNode*, 4> SetCCs;
   6382       if (!N0.hasOneUse()) {
   6383         if (N0.getOpcode() == ISD::AND) {
   6384           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
   6385           auto NarrowLoad = false;
   6386           EVT LoadResultTy = AndC->getValueType(0);
   6387           EVT ExtVT, LoadedVT;
   6388           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
   6389                                NarrowLoad))
   6390             DoXform = false;
   6391         }
   6392         if (DoXform)
   6393           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
   6394                                             ISD::ZERO_EXTEND, SetCCs, TLI);
   6395       }
   6396       if (DoXform) {
   6397         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
   6398                                          LN0->getChain(), LN0->getBasePtr(),
   6399                                          LN0->getMemoryVT(),
   6400                                          LN0->getMemOperand());
   6401         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   6402         Mask = Mask.zext(VT.getSizeInBits());
   6403         SDLoc DL(N);
   6404         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
   6405                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
   6406         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
   6407                                     SDLoc(N0.getOperand(0)),
   6408                                     N0.getOperand(0).getValueType(), ExtLoad);
   6409         CombineTo(N, And);
   6410         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
   6411         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
   6412                         ISD::ZERO_EXTEND);
   6413         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6414       }
   6415     }
   6416   }
   6417 
   6418   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
   6419   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
   6420   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
   6421       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
   6422     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6423     EVT MemVT = LN0->getMemoryVT();
   6424     if ((!LegalOperations && !LN0->isVolatile()) ||
   6425         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
   6426       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
   6427                                        LN0->getChain(),
   6428                                        LN0->getBasePtr(), MemVT,
   6429                                        LN0->getMemOperand());
   6430       CombineTo(N, ExtLoad);
   6431       CombineTo(N0.getNode(),
   6432                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
   6433                             ExtLoad),
   6434                 ExtLoad.getValue(1));
   6435       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6436     }
   6437   }
   6438 
   6439   if (N0.getOpcode() == ISD::SETCC) {
   6440     if (!LegalOperations && VT.isVector() &&
   6441         N0.getValueType().getVectorElementType() == MVT::i1) {
   6442       EVT N0VT = N0.getOperand(0).getValueType();
   6443       if (getSetCCResultType(N0VT) == N0.getValueType())
   6444         return SDValue();
   6445 
   6446       // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
   6447       // Only do this before legalize for now.
   6448       EVT EltVT = VT.getVectorElementType();
   6449       SDLoc DL(N);
   6450       SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
   6451                                     DAG.getConstant(1, DL, EltVT));
   6452       if (VT.getSizeInBits() == N0VT.getSizeInBits())
   6453         // We know that the # elements of the results is the same as the
   6454         // # elements of the compare (and the # elements of the compare result
   6455         // for that matter).  Check to see that they are the same size.  If so,
   6456         // we know that the element size of the sext'd result matches the
   6457         // element size of the compare operands.
   6458         return DAG.getNode(ISD::AND, DL, VT,
   6459                            DAG.getSetCC(DL, VT, N0.getOperand(0),
   6460                                          N0.getOperand(1),
   6461                                  cast<CondCodeSDNode>(N0.getOperand(2))->get()),
   6462                            DAG.getNode(ISD::BUILD_VECTOR, DL, VT,
   6463                                        OneOps));
   6464 
   6465       // If the desired elements are smaller or larger than the source
   6466       // elements we can use a matching integer vector type and then
   6467       // truncate/sign extend
   6468       EVT MatchingElementType =
   6469         EVT::getIntegerVT(*DAG.getContext(),
   6470                           N0VT.getScalarType().getSizeInBits());
   6471       EVT MatchingVectorType =
   6472         EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
   6473                          N0VT.getVectorNumElements());
   6474       SDValue VsetCC =
   6475         DAG.getSetCC(DL, MatchingVectorType, N0.getOperand(0),
   6476                       N0.getOperand(1),
   6477                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
   6478       return DAG.getNode(ISD::AND, DL, VT,
   6479                          DAG.getSExtOrTrunc(VsetCC, DL, VT),
   6480                          DAG.getNode(ISD::BUILD_VECTOR, DL, VT, OneOps));
   6481     }
   6482 
   6483     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
   6484     SDLoc DL(N);
   6485     SDValue SCC =
   6486       SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
   6487                        DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
   6488                        cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
   6489     if (SCC.getNode()) return SCC;
   6490   }
   6491 
   6492   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
   6493   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
   6494       isa<ConstantSDNode>(N0.getOperand(1)) &&
   6495       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
   6496       N0.hasOneUse()) {
   6497     SDValue ShAmt = N0.getOperand(1);
   6498     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
   6499     if (N0.getOpcode() == ISD::SHL) {
   6500       SDValue InnerZExt = N0.getOperand(0);
   6501       // If the original shl may be shifting out bits, do not perform this
   6502       // transformation.
   6503       unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
   6504         InnerZExt.getOperand(0).getValueType().getSizeInBits();
   6505       if (ShAmtVal > KnownZeroBits)
   6506         return SDValue();
   6507     }
   6508 
   6509     SDLoc DL(N);
   6510 
   6511     // Ensure that the shift amount is wide enough for the shifted value.
   6512     if (VT.getSizeInBits() >= 256)
   6513       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
   6514 
   6515     return DAG.getNode(N0.getOpcode(), DL, VT,
   6516                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
   6517                        ShAmt);
   6518   }
   6519 
   6520   return SDValue();
   6521 }
   6522 
   6523 SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
   6524   SDValue N0 = N->getOperand(0);
   6525   EVT VT = N->getValueType(0);
   6526 
   6527   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   6528                                               LegalOperations))
   6529     return SDValue(Res, 0);
   6530 
   6531   // fold (aext (aext x)) -> (aext x)
   6532   // fold (aext (zext x)) -> (zext x)
   6533   // fold (aext (sext x)) -> (sext x)
   6534   if (N0.getOpcode() == ISD::ANY_EXTEND  ||
   6535       N0.getOpcode() == ISD::ZERO_EXTEND ||
   6536       N0.getOpcode() == ISD::SIGN_EXTEND)
   6537     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
   6538 
   6539   // fold (aext (truncate (load x))) -> (aext (smaller load x))
   6540   // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
   6541   if (N0.getOpcode() == ISD::TRUNCATE) {
   6542     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
   6543       SDNode* oye = N0.getNode()->getOperand(0).getNode();
   6544       if (NarrowLoad.getNode() != N0.getNode()) {
   6545         CombineTo(N0.getNode(), NarrowLoad);
   6546         // CombineTo deleted the truncate, if needed, but not what's under it.
   6547         AddToWorklist(oye);
   6548       }
   6549       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6550     }
   6551   }
   6552 
   6553   // fold (aext (truncate x))
   6554   if (N0.getOpcode() == ISD::TRUNCATE) {
   6555     SDValue TruncOp = N0.getOperand(0);
   6556     if (TruncOp.getValueType() == VT)
   6557       return TruncOp; // x iff x size == zext size.
   6558     if (TruncOp.getValueType().bitsGT(VT))
   6559       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
   6560     return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
   6561   }
   6562 
   6563   // Fold (aext (and (trunc x), cst)) -> (and x, cst)
   6564   // if the trunc is not free.
   6565   if (N0.getOpcode() == ISD::AND &&
   6566       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
   6567       N0.getOperand(1).getOpcode() == ISD::Constant &&
   6568       !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
   6569                           N0.getValueType())) {
   6570     SDValue X = N0.getOperand(0).getOperand(0);
   6571     if (X.getValueType().bitsLT(VT)) {
   6572       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
   6573     } else if (X.getValueType().bitsGT(VT)) {
   6574       X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
   6575     }
   6576     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   6577     Mask = Mask.zext(VT.getSizeInBits());
   6578     SDLoc DL(N);
   6579     return DAG.getNode(ISD::AND, DL, VT,
   6580                        X, DAG.getConstant(Mask, DL, VT));
   6581   }
   6582 
   6583   // fold (aext (load x)) -> (aext (truncate (extload x)))
   6584   // None of the supported targets knows how to perform load and any_ext
   6585   // on vectors in one instruction.  We only perform this transformation on
   6586   // scalars.
   6587   if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
   6588       ISD::isUNINDEXEDLoad(N0.getNode()) &&
   6589       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
   6590     bool DoXform = true;
   6591     SmallVector<SDNode*, 4> SetCCs;
   6592     if (!N0.hasOneUse())
   6593       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
   6594     if (DoXform) {
   6595       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6596       SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
   6597                                        LN0->getChain(),
   6598                                        LN0->getBasePtr(), N0.getValueType(),
   6599                                        LN0->getMemOperand());
   6600       CombineTo(N, ExtLoad);
   6601       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   6602                                   N0.getValueType(), ExtLoad);
   6603       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
   6604       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   6605                       ISD::ANY_EXTEND);
   6606       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6607     }
   6608   }
   6609 
   6610   // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
   6611   // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
   6612   // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
   6613   if (N0.getOpcode() == ISD::LOAD &&
   6614       !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   6615       N0.hasOneUse()) {
   6616     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6617     ISD::LoadExtType ExtType = LN0->getExtensionType();
   6618     EVT MemVT = LN0->getMemoryVT();
   6619     if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
   6620       SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
   6621                                        VT, LN0->getChain(), LN0->getBasePtr(),
   6622                                        MemVT, LN0->getMemOperand());
   6623       CombineTo(N, ExtLoad);
   6624       CombineTo(N0.getNode(),
   6625                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   6626                             N0.getValueType(), ExtLoad),
   6627                 ExtLoad.getValue(1));
   6628       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6629     }
   6630   }
   6631 
   6632   if (N0.getOpcode() == ISD::SETCC) {
   6633     // For vectors:
   6634     // aext(setcc) -> vsetcc
   6635     // aext(setcc) -> truncate(vsetcc)
   6636     // aext(setcc) -> aext(vsetcc)
   6637     // Only do this before legalize for now.
   6638     if (VT.isVector() && !LegalOperations) {
   6639       EVT N0VT = N0.getOperand(0).getValueType();
   6640         // We know that the # elements of the results is the same as the
   6641         // # elements of the compare (and the # elements of the compare result
   6642         // for that matter).  Check to see that they are the same size.  If so,
   6643         // we know that the element size of the sext'd result matches the
   6644         // element size of the compare operands.
   6645       if (VT.getSizeInBits() == N0VT.getSizeInBits())
   6646         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
   6647                              N0.getOperand(1),
   6648                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
   6649       // If the desired elements are smaller or larger than the source
   6650       // elements we can use a matching integer vector type and then
   6651       // truncate/any extend
   6652       else {
   6653         EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
   6654         SDValue VsetCC =
   6655           DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
   6656                         N0.getOperand(1),
   6657                         cast<CondCodeSDNode>(N0.getOperand(2))->get());
   6658         return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
   6659       }
   6660     }
   6661 
   6662     // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
   6663     SDLoc DL(N);
   6664     SDValue SCC =
   6665       SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1),
   6666                        DAG.getConstant(1, DL, VT), DAG.getConstant(0, DL, VT),
   6667                        cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
   6668     if (SCC.getNode())
   6669       return SCC;
   6670   }
   6671 
   6672   return SDValue();
   6673 }
   6674 
   6675 /// See if the specified operand can be simplified with the knowledge that only
   6676 /// the bits specified by Mask are used.  If so, return the simpler operand,
   6677 /// otherwise return a null SDValue.
   6678 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
   6679   switch (V.getOpcode()) {
   6680   default: break;
   6681   case ISD::Constant: {
   6682     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
   6683     assert(CV && "Const value should be ConstSDNode.");
   6684     const APInt &CVal = CV->getAPIntValue();
   6685     APInt NewVal = CVal & Mask;
   6686     if (NewVal != CVal)
   6687       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
   6688     break;
   6689   }
   6690   case ISD::OR:
   6691   case ISD::XOR:
   6692     // If the LHS or RHS don't contribute bits to the or, drop them.
   6693     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
   6694       return V.getOperand(1);
   6695     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
   6696       return V.getOperand(0);
   6697     break;
   6698   case ISD::SRL:
   6699     // Only look at single-use SRLs.
   6700     if (!V.getNode()->hasOneUse())
   6701       break;
   6702     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
   6703       // See if we can recursively simplify the LHS.
   6704       unsigned Amt = RHSC->getZExtValue();
   6705 
   6706       // Watch out for shift count overflow though.
   6707       if (Amt >= Mask.getBitWidth()) break;
   6708       APInt NewMask = Mask << Amt;
   6709       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
   6710         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
   6711                            SimplifyLHS, V.getOperand(1));
   6712     }
   6713   }
   6714   return SDValue();
   6715 }
   6716 
/// If the result of a wider load is shifted right by N bits and then
/// truncated to a narrower type, and N is a multiple of the number of bits of
/// the narrower type, transform it to a narrower load from address + N / num of
/// bits of new type. If the result is to be extended, also fold the extension
/// to form an extending load.
   6722 SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   6723   unsigned Opc = N->getOpcode();
   6724 
   6725   ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
   6726   SDValue N0 = N->getOperand(0);
   6727   EVT VT = N->getValueType(0);
   6728   EVT ExtVT = VT;
   6729 
   6730   // This transformation isn't valid for vector loads.
   6731   if (VT.isVector())
   6732     return SDValue();
   6733 
   6734   // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
   6735   // extended to VT.
   6736   if (Opc == ISD::SIGN_EXTEND_INREG) {
   6737     ExtType = ISD::SEXTLOAD;
   6738     ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
   6739   } else if (Opc == ISD::SRL) {
   6740     // Another special-case: SRL is basically zero-extending a narrower value.
   6741     ExtType = ISD::ZEXTLOAD;
   6742     N0 = SDValue(N, 0);
   6743     ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   6744     if (!N01) return SDValue();
   6745     ExtVT = EVT::getIntegerVT(*DAG.getContext(),
   6746                               VT.getSizeInBits() - N01->getZExtValue());
   6747   }
   6748   if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
   6749     return SDValue();
   6750 
   6751   unsigned EVTBits = ExtVT.getSizeInBits();
   6752 
   6753   // Do not generate loads of non-round integer types since these can
   6754   // be expensive (and would be wrong if the type is not byte sized).
   6755   if (!ExtVT.isRound())
   6756     return SDValue();
   6757 
   6758   unsigned ShAmt = 0;
   6759   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
   6760     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   6761       ShAmt = N01->getZExtValue();
   6762       // Is the shift amount a multiple of size of VT?
   6763       if ((ShAmt & (EVTBits-1)) == 0) {
   6764         N0 = N0.getOperand(0);
   6765         // Is the load width a multiple of size of VT?
   6766         if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
   6767           return SDValue();
   6768       }
   6769 
   6770       // At this point, we must have a load or else we can't do the transform.
   6771       if (!isa<LoadSDNode>(N0)) return SDValue();
   6772 
   6773       // Because a SRL must be assumed to *need* to zero-extend the high bits
   6774       // (as opposed to anyext the high bits), we can't combine the zextload
   6775       // lowering of SRL and an sextload.
   6776       if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
   6777         return SDValue();
   6778 
   6779       // If the shift amount is larger than the input type then we're not
   6780       // accessing any of the loaded bytes.  If the load was a zextload/extload
   6781       // then the result of the shift+trunc is zero/undef (handled elsewhere).
   6782       if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
   6783         return SDValue();
   6784     }
   6785   }
   6786 
   6787   // If the load is shifted left (and the result isn't shifted back right),
   6788   // we can fold the truncate through the shift.
   6789   unsigned ShLeftAmt = 0;
   6790   if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
   6791       ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
   6792     if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   6793       ShLeftAmt = N01->getZExtValue();
   6794       N0 = N0.getOperand(0);
   6795     }
   6796   }
   6797 
   6798   // If we haven't found a load, we can't narrow it.  Don't transform one with
   6799   // multiple uses, this would require adding a new load.
   6800   if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
   6801     return SDValue();
   6802 
   6803   // Don't change the width of a volatile load.
   6804   LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6805   if (LN0->isVolatile())
   6806     return SDValue();
   6807 
   6808   // Verify that we are actually reducing a load width here.
   6809   if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
   6810     return SDValue();
   6811 
   6812   // For the transform to be legal, the load must produce only two values
   6813   // (the value loaded and the chain).  Don't transform a pre-increment
   6814   // load, for example, which produces an extra value.  Otherwise the
   6815   // transformation is not equivalent, and the downstream logic to replace
   6816   // uses gets things wrong.
   6817   if (LN0->getNumValues() > 2)
   6818     return SDValue();
   6819 
   6820   // If the load that we're shrinking is an extload and we're not just
   6821   // discarding the extension we can't simply shrink the load. Bail.
   6822   // TODO: It would be possible to merge the extensions in some cases.
   6823   if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
   6824       LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
   6825     return SDValue();
   6826 
   6827   if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
   6828     return SDValue();
   6829 
   6830   EVT PtrType = N0.getOperand(1).getValueType();
   6831 
   6832   if (PtrType == MVT::Untyped || PtrType.isExtended())
   6833     // It's not possible to generate a constant of extended or untyped type.
   6834     return SDValue();
   6835 
   6836   // For big endian targets, we need to adjust the offset to the pointer to
   6837   // load the correct bytes.
   6838   if (DAG.getDataLayout().isBigEndian()) {
   6839     unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
   6840     unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
   6841     ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
   6842   }
   6843 
   6844   uint64_t PtrOff = ShAmt / 8;
   6845   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
   6846   SDLoc DL(LN0);
   6847   SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
   6848                                PtrType, LN0->getBasePtr(),
   6849                                DAG.getConstant(PtrOff, DL, PtrType));
   6850   AddToWorklist(NewPtr.getNode());
   6851 
   6852   SDValue Load;
   6853   if (ExtType == ISD::NON_EXTLOAD)
   6854     Load =  DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
   6855                         LN0->getPointerInfo().getWithOffset(PtrOff),
   6856                         LN0->isVolatile(), LN0->isNonTemporal(),
   6857                         LN0->isInvariant(), NewAlign, LN0->getAAInfo());
   6858   else
   6859     Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
   6860                           LN0->getPointerInfo().getWithOffset(PtrOff),
   6861                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
   6862                           LN0->isInvariant(), NewAlign, LN0->getAAInfo());
   6863 
   6864   // Replace the old load's chain with the new load's chain.
   6865   WorklistRemover DeadNodes(*this);
   6866   DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
   6867 
   6868   // Shift the result left, if we've swallowed a left shift.
   6869   SDValue Result = Load;
   6870   if (ShLeftAmt != 0) {
   6871     EVT ShImmTy = getShiftAmountTy(Result.getValueType());
   6872     if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
   6873       ShImmTy = VT;
   6874     // If the shift amount is as large as the result size (but, presumably,
   6875     // no larger than the source) then the useful bits of the result are
   6876     // zero; we can't simply return the shortened shift, because the result
   6877     // of that operation is undefined.
   6878     SDLoc DL(N0);
   6879     if (ShLeftAmt >= VT.getSizeInBits())
   6880       Result = DAG.getConstant(0, DL, VT);
   6881     else
   6882       Result = DAG.getNode(ISD::SHL, DL, VT,
   6883                           Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
   6884   }
   6885 
   6886   // Return the new loaded value.
   6887   return Result;
   6888 }
   6889 
   6890 SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
   6891   SDValue N0 = N->getOperand(0);
   6892   SDValue N1 = N->getOperand(1);
   6893   EVT VT = N->getValueType(0);
   6894   EVT EVT = cast<VTSDNode>(N1)->getVT();
   6895   unsigned VTBits = VT.getScalarType().getSizeInBits();
   6896   unsigned EVTBits = EVT.getScalarType().getSizeInBits();
   6897 
   6898   if (N0.isUndef())
   6899     return DAG.getUNDEF(VT);
   6900 
   6901   // fold (sext_in_reg c1) -> c1
   6902   if (isConstantIntBuildVectorOrConstantInt(N0))
   6903     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
   6904 
   6905   // If the input is already sign extended, just drop the extension.
   6906   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
   6907     return N0;
   6908 
   6909   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
   6910   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
   6911       EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
   6912     return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
   6913                        N0.getOperand(0), N1);
   6914 
   6915   // fold (sext_in_reg (sext x)) -> (sext x)
   6916   // fold (sext_in_reg (aext x)) -> (sext x)
   6917   // if x is small enough.
   6918   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
   6919     SDValue N00 = N0.getOperand(0);
   6920     if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
   6921         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
   6922       return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
   6923   }
   6924 
   6925   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
   6926   if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
   6927     return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
   6928 
   6929   // fold operands of sext_in_reg based on knowledge that the top bits are not
   6930   // demanded.
   6931   if (SimplifyDemandedBits(SDValue(N, 0)))
   6932     return SDValue(N, 0);
   6933 
   6934   // fold (sext_in_reg (load x)) -> (smaller sextload x)
   6935   // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
   6936   if (SDValue NarrowLoad = ReduceLoadWidth(N))
   6937     return NarrowLoad;
   6938 
   6939   // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
   6940   // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
   6941   // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
   6942   if (N0.getOpcode() == ISD::SRL) {
   6943     if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
   6944       if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
   6945         // We can turn this into an SRA iff the input to the SRL is already sign
   6946         // extended enough.
   6947         unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
   6948         if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
   6949           return DAG.getNode(ISD::SRA, SDLoc(N), VT,
   6950                              N0.getOperand(0), N0.getOperand(1));
   6951       }
   6952   }
   6953 
   6954   // fold (sext_inreg (extload x)) -> (sextload x)
   6955   if (ISD::isEXTLoad(N0.getNode()) &&
   6956       ISD::isUNINDEXEDLoad(N0.getNode()) &&
   6957       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
   6958       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
   6959        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
   6960     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6961     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   6962                                      LN0->getChain(),
   6963                                      LN0->getBasePtr(), EVT,
   6964                                      LN0->getMemOperand());
   6965     CombineTo(N, ExtLoad);
   6966     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   6967     AddToWorklist(ExtLoad.getNode());
   6968     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6969   }
   6970   // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
   6971   if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   6972       N0.hasOneUse() &&
   6973       EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
   6974       ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
   6975        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
   6976     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6977     SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   6978                                      LN0->getChain(),
   6979                                      LN0->getBasePtr(), EVT,
   6980                                      LN0->getMemOperand());
   6981     CombineTo(N, ExtLoad);
   6982     CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
   6983     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6984   }
   6985 
   6986   // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
   6987   if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
   6988     SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
   6989                                        N0.getOperand(1), false);
   6990     if (BSwap.getNode())
   6991       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
   6992                          BSwap, N1);
   6993   }
   6994 
   6995   return SDValue();
   6996 }
   6997 
   6998 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
   6999   SDValue N0 = N->getOperand(0);
   7000   EVT VT = N->getValueType(0);
   7001 
   7002   if (N0.getOpcode() == ISD::UNDEF)
   7003     return DAG.getUNDEF(VT);
   7004 
   7005   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   7006                                               LegalOperations))
   7007     return SDValue(Res, 0);
   7008 
   7009   return SDValue();
   7010 }
   7011 
/// Try to simplify the TRUNCATE node \p N.
///
/// The folds below are attempted in order and the first one that applies
/// wins. Returns the replacement value, SDValue(N, 0) when
/// SimplifyDemandedBits reports that it changed something, or an empty
/// SDValue when no fold applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  // Rebuild the node with the same operand and leave the folding to getNode.
  if (isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      // if the source is smaller than the dest, we still need an extend
      // (of the same kind as the original one).
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                         N0.getOperand(0));
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      // if the source is larger than the dest, then we just need the truncate
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes (little-endian index shown) --
  //   v4i32 b = BITCAST (v2i64 val)
  //   i32 y = EXTRACT_VECTOR_ELT(v4i32 b, i32 2)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    // Each extracted-type element is viewed as SizeRatio elements of the
    // truncated type.
    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    // Only a constant element index into a legal narrow vector type is
    // handled.
    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // Pick the first narrow lane of the wide element on little-endian
      // targets and the last one on big-endian targets.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N),
                              NVT, N0.getOperand(0));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT,
                         DL, TrTy, V,
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  // Only done when the target reports the truncate as free.
  if (N0.getOpcode() == ISD::SELECT) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th operand, starting with operand 0.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" -> "trunc y"
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    SDValue Shorter =
      GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                               VT.getSizeInBits()));
    if (Shorter.getNode())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }
  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      // The memory type is still narrower than the truncated type, so the
      // same extension kind is reused with the smaller result type.
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Redirect users of the old load's chain result to the new load.
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Record the single non-undef operand (V, at position Idx) and the
    // truncated type for each operand.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (X.getOpcode() != ISD::UNDEF) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    // Every operand is undef, so the truncate is undef as well.
    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        // Truncate only the defined operand.
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
   7207 
   7208 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
   7209   SDValue Elt = N->getOperand(i);
   7210   if (Elt.getOpcode() != ISD::MERGE_VALUES)
   7211     return Elt.getNode();
   7212   return Elt.getOperand(Elt.getResNo()).getNode();
   7213 }
   7214 
   7215 /// build_pair (load, load) -> load
   7216 /// if load locations are consecutive.
   7217 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
   7218   assert(N->getOpcode() == ISD::BUILD_PAIR);
   7219 
   7220   LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
   7221   LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
   7222   if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
   7223       LD1->getAddressSpace() != LD2->getAddressSpace())
   7224     return SDValue();
   7225   EVT LD1VT = LD1->getValueType(0);
   7226 
   7227   if (ISD::isNON_EXTLoad(LD2) &&
   7228       LD2->hasOneUse() &&
   7229       // If both are volatile this would reduce the number of volatile loads.
   7230       // If one is volatile it might be ok, but play conservative and bail out.
   7231       !LD1->isVolatile() &&
   7232       !LD2->isVolatile() &&
   7233       DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
   7234     unsigned Align = LD1->getAlignment();
   7235     unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
   7236         VT.getTypeForEVT(*DAG.getContext()));
   7237 
   7238     if (NewAlign <= Align &&
   7239         (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
   7240       return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
   7241                          LD1->getBasePtr(), LD1->getPointerInfo(),
   7242                          false, false, false, Align);
   7243   }
   7244 
   7245   return SDValue();
   7246 }
   7247 
   7248 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
   7249   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
   7250   // and Lo parts; on big-endian machines it doesn't.
   7251   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
   7252 }
   7253 
   7254 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   7255   SDValue N0 = N->getOperand(0);
   7256   EVT VT = N->getValueType(0);
   7257 
   7258   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
   7259   // Only do this before legalize, since afterward the target may be depending
   7260   // on the bitconvert.
   7261   // First check to see if this is all constant.
   7262   if (!LegalTypes &&
   7263       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
   7264       VT.isVector()) {
   7265     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
   7266 
   7267     EVT DestEltVT = N->getValueType(0).getVectorElementType();
   7268     assert(!DestEltVT.isVector() &&
   7269            "Element type of vector ValueType must not be vector!");
   7270     if (isSimple)
   7271       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
   7272   }
   7273 
   7274   // If the input is a constant, let getNode fold it.
   7275   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
   7276     // If we can't allow illegal operations, we need to check that this is just
   7277     // a fp -> int or int -> conversion and that the resulting operation will
   7278     // be legal.
   7279     if (!LegalOperations ||
   7280         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
   7281          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
   7282         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
   7283          TLI.isOperationLegal(ISD::Constant, VT)))
   7284       return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0);
   7285   }
   7286 
   7287   // (conv (conv x, t1), t2) -> (conv x, t2)
   7288   if (N0.getOpcode() == ISD::BITCAST)
   7289     return DAG.getNode(ISD::BITCAST, SDLoc(N), VT,
   7290                        N0.getOperand(0));
   7291 
   7292   // fold (conv (load x)) -> (load (conv*)x)
   7293   // If the resultant load doesn't need a higher alignment than the original!
   7294   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   7295       // Do not change the width of a volatile load.
   7296       !cast<LoadSDNode>(N0)->isVolatile() &&
   7297       // Do not remove the cast if the types differ in endian layout.
   7298       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
   7299           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
   7300       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
   7301       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
   7302     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   7303     unsigned Align = DAG.getDataLayout().getABITypeAlignment(
   7304         VT.getTypeForEVT(*DAG.getContext()));
   7305     unsigned OrigAlign = LN0->getAlignment();
   7306 
   7307     if (Align <= OrigAlign) {
   7308       SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
   7309                                  LN0->getBasePtr(), LN0->getPointerInfo(),
   7310                                  LN0->isVolatile(), LN0->isNonTemporal(),
   7311                                  LN0->isInvariant(), OrigAlign,
   7312                                  LN0->getAAInfo());
   7313       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
   7314       return Load;
   7315     }
   7316   }
   7317 
   7318   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
   7319   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
   7320   //
   7321   // For ppc_fp128:
   7322   // fold (bitcast (fneg x)) ->
   7323   //     flipbit = signbit
   7324   //     (xor (bitcast x) (build_pair flipbit, flipbit))
   7325   // fold (bitcast (fabs x)) ->
   7326   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
   7327   //     (xor (bitcast x) (build_pair flipbit, flipbit))
   7328   // This often reduces constant pool loads.
   7329   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
   7330        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
   7331       N0.getNode()->hasOneUse() && VT.isInteger() &&
   7332       !VT.isVector() && !N0.getValueType().isVector()) {
   7333     SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT,
   7334                                   N0.getOperand(0));
   7335     AddToWorklist(NewConv.getNode());
   7336 
   7337     SDLoc DL(N);
   7338     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
   7339       assert(VT.getSizeInBits() == 128);
   7340       SDValue SignBit = DAG.getConstant(
   7341           APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
   7342       SDValue FlipBit;
   7343       if (N0.getOpcode() == ISD::FNEG) {
   7344         FlipBit = SignBit;
   7345         AddToWorklist(FlipBit.getNode());
   7346       } else {
   7347         assert(N0.getOpcode() == ISD::FABS);
   7348         SDValue Hi =
   7349             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
   7350                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
   7351                                               SDLoc(NewConv)));
   7352         AddToWorklist(Hi.getNode());
   7353         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
   7354         AddToWorklist(FlipBit.getNode());
   7355       }
   7356       SDValue FlipBits =
   7357           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
   7358       AddToWorklist(FlipBits.getNode());
   7359       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
   7360     }
   7361     APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
   7362     if (N0.getOpcode() == ISD::FNEG)
   7363       return DAG.getNode(ISD::XOR, DL, VT,
   7364                          NewConv, DAG.getConstant(SignBit, DL, VT));
   7365     assert(N0.getOpcode() == ISD::FABS);
   7366     return DAG.getNode(ISD::AND, DL, VT,
   7367                        NewConv, DAG.getConstant(~SignBit, DL, VT));
   7368   }
   7369 
   7370   // fold (bitconvert (fcopysign cst, x)) ->
   7371   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
   7372   // Note that we don't handle (copysign x, cst) because this can always be
   7373   // folded to an fneg or fabs.
   7374   //
   7375   // For ppc_fp128:
   7376   // fold (bitcast (fcopysign cst, x)) ->
   7377   //     flipbit = (and (extract_element
   7378   //                     (xor (bitcast cst), (bitcast x)), 0),
   7379   //                    signbit)
   7380   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
   7381   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
   7382       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
   7383       VT.isInteger() && !VT.isVector()) {
   7384     unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
   7385     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
   7386     if (isTypeLegal(IntXVT)) {
   7387       SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0),
   7388                               IntXVT, N0.getOperand(1));
   7389       AddToWorklist(X.getNode());
   7390 
   7391       // If X has a different width than the result/lhs, sext it or truncate it.
   7392       unsigned VTWidth = VT.getSizeInBits();
   7393       if (OrigXWidth < VTWidth) {
   7394         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
   7395         AddToWorklist(X.getNode());
   7396       } else if (OrigXWidth > VTWidth) {
   7397         // To get the sign bit in the right place, we have to shift it right
   7398         // before truncating.
   7399         SDLoc DL(X);
   7400         X = DAG.getNode(ISD::SRL, DL,
   7401                         X.getValueType(), X,
   7402                         DAG.getConstant(OrigXWidth-VTWidth, DL,
   7403                                         X.getValueType()));
   7404         AddToWorklist(X.getNode());
   7405         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
   7406         AddToWorklist(X.getNode());
   7407       }
   7408 
   7409       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
   7410         APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
   7411         SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT,
   7412                                   N0.getOperand(0));
   7413         AddToWorklist(Cst.getNode());
   7414         SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT,
   7415                                 N0.getOperand(1));
   7416         AddToWorklist(X.getNode());
   7417         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
   7418         AddToWorklist(XorResult.getNode());
   7419         SDValue XorResult64 = DAG.getNode(
   7420             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
   7421             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
   7422                                   SDLoc(XorResult)));
   7423         AddToWorklist(XorResult64.getNode());
   7424         SDValue FlipBit =
   7425             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
   7426                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
   7427         AddToWorklist(FlipBit.getNode());
   7428         SDValue FlipBits =
   7429             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
   7430         AddToWorklist(FlipBits.getNode());
   7431         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
   7432       }
   7433       APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
   7434       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
   7435                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
   7436       AddToWorklist(X.getNode());
   7437 
   7438       SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0),
   7439                                 VT, N0.getOperand(0));
   7440       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
   7441                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
   7442       AddToWorklist(Cst.getNode());
   7443 
   7444       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
   7445     }
   7446   }
   7447 
   7448   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
   7449   if (N0.getOpcode() == ISD::BUILD_PAIR)
   7450     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
   7451       return CombineLD;
   7452 
   7453   // Remove double bitcasts from shuffles - this is often a legacy of
   7454   // XformToShuffleWithZero being used to combine bitmaskings (of
   7455   // float vectors bitcast to integer vectors) into shuffles.
   7456   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
   7457   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
   7458       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
   7459       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
   7460       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
   7461     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
   7462 
   7463     // If operands are a bitcast, peek through if it casts the original VT.
   7464     // If operands are a constant, just bitcast back to original VT.
   7465     auto PeekThroughBitcast = [&](SDValue Op) {
   7466       if (Op.getOpcode() == ISD::BITCAST &&
   7467           Op.getOperand(0).getValueType() == VT)
   7468         return SDValue(Op.getOperand(0));
   7469       if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
   7470           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
   7471         return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op);
   7472       return SDValue();
   7473     };
   7474 
   7475     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
   7476     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
   7477     if (!(SV0 && SV1))
   7478       return SDValue();
   7479 
   7480     int MaskScale =
   7481         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
   7482     SmallVector<int, 8> NewMask;
   7483     for (int M : SVN->getMask())
   7484       for (int i = 0; i != MaskScale; ++i)
   7485         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
   7486 
   7487     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
   7488     if (!LegalMask) {
   7489       std::swap(SV0, SV1);
   7490       ShuffleVectorSDNode::commuteMask(NewMask);
   7491       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
   7492     }
   7493 
   7494     if (LegalMask)
   7495       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
   7496   }
   7497 
   7498   return SDValue();
   7499 }
   7500 
   7501 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
   7502   EVT VT = N->getValueType(0);
   7503   return CombineConsecutiveLoads(N, VT);
   7504 }
   7505 
   7506 /// We know that BV is a build_vector node with Constant, ConstantFP or Undef
   7507 /// operands. DstEltVT indicates the destination element value type.
   7508 SDValue DAGCombiner::
   7509 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
   7510   EVT SrcEltVT = BV->getValueType(0).getVectorElementType();
   7511 
   7512   // If this is already the right type, we're done.
   7513   if (SrcEltVT == DstEltVT) return SDValue(BV, 0);
   7514 
   7515   unsigned SrcBitSize = SrcEltVT.getSizeInBits();
   7516   unsigned DstBitSize = DstEltVT.getSizeInBits();
   7517 
   7518   // If this is a conversion of N elements of one type to N elements of another
   7519   // type, convert each element.  This handles FP<->INT cases.
   7520   if (SrcBitSize == DstBitSize) {
   7521     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
   7522                               BV->getValueType(0).getVectorNumElements());
   7523 
   7524     // Due to the FP element handling below calling this routine recursively,
   7525     // we can end up with a scalar-to-vector node here.
   7526     if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
   7527       return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
   7528                          DAG.getNode(ISD::BITCAST, SDLoc(BV),
   7529                                      DstEltVT, BV->getOperand(0)));
   7530 
   7531     SmallVector<SDValue, 8> Ops;
   7532     for (SDValue Op : BV->op_values()) {
   7533       // If the vector element type is not legal, the BUILD_VECTOR operands
   7534       // are promoted and implicitly truncated.  Make that explicit here.
   7535       if (Op.getValueType() != SrcEltVT)
   7536         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
   7537       Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
   7538                                 DstEltVT, Op));
   7539       AddToWorklist(Ops.back().getNode());
   7540     }
   7541     return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
   7542   }
   7543 
   7544   // Otherwise, we're growing or shrinking the elements.  To avoid having to
   7545   // handle annoying details of growing/shrinking FP values, we convert them to
   7546   // int first.
   7547   if (SrcEltVT.isFloatingPoint()) {
   7548     // Convert the input float vector to a int vector where the elements are the
   7549     // same sizes.
   7550     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
   7551     BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
   7552     SrcEltVT = IntVT;
   7553   }
   7554 
   7555   // Now we know the input is an integer vector.  If the output is a FP type,
   7556   // convert to integer first, then to FP of the right size.
   7557   if (DstEltVT.isFloatingPoint()) {
   7558     EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
   7559     SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();
   7560 
   7561     // Next, convert to FP elements of the same size.
   7562     return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
   7563   }
   7564 
   7565   SDLoc DL(BV);
   7566 
   7567   // Okay, we know the src/dst types are both integers of differing types.
   7568   // Handling growing first.
   7569   assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
   7570   if (SrcBitSize < DstBitSize) {
   7571     unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;
   7572 
   7573     SmallVector<SDValue, 8> Ops;
   7574     for (unsigned i = 0, e = BV->getNumOperands(); i != e;
   7575          i += NumInputsPerOutput) {
   7576       bool isLE = DAG.getDataLayout().isLittleEndian();
   7577       APInt NewBits = APInt(DstBitSize, 0);
   7578       bool EltIsUndef = true;
   7579       for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
   7580         // Shift the previously computed bits over.
   7581         NewBits <<= SrcBitSize;
   7582         SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
   7583         if (Op.getOpcode() == ISD::UNDEF) continue;
   7584         EltIsUndef = false;
   7585 
   7586         NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
   7587                    zextOrTrunc(SrcBitSize).zext(DstBitSize);
   7588       }
   7589 
   7590       if (EltIsUndef)
   7591         Ops.push_back(DAG.getUNDEF(DstEltVT));
   7592       else
   7593         Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
   7594     }
   7595 
   7596     EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
   7597     return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
   7598   }
   7599 
   7600   // Finally, this must be the case where we are shrinking elements: each input
   7601   // turns into multiple outputs.
   7602   unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
   7603   EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
   7604                             NumOutputsPerInput*BV->getNumOperands());
   7605   SmallVector<SDValue, 8> Ops;
   7606 
   7607   for (const SDValue &Op : BV->op_values()) {
   7608     if (Op.getOpcode() == ISD::UNDEF) {
   7609       Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
   7610       continue;
   7611     }
   7612 
   7613     APInt OpVal = cast<ConstantSDNode>(Op)->
   7614                   getAPIntValue().zextOrTrunc(SrcBitSize);
   7615 
   7616     for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
   7617       APInt ThisVal = OpVal.trunc(DstBitSize);
   7618       Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
   7619       OpVal = OpVal.lshr(DstBitSize);
   7620     }
   7621 
   7622     // For big endian targets, swap the order of the pieces of each element.
   7623     if (DAG.getDataLayout().isBigEndian())
   7624       std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
   7625   }
   7626 
   7627   return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Ops);
   7628 }
   7629 
   7630 /// Try to perform FMA combining on a given FADD node.
   7631 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   7632   SDValue N0 = N->getOperand(0);
   7633   SDValue N1 = N->getOperand(1);
   7634   EVT VT = N->getValueType(0);
   7635   SDLoc SL(N);
   7636 
   7637   const TargetOptions &Options = DAG.getTarget().Options;
   7638   bool AllowFusion =
   7639       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
   7640 
   7641   // Floating-point multiply-add with intermediate rounding.
   7642   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   7643 
   7644   // Floating-point multiply-add without intermediate rounding.
   7645   bool HasFMA =
   7646       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   7647       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   7648 
   7649   // No valid opcode, do not combine.
   7650   if (!HasFMAD && !HasFMA)
   7651     return SDValue();
   7652 
   7653   // Always prefer FMAD to FMA for precision.
   7654   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   7655   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   7656   bool LookThroughFPExt = TLI.isFPExtFree(VT);
   7657 
   7658   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
   7659   // prefer to fold the multiply with fewer uses.
   7660   if (Aggressive && N0.getOpcode() == ISD::FMUL &&
   7661       N1.getOpcode() == ISD::FMUL) {
   7662     if (N0.getNode()->use_size() > N1.getNode()->use_size())
   7663       std::swap(N0, N1);
   7664   }
   7665 
   7666   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
   7667   if (N0.getOpcode() == ISD::FMUL &&
   7668       (Aggressive || N0->hasOneUse())) {
   7669     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7670                        N0.getOperand(0), N0.getOperand(1), N1);
   7671   }
   7672 
   7673   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
   7674   // Note: Commutes FADD operands.
   7675   if (N1.getOpcode() == ISD::FMUL &&
   7676       (Aggressive || N1->hasOneUse())) {
   7677     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7678                        N1.getOperand(0), N1.getOperand(1), N0);
   7679   }
   7680 
   7681   // Look through FP_EXTEND nodes to do more combining.
   7682   if (AllowFusion && LookThroughFPExt) {
   7683     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
   7684     if (N0.getOpcode() == ISD::FP_EXTEND) {
   7685       SDValue N00 = N0.getOperand(0);
   7686       if (N00.getOpcode() == ISD::FMUL)
   7687         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7688                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7689                                        N00.getOperand(0)),
   7690                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7691                                        N00.getOperand(1)), N1);
   7692     }
   7693 
   7694     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
   7695     // Note: Commutes FADD operands.
   7696     if (N1.getOpcode() == ISD::FP_EXTEND) {
   7697       SDValue N10 = N1.getOperand(0);
   7698       if (N10.getOpcode() == ISD::FMUL)
   7699         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7700                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7701                                        N10.getOperand(0)),
   7702                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7703                                        N10.getOperand(1)), N0);
   7704     }
   7705   }
   7706 
   7707   // More folding opportunities when target permits.
   7708   if ((AllowFusion || HasFMAD)  && Aggressive) {
   7709     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
   7710     if (N0.getOpcode() == PreferredFusedOpcode &&
   7711         N0.getOperand(2).getOpcode() == ISD::FMUL) {
   7712       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7713                          N0.getOperand(0), N0.getOperand(1),
   7714                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   7715                                      N0.getOperand(2).getOperand(0),
   7716                                      N0.getOperand(2).getOperand(1),
   7717                                      N1));
   7718     }
   7719 
   7720     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
   7721     if (N1->getOpcode() == PreferredFusedOpcode &&
   7722         N1.getOperand(2).getOpcode() == ISD::FMUL) {
   7723       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7724                          N1.getOperand(0), N1.getOperand(1),
   7725                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   7726                                      N1.getOperand(2).getOperand(0),
   7727                                      N1.getOperand(2).getOperand(1),
   7728                                      N0));
   7729     }
   7730 
   7731     if (AllowFusion && LookThroughFPExt) {
   7732       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
   7733       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
   7734       auto FoldFAddFMAFPExtFMul = [&] (
   7735           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
   7736         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
   7737                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   7738                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
   7739                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
   7740                                        Z));
   7741       };
   7742       if (N0.getOpcode() == PreferredFusedOpcode) {
   7743         SDValue N02 = N0.getOperand(2);
   7744         if (N02.getOpcode() == ISD::FP_EXTEND) {
   7745           SDValue N020 = N02.getOperand(0);
   7746           if (N020.getOpcode() == ISD::FMUL)
   7747             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
   7748                                         N020.getOperand(0), N020.getOperand(1),
   7749                                         N1);
   7750         }
   7751       }
   7752 
   7753       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
   7754       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
   7755       // FIXME: This turns two single-precision and one double-precision
   7756       // operation into two double-precision operations, which might not be
   7757       // interesting for all targets, especially GPUs.
   7758       auto FoldFAddFPExtFMAFMul = [&] (
   7759           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
   7760         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7761                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
   7762                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
   7763                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   7764                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
   7765                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
   7766                                        Z));
   7767       };
   7768       if (N0.getOpcode() == ISD::FP_EXTEND) {
   7769         SDValue N00 = N0.getOperand(0);
   7770         if (N00.getOpcode() == PreferredFusedOpcode) {
   7771           SDValue N002 = N00.getOperand(2);
   7772           if (N002.getOpcode() == ISD::FMUL)
   7773             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
   7774                                         N002.getOperand(0), N002.getOperand(1),
   7775                                         N1);
   7776         }
   7777       }
   7778 
   7779       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
   7780       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
   7781       if (N1.getOpcode() == PreferredFusedOpcode) {
   7782         SDValue N12 = N1.getOperand(2);
   7783         if (N12.getOpcode() == ISD::FP_EXTEND) {
   7784           SDValue N120 = N12.getOperand(0);
   7785           if (N120.getOpcode() == ISD::FMUL)
   7786             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
   7787                                         N120.getOperand(0), N120.getOperand(1),
   7788                                         N0);
   7789         }
   7790       }
   7791 
   7792       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
   7793       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
   7794       // FIXME: This turns two single-precision and one double-precision
   7795       // operation into two double-precision operations, which might not be
   7796       // interesting for all targets, especially GPUs.
   7797       if (N1.getOpcode() == ISD::FP_EXTEND) {
   7798         SDValue N10 = N1.getOperand(0);
   7799         if (N10.getOpcode() == PreferredFusedOpcode) {
   7800           SDValue N102 = N10.getOperand(2);
   7801           if (N102.getOpcode() == ISD::FMUL)
   7802             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
   7803                                         N102.getOperand(0), N102.getOperand(1),
   7804                                         N0);
   7805         }
   7806       }
   7807     }
   7808   }
   7809 
   7810   return SDValue();
   7811 }
   7812 
   7813 /// Try to perform FMA combining on a given FSUB node.
   7814 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
   7815   SDValue N0 = N->getOperand(0);
   7816   SDValue N1 = N->getOperand(1);
   7817   EVT VT = N->getValueType(0);
   7818   SDLoc SL(N);
   7819 
   7820   const TargetOptions &Options = DAG.getTarget().Options;
   7821   bool AllowFusion =
   7822       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
   7823 
   7824   // Floating-point multiply-add with intermediate rounding.
   7825   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   7826 
   7827   // Floating-point multiply-add without intermediate rounding.
   7828   bool HasFMA =
   7829       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   7830       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   7831 
   7832   // No valid opcode, do not combine.
   7833   if (!HasFMAD && !HasFMA)
   7834     return SDValue();
   7835 
   7836   // Always prefer FMAD to FMA for precision.
   7837   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   7838   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   7839   bool LookThroughFPExt = TLI.isFPExtFree(VT);
   7840 
   7841   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
   7842   if (N0.getOpcode() == ISD::FMUL &&
   7843       (Aggressive || N0->hasOneUse())) {
   7844     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7845                        N0.getOperand(0), N0.getOperand(1),
   7846                        DAG.getNode(ISD::FNEG, SL, VT, N1));
   7847   }
   7848 
   7849   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
   7850   // Note: Commutes FSUB operands.
   7851   if (N1.getOpcode() == ISD::FMUL &&
   7852       (Aggressive || N1->hasOneUse()))
   7853     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7854                        DAG.getNode(ISD::FNEG, SL, VT,
   7855                                    N1.getOperand(0)),
   7856                        N1.getOperand(1), N0);
   7857 
   7858   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
   7859   if (N0.getOpcode() == ISD::FNEG &&
   7860       N0.getOperand(0).getOpcode() == ISD::FMUL &&
   7861       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
   7862     SDValue N00 = N0.getOperand(0).getOperand(0);
   7863     SDValue N01 = N0.getOperand(0).getOperand(1);
   7864     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7865                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
   7866                        DAG.getNode(ISD::FNEG, SL, VT, N1));
   7867   }
   7868 
   7869   // Look through FP_EXTEND nodes to do more combining.
   7870   if (AllowFusion && LookThroughFPExt) {
   7871     // fold (fsub (fpext (fmul x, y)), z)
   7872     //   -> (fma (fpext x), (fpext y), (fneg z))
   7873     if (N0.getOpcode() == ISD::FP_EXTEND) {
   7874       SDValue N00 = N0.getOperand(0);
   7875       if (N00.getOpcode() == ISD::FMUL)
   7876         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7877                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7878                                        N00.getOperand(0)),
   7879                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7880                                        N00.getOperand(1)),
   7881                            DAG.getNode(ISD::FNEG, SL, VT, N1));
   7882     }
   7883 
   7884     // fold (fsub x, (fpext (fmul y, z)))
   7885     //   -> (fma (fneg (fpext y)), (fpext z), x)
   7886     // Note: Commutes FSUB operands.
   7887     if (N1.getOpcode() == ISD::FP_EXTEND) {
   7888       SDValue N10 = N1.getOperand(0);
   7889       if (N10.getOpcode() == ISD::FMUL)
   7890         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7891                            DAG.getNode(ISD::FNEG, SL, VT,
   7892                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7893                                                    N10.getOperand(0))),
   7894                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7895                                        N10.getOperand(1)),
   7896                            N0);
   7897     }
   7898 
   7899     // fold (fsub (fpext (fneg (fmul, x, y))), z)
   7900     //   -> (fneg (fma (fpext x), (fpext y), z))
   7901     // Note: This could be removed with appropriate canonicalization of the
   7902     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
   7903     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
   7904     // from implementing the canonicalization in visitFSUB.
   7905     if (N0.getOpcode() == ISD::FP_EXTEND) {
   7906       SDValue N00 = N0.getOperand(0);
   7907       if (N00.getOpcode() == ISD::FNEG) {
   7908         SDValue N000 = N00.getOperand(0);
   7909         if (N000.getOpcode() == ISD::FMUL) {
   7910           return DAG.getNode(ISD::FNEG, SL, VT,
   7911                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   7912                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7913                                                      N000.getOperand(0)),
   7914                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7915                                                      N000.getOperand(1)),
   7916                                          N1));
   7917         }
   7918       }
   7919     }
   7920 
   7921     // fold (fsub (fneg (fpext (fmul, x, y))), z)
   7922     //   -> (fneg (fma (fpext x)), (fpext y), z)
   7923     // Note: This could be removed with appropriate canonicalization of the
   7924     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
   7925     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
   7926     // from implementing the canonicalization in visitFSUB.
   7927     if (N0.getOpcode() == ISD::FNEG) {
   7928       SDValue N00 = N0.getOperand(0);
   7929       if (N00.getOpcode() == ISD::FP_EXTEND) {
   7930         SDValue N000 = N00.getOperand(0);
   7931         if (N000.getOpcode() == ISD::FMUL) {
   7932           return DAG.getNode(ISD::FNEG, SL, VT,
   7933                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   7934                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7935                                                      N000.getOperand(0)),
   7936                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7937                                                      N000.getOperand(1)),
   7938                                          N1));
   7939         }
   7940       }
   7941     }
   7942 
   7943   }
   7944 
   7945   // More folding opportunities when target permits.
   7946   if ((AllowFusion || HasFMAD) && Aggressive) {
   7947     // fold (fsub (fma x, y, (fmul u, v)), z)
   7948     //   -> (fma x, y (fma u, v, (fneg z)))
   7949     if (N0.getOpcode() == PreferredFusedOpcode &&
   7950         N0.getOperand(2).getOpcode() == ISD::FMUL) {
   7951       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7952                          N0.getOperand(0), N0.getOperand(1),
   7953                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   7954                                      N0.getOperand(2).getOperand(0),
   7955                                      N0.getOperand(2).getOperand(1),
   7956                                      DAG.getNode(ISD::FNEG, SL, VT,
   7957                                                  N1)));
   7958     }
   7959 
   7960     // fold (fsub x, (fma y, z, (fmul u, v)))
   7961     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
   7962     if (N1.getOpcode() == PreferredFusedOpcode &&
   7963         N1.getOperand(2).getOpcode() == ISD::FMUL) {
   7964       SDValue N20 = N1.getOperand(2).getOperand(0);
   7965       SDValue N21 = N1.getOperand(2).getOperand(1);
   7966       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7967                          DAG.getNode(ISD::FNEG, SL, VT,
   7968                                      N1.getOperand(0)),
   7969                          N1.getOperand(1),
   7970                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   7971                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
   7972 
   7973                                      N21, N0));
   7974     }
   7975 
   7976     if (AllowFusion && LookThroughFPExt) {
   7977       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
   7978       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
   7979       if (N0.getOpcode() == PreferredFusedOpcode) {
   7980         SDValue N02 = N0.getOperand(2);
   7981         if (N02.getOpcode() == ISD::FP_EXTEND) {
   7982           SDValue N020 = N02.getOperand(0);
   7983           if (N020.getOpcode() == ISD::FMUL)
   7984             return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7985                                N0.getOperand(0), N0.getOperand(1),
   7986                                DAG.getNode(PreferredFusedOpcode, SL, VT,
   7987                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7988                                                        N020.getOperand(0)),
   7989                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7990                                                        N020.getOperand(1)),
   7991                                            DAG.getNode(ISD::FNEG, SL, VT,
   7992                                                        N1)));
   7993         }
   7994       }
   7995 
   7996       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
   7997       //   -> (fma (fpext x), (fpext y),
   7998       //           (fma (fpext u), (fpext v), (fneg z)))
   7999       // FIXME: This turns two single-precision and one double-precision
   8000       // operation into two double-precision operations, which might not be
   8001       // interesting for all targets, especially GPUs.
   8002       if (N0.getOpcode() == ISD::FP_EXTEND) {
   8003         SDValue N00 = N0.getOperand(0);
   8004         if (N00.getOpcode() == PreferredFusedOpcode) {
   8005           SDValue N002 = N00.getOperand(2);
   8006           if (N002.getOpcode() == ISD::FMUL)
   8007             return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8008                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8009                                            N00.getOperand(0)),
   8010                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8011                                            N00.getOperand(1)),
   8012                                DAG.getNode(PreferredFusedOpcode, SL, VT,
   8013                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8014                                                        N002.getOperand(0)),
   8015                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8016                                                        N002.getOperand(1)),
   8017                                            DAG.getNode(ISD::FNEG, SL, VT,
   8018                                                        N1)));
   8019         }
   8020       }
   8021 
   8022       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
   8023       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
   8024       if (N1.getOpcode() == PreferredFusedOpcode &&
   8025         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
   8026         SDValue N120 = N1.getOperand(2).getOperand(0);
   8027         if (N120.getOpcode() == ISD::FMUL) {
   8028           SDValue N1200 = N120.getOperand(0);
   8029           SDValue N1201 = N120.getOperand(1);
   8030           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8031                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
   8032                              N1.getOperand(1),
   8033                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   8034                                          DAG.getNode(ISD::FNEG, SL, VT,
   8035                                              DAG.getNode(ISD::FP_EXTEND, SL,
   8036                                                          VT, N1200)),
   8037                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8038                                                      N1201),
   8039                                          N0));
   8040         }
   8041       }
   8042 
   8043       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
   8044       //   -> (fma (fneg (fpext y)), (fpext z),
   8045       //           (fma (fneg (fpext u)), (fpext v), x))
   8046       // FIXME: This turns two single-precision and one double-precision
   8047       // operation into two double-precision operations, which might not be
   8048       // interesting for all targets, especially GPUs.
   8049       if (N1.getOpcode() == ISD::FP_EXTEND &&
   8050         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
   8051         SDValue N100 = N1.getOperand(0).getOperand(0);
   8052         SDValue N101 = N1.getOperand(0).getOperand(1);
   8053         SDValue N102 = N1.getOperand(0).getOperand(2);
   8054         if (N102.getOpcode() == ISD::FMUL) {
   8055           SDValue N1020 = N102.getOperand(0);
   8056           SDValue N1021 = N102.getOperand(1);
   8057           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8058                              DAG.getNode(ISD::FNEG, SL, VT,
   8059                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8060                                                      N100)),
   8061                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
   8062                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   8063                                          DAG.getNode(ISD::FNEG, SL, VT,
   8064                                              DAG.getNode(ISD::FP_EXTEND, SL,
   8065                                                          VT, N1020)),
   8066                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8067                                                      N1021),
   8068                                          N0));
   8069         }
   8070       }
   8071     }
   8072   }
   8073 
   8074   return SDValue();
   8075 }
   8076 
   8077 /// Try to perform FMA combining on a given FMUL node.
   8078 SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
   8079   SDValue N0 = N->getOperand(0);
   8080   SDValue N1 = N->getOperand(1);
   8081   EVT VT = N->getValueType(0);
   8082   SDLoc SL(N);
   8083 
   8084   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
   8085 
   8086   const TargetOptions &Options = DAG.getTarget().Options;
   8087   bool AllowFusion =
   8088       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
   8089 
   8090   // Floating-point multiply-add with intermediate rounding.
   8091   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   8092 
   8093   // Floating-point multiply-add without intermediate rounding.
   8094   bool HasFMA =
   8095       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   8096       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   8097 
   8098   // No valid opcode, do not combine.
   8099   if (!HasFMAD && !HasFMA)
   8100     return SDValue();
   8101 
   8102   // Always prefer FMAD to FMA for precision.
   8103   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   8104   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   8105 
   8106   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
   8107   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
   8108   auto FuseFADD = [&](SDValue X, SDValue Y) {
   8109     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
   8110       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
   8111       if (XC1 && XC1->isExactlyValue(+1.0))
   8112         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
   8113       if (XC1 && XC1->isExactlyValue(-1.0))
   8114         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   8115                            DAG.getNode(ISD::FNEG, SL, VT, Y));
   8116     }
   8117     return SDValue();
   8118   };
   8119 
   8120   if (SDValue FMA = FuseFADD(N0, N1))
   8121     return FMA;
   8122   if (SDValue FMA = FuseFADD(N1, N0))
   8123     return FMA;
   8124 
   8125   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
   8126   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
   8127   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
   8128   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
   8129   auto FuseFSUB = [&](SDValue X, SDValue Y) {
   8130     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
   8131       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
   8132       if (XC0 && XC0->isExactlyValue(+1.0))
   8133         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8134                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
   8135                            Y);
   8136       if (XC0 && XC0->isExactlyValue(-1.0))
   8137         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8138                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
   8139                            DAG.getNode(ISD::FNEG, SL, VT, Y));
   8140 
   8141       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
   8142       if (XC1 && XC1->isExactlyValue(+1.0))
   8143         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   8144                            DAG.getNode(ISD::FNEG, SL, VT, Y));
   8145       if (XC1 && XC1->isExactlyValue(-1.0))
   8146         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
   8147     }
   8148     return SDValue();
   8149   };
   8150 
   8151   if (SDValue FMA = FuseFSUB(N0, N1))
   8152     return FMA;
   8153   if (SDValue FMA = FuseFSUB(N1, N0))
   8154     return FMA;
   8155 
   8156   return SDValue();
   8157 }
   8158 
   8159 SDValue DAGCombiner::visitFADD(SDNode *N) {
   8160   SDValue N0 = N->getOperand(0);
   8161   SDValue N1 = N->getOperand(1);
   8162   bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
   8163   bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
   8164   EVT VT = N->getValueType(0);
   8165   SDLoc DL(N);
   8166   const TargetOptions &Options = DAG.getTarget().Options;
   8167   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
   8168 
   8169   // fold vector ops
   8170   if (VT.isVector())
   8171     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   8172       return FoldedVOp;
   8173 
   8174   // fold (fadd c1, c2) -> c1 + c2
   8175   if (N0CFP && N1CFP)
   8176     return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);
   8177 
   8178   // canonicalize constant to RHS
   8179   if (N0CFP && !N1CFP)
   8180     return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);
   8181 
   8182   // fold (fadd A, (fneg B)) -> (fsub A, B)
   8183   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
   8184       isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
   8185     return DAG.getNode(ISD::FSUB, DL, VT, N0,
   8186                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
   8187 
   8188   // fold (fadd (fneg A), B) -> (fsub B, A)
   8189   if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
   8190       isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
   8191     return DAG.getNode(ISD::FSUB, DL, VT, N1,
   8192                        GetNegatedExpression(N0, DAG, LegalOperations), Flags);
   8193 
   8194   // If 'unsafe math' is enabled, fold lots of things.
   8195   if (Options.UnsafeFPMath) {
   8196     // No FP constant should be created after legalization as Instruction
   8197     // Selection pass has a hard time dealing with FP constants.
   8198     bool AllowNewConst = (Level < AfterLegalizeDAG);
   8199 
   8200     // fold (fadd A, 0) -> A
   8201     if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
   8202       if (N1C->isZero())
   8203         return N0;
   8204 
   8205     // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
   8206     if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
   8207         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
   8208       return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
   8209                          DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
   8210                                      Flags),
   8211                          Flags);
   8212 
   8213     // If allowed, fold (fadd (fneg x), x) -> 0.0
   8214     if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
   8215       return DAG.getConstantFP(0.0, DL, VT);
   8216 
   8217     // If allowed, fold (fadd x, (fneg x)) -> 0.0
   8218     if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
   8219       return DAG.getConstantFP(0.0, DL, VT);
   8220 
   8221     // We can fold chains of FADD's of the same value into multiplications.
   8222     // This transform is not safe in general because we are reducing the number
   8223     // of rounding steps.
   8224     if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
   8225       if (N0.getOpcode() == ISD::FMUL) {
   8226         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
   8227         bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));
   8228 
   8229         // (fadd (fmul x, c), x) -> (fmul x, c+1)
   8230         if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
   8231           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
   8232                                        DAG.getConstantFP(1.0, DL, VT), Flags);
   8233           return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
   8234         }
   8235 
   8236         // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
   8237         if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
   8238             N1.getOperand(0) == N1.getOperand(1) &&
   8239             N0.getOperand(0) == N1.getOperand(0)) {
   8240           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
   8241                                        DAG.getConstantFP(2.0, DL, VT), Flags);
   8242           return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
   8243         }
   8244       }
   8245 
   8246       if (N1.getOpcode() == ISD::FMUL) {
   8247         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
   8248         bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));
   8249 
   8250         // (fadd x, (fmul x, c)) -> (fmul x, c+1)
   8251         if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
   8252           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
   8253                                        DAG.getConstantFP(1.0, DL, VT), Flags);
   8254           return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
   8255         }
   8256 
   8257         // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
   8258         if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
   8259             N0.getOperand(0) == N0.getOperand(1) &&
   8260             N1.getOperand(0) == N0.getOperand(0)) {
   8261           SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
   8262                                        DAG.getConstantFP(2.0, DL, VT), Flags);
   8263           return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
   8264         }
   8265       }
   8266 
   8267       if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
   8268         bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
   8269         // (fadd (fadd x, x), x) -> (fmul x, 3.0)
   8270         if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
   8271             (N0.getOperand(0) == N1)) {
   8272           return DAG.getNode(ISD::FMUL, DL, VT,
   8273                              N1, DAG.getConstantFP(3.0, DL, VT), Flags);
   8274         }
   8275       }
   8276 
   8277       if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
   8278         bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
   8279         // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
   8280         if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
   8281             N1.getOperand(0) == N0) {
   8282           return DAG.getNode(ISD::FMUL, DL, VT,
   8283                              N0, DAG.getConstantFP(3.0, DL, VT), Flags);
   8284         }
   8285       }
   8286 
   8287       // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
   8288       if (AllowNewConst &&
   8289           N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
   8290           N0.getOperand(0) == N0.getOperand(1) &&
   8291           N1.getOperand(0) == N1.getOperand(1) &&
   8292           N0.getOperand(0) == N1.getOperand(0)) {
   8293         return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
   8294                            DAG.getConstantFP(4.0, DL, VT), Flags);
   8295       }
   8296     }
   8297   } // enable-unsafe-fp-math
   8298 
   8299   // FADD -> FMA combines:
   8300   if (SDValue Fused = visitFADDForFMACombine(N)) {
   8301     AddToWorklist(Fused.getNode());
   8302     return Fused;
   8303   }
   8304 
   8305   return SDValue();
   8306 }
   8307 
   8308 SDValue DAGCombiner::visitFSUB(SDNode *N) {
   8309   SDValue N0 = N->getOperand(0);
   8310   SDValue N1 = N->getOperand(1);
   8311   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   8312   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   8313   EVT VT = N->getValueType(0);
   8314   SDLoc dl(N);
   8315   const TargetOptions &Options = DAG.getTarget().Options;
   8316   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
   8317 
   8318   // fold vector ops
   8319   if (VT.isVector())
   8320     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   8321       return FoldedVOp;
   8322 
   8323   // fold (fsub c1, c2) -> c1-c2
   8324   if (N0CFP && N1CFP)
   8325     return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
   8326 
   8327   // fold (fsub A, (fneg B)) -> (fadd A, B)
   8328   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
   8329     return DAG.getNode(ISD::FADD, dl, VT, N0,
   8330                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
   8331 
   8332   // If 'unsafe math' is enabled, fold lots of things.
   8333   if (Options.UnsafeFPMath) {
   8334     // (fsub A, 0) -> A
   8335     if (N1CFP && N1CFP->isZero())
   8336       return N0;
   8337 
   8338     // (fsub 0, B) -> -B
   8339     if (N0CFP && N0CFP->isZero()) {
   8340       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
   8341         return GetNegatedExpression(N1, DAG, LegalOperations);
   8342       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   8343         return DAG.getNode(ISD::FNEG, dl, VT, N1);
   8344     }
   8345 
   8346     // (fsub x, x) -> 0.0
   8347     if (N0 == N1)
   8348       return DAG.getConstantFP(0.0f, dl, VT);
   8349 
   8350     // (fsub x, (fadd x, y)) -> (fneg y)
   8351     // (fsub x, (fadd y, x)) -> (fneg y)
   8352     if (N1.getOpcode() == ISD::FADD) {
   8353       SDValue N10 = N1->getOperand(0);
   8354       SDValue N11 = N1->getOperand(1);
   8355 
   8356       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
   8357         return GetNegatedExpression(N11, DAG, LegalOperations);
   8358 
   8359       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
   8360         return GetNegatedExpression(N10, DAG, LegalOperations);
   8361     }
   8362   }
   8363 
   8364   // FSUB -> FMA combines:
   8365   if (SDValue Fused = visitFSUBForFMACombine(N)) {
   8366     AddToWorklist(Fused.getNode());
   8367     return Fused;
   8368   }
   8369 
   8370   return SDValue();
   8371 }
   8372 
   8373 SDValue DAGCombiner::visitFMUL(SDNode *N) {
   8374   SDValue N0 = N->getOperand(0);
   8375   SDValue N1 = N->getOperand(1);
   8376   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   8377   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   8378   EVT VT = N->getValueType(0);
   8379   SDLoc DL(N);
   8380   const TargetOptions &Options = DAG.getTarget().Options;
   8381   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
   8382 
   8383   // fold vector ops
   8384   if (VT.isVector()) {
   8385     // This just handles C1 * C2 for vectors. Other vector folds are below.
   8386     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   8387       return FoldedVOp;
   8388   }
   8389 
   8390   // fold (fmul c1, c2) -> c1*c2
   8391   if (N0CFP && N1CFP)
   8392     return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);
   8393 
   8394   // canonicalize constant to RHS
   8395   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   8396      !isConstantFPBuildVectorOrConstantFP(N1))
   8397     return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);
   8398 
   8399   // fold (fmul A, 1.0) -> A
   8400   if (N1CFP && N1CFP->isExactlyValue(1.0))
   8401     return N0;
   8402 
   8403   if (Options.UnsafeFPMath) {
   8404     // fold (fmul A, 0) -> 0
   8405     if (N1CFP && N1CFP->isZero())
   8406       return N1;
   8407 
   8408     // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
   8409     if (N0.getOpcode() == ISD::FMUL) {
   8410       // Fold scalars or any vector constants (not just splats).
   8411       // This fold is done in general by InstCombine, but extra fmul insts
   8412       // may have been generated during lowering.
   8413       SDValue N00 = N0.getOperand(0);
   8414       SDValue N01 = N0.getOperand(1);
   8415       auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
   8416       auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
   8417       auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);
   8418 
   8419       // Check 1: Make sure that the first operand of the inner multiply is NOT
   8420       // a constant. Otherwise, we may induce infinite looping.
   8421       if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
   8422         // Check 2: Make sure that the second operand of the inner multiply and
   8423         // the second operand of the outer multiply are constants.
   8424         if ((N1CFP && isConstOrConstSplatFP(N01)) ||
   8425             (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
   8426           SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
   8427           return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
   8428         }
   8429       }
   8430     }
   8431 
   8432     // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
   8433     // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
   8434     // during an early run of DAGCombiner can prevent folding with fmuls
   8435     // inserted during lowering.
   8436     if (N0.getOpcode() == ISD::FADD &&
   8437         (N0.getOperand(0) == N0.getOperand(1)) &&
   8438         N0.hasOneUse()) {
   8439       const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
   8440       SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
   8441       return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
   8442     }
   8443   }
   8444 
   8445   // fold (fmul X, 2.0) -> (fadd X, X)
   8446   if (N1CFP && N1CFP->isExactlyValue(+2.0))
   8447     return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);
   8448 
   8449   // fold (fmul X, -1.0) -> (fneg X)
   8450   if (N1CFP && N1CFP->isExactlyValue(-1.0))
   8451     if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   8452       return DAG.getNode(ISD::FNEG, DL, VT, N0);
   8453 
   8454   // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
   8455   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
   8456     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
   8457       // Both can be negated for free, check to see if at least one is cheaper
   8458       // negated.
   8459       if (LHSNeg == 2 || RHSNeg == 2)
   8460         return DAG.getNode(ISD::FMUL, DL, VT,
   8461                            GetNegatedExpression(N0, DAG, LegalOperations),
   8462                            GetNegatedExpression(N1, DAG, LegalOperations),
   8463                            Flags);
   8464     }
   8465   }
   8466 
   8467   // FMUL -> FMA combines:
   8468   if (SDValue Fused = visitFMULForFMACombine(N)) {
   8469     AddToWorklist(Fused.getNode());
   8470     return Fused;
   8471   }
   8472 
   8473   return SDValue();
   8474 }
   8475 
   8476 SDValue DAGCombiner::visitFMA(SDNode *N) {
   8477   SDValue N0 = N->getOperand(0);
   8478   SDValue N1 = N->getOperand(1);
   8479   SDValue N2 = N->getOperand(2);
   8480   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   8481   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   8482   EVT VT = N->getValueType(0);
   8483   SDLoc dl(N);
   8484   const TargetOptions &Options = DAG.getTarget().Options;
   8485 
   8486   // Constant fold FMA.
   8487   if (isa<ConstantFPSDNode>(N0) &&
   8488       isa<ConstantFPSDNode>(N1) &&
   8489       isa<ConstantFPSDNode>(N2)) {
   8490     return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
   8491   }
   8492 
   8493   if (Options.UnsafeFPMath) {
   8494     if (N0CFP && N0CFP->isZero())
   8495       return N2;
   8496     if (N1CFP && N1CFP->isZero())
   8497       return N2;
   8498   }
   8499   // TODO: The FMA node should have flags that propagate to these nodes.
   8500   if (N0CFP && N0CFP->isExactlyValue(1.0))
   8501     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
   8502   if (N1CFP && N1CFP->isExactlyValue(1.0))
   8503     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
   8504 
   8505   // Canonicalize (fma c, x, y) -> (fma x, c, y)
   8506   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   8507      !isConstantFPBuildVectorOrConstantFP(N1))
   8508     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
   8509 
   8510   // TODO: FMA nodes should have flags that propagate to the created nodes.
   8511   // For now, create a Flags object for use with all unsafe math transforms.
   8512   SDNodeFlags Flags;
   8513   Flags.setUnsafeAlgebra(true);
   8514 
   8515   if (Options.UnsafeFPMath) {
   8516     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
   8517     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
   8518         isConstantFPBuildVectorOrConstantFP(N1) &&
   8519         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
   8520       return DAG.getNode(ISD::FMUL, dl, VT, N0,
   8521                          DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
   8522                                      &Flags), &Flags);
   8523     }
   8524 
   8525     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
   8526     if (N0.getOpcode() == ISD::FMUL &&
   8527         isConstantFPBuildVectorOrConstantFP(N1) &&
   8528         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
   8529       return DAG.getNode(ISD::FMA, dl, VT,
   8530                          N0.getOperand(0),
   8531                          DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
   8532                                      &Flags),
   8533                          N2);
   8534     }
   8535   }
   8536 
   8537   // (fma x, 1, y) -> (fadd x, y)
   8538   // (fma x, -1, y) -> (fadd (fneg x), y)
   8539   if (N1CFP) {
   8540     if (N1CFP->isExactlyValue(1.0))
   8541       // TODO: The FMA node should have flags that propagate to this node.
   8542       return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
   8543 
   8544     if (N1CFP->isExactlyValue(-1.0) &&
   8545         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
   8546       SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
   8547       AddToWorklist(RHSNeg.getNode());
   8548       // TODO: The FMA node should have flags that propagate to this node.
   8549       return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
   8550     }
   8551   }
   8552 
   8553   if (Options.UnsafeFPMath) {
   8554     // (fma x, c, x) -> (fmul x, (c+1))
   8555     if (N1CFP && N0 == N2) {
   8556     return DAG.getNode(ISD::FMUL, dl, VT, N0,
   8557                          DAG.getNode(ISD::FADD, dl, VT,
   8558                                      N1, DAG.getConstantFP(1.0, dl, VT),
   8559                                      &Flags), &Flags);
   8560     }
   8561 
   8562     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
   8563     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
   8564       return DAG.getNode(ISD::FMUL, dl, VT, N0,
   8565                          DAG.getNode(ISD::FADD, dl, VT,
   8566                                      N1, DAG.getConstantFP(-1.0, dl, VT),
   8567                                      &Flags), &Flags);
   8568     }
   8569   }
   8570 
   8571   return SDValue();
   8572 }
   8573 
   8574 // Combine multiple FDIVs with the same divisor into multiple FMULs by the
   8575 // reciprocal.
   8576 // E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
   8577 // Notice that this is not always beneficial. One reason is different target
   8578 // may have different costs for FDIV and FMUL, so sometimes the cost of two
   8579 // FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
   8580 // is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
   8581 SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
   8582   bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
   8583   const SDNodeFlags *Flags = N->getFlags();
   8584   if (!UnsafeMath && !Flags->hasAllowReciprocal())
   8585     return SDValue();
   8586 
   8587   // Skip if current node is a reciprocal.
   8588   SDValue N0 = N->getOperand(0);
   8589   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   8590   if (N0CFP && N0CFP->isExactlyValue(1.0))
   8591     return SDValue();
   8592 
   8593   // Exit early if the target does not want this transform or if there can't
   8594   // possibly be enough uses of the divisor to make the transform worthwhile.
   8595   SDValue N1 = N->getOperand(1);
   8596   unsigned MinUses = TLI.combineRepeatedFPDivisors();
   8597   if (!MinUses || N1->use_size() < MinUses)
   8598     return SDValue();
   8599 
   8600   // Find all FDIV users of the same divisor.
   8601   // Use a set because duplicates may be present in the user list.
   8602   SetVector<SDNode *> Users;
   8603   for (auto *U : N1->uses()) {
   8604     if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
   8605       // This division is eligible for optimization only if global unsafe math
   8606       // is enabled or if this division allows reciprocal formation.
   8607       if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
   8608         Users.insert(U);
   8609     }
   8610   }
   8611 
   8612   // Now that we have the actual number of divisor uses, make sure it meets
   8613   // the minimum threshold specified by the target.
   8614   if (Users.size() < MinUses)
   8615     return SDValue();
   8616 
   8617   EVT VT = N->getValueType(0);
   8618   SDLoc DL(N);
   8619   SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
   8620   SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);
   8621 
   8622   // Dividend / Divisor -> Dividend * Reciprocal
   8623   for (auto *U : Users) {
   8624     SDValue Dividend = U->getOperand(0);
   8625     if (Dividend != FPOne) {
   8626       SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
   8627                                     Reciprocal, Flags);
   8628       CombineTo(U, NewNode);
   8629     } else if (U != Reciprocal.getNode()) {
   8630       // In the absence of fast-math-flags, this user node is always the
   8631       // same node as Reciprocal, but with FMF they may be different nodes.
   8632       CombineTo(U, Reciprocal);
   8633     }
   8634   }
   8635   return SDValue(N, 0);  // N was replaced.
   8636 }
   8637 
   8638 SDValue DAGCombiner::visitFDIV(SDNode *N) {
   8639   SDValue N0 = N->getOperand(0);
   8640   SDValue N1 = N->getOperand(1);
   8641   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   8642   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   8643   EVT VT = N->getValueType(0);
   8644   SDLoc DL(N);
   8645   const TargetOptions &Options = DAG.getTarget().Options;
   8646   SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
   8647 
   8648   // fold vector ops
   8649   if (VT.isVector())
   8650     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   8651       return FoldedVOp;
   8652 
   8653   // fold (fdiv c1, c2) -> c1/c2
   8654   if (N0CFP && N1CFP)
   8655     return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);
   8656 
   8657   if (Options.UnsafeFPMath) {
   8658     // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
   8659     if (N1CFP) {
   8660       // Compute the reciprocal 1.0 / c2.
   8661       APFloat N1APF = N1CFP->getValueAPF();
   8662       APFloat Recip(N1APF.getSemantics(), 1); // 1.0
   8663       APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
   8664       // Only do the transform if the reciprocal is a legal fp immediate that
   8665       // isn't too nasty (eg NaN, denormal, ...).
   8666       if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
   8667           (!LegalOperations ||
   8668            // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
   8669            // backend)... we should handle this gracefully after Legalize.
   8670            // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
   8671            TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
   8672            TLI.isFPImmLegal(Recip, VT)))
   8673         return DAG.getNode(ISD::FMUL, DL, VT, N0,
   8674                            DAG.getConstantFP(Recip, DL, VT), Flags);
   8675     }
   8676 
   8677     // If this FDIV is part of a reciprocal square root, it may be folded
   8678     // into a target-specific square root estimate instruction.
   8679     if (N1.getOpcode() == ISD::FSQRT) {
   8680       if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0), Flags)) {
   8681         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   8682       }
   8683     } else if (N1.getOpcode() == ISD::FP_EXTEND &&
   8684                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
   8685       if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
   8686                                           Flags)) {
   8687         RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
   8688         AddToWorklist(RV.getNode());
   8689         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   8690       }
   8691     } else if (N1.getOpcode() == ISD::FP_ROUND &&
   8692                N1.getOperand(0).getOpcode() == ISD::FSQRT) {
   8693       if (SDValue RV = BuildRsqrtEstimate(N1.getOperand(0).getOperand(0),
   8694                                           Flags)) {
   8695         RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
   8696         AddToWorklist(RV.getNode());
   8697         return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   8698       }
   8699     } else if (N1.getOpcode() == ISD::FMUL) {
   8700       // Look through an FMUL. Even though this won't remove the FDIV directly,
   8701       // it's still worthwhile to get rid of the FSQRT if possible.
   8702       SDValue SqrtOp;
   8703       SDValue OtherOp;
   8704       if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
   8705         SqrtOp = N1.getOperand(0);
   8706         OtherOp = N1.getOperand(1);
   8707       } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
   8708         SqrtOp = N1.getOperand(1);
   8709         OtherOp = N1.getOperand(0);
   8710       }
   8711       if (SqrtOp.getNode()) {
   8712         // We found a FSQRT, so try to make this fold:
   8713         // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
   8714         if (SDValue RV = BuildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
   8715           RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
   8716           AddToWorklist(RV.getNode());
   8717           return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   8718         }
   8719       }
   8720     }
   8721 
   8722     // Fold into a reciprocal estimate and multiply instead of a real divide.
   8723     if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
   8724       AddToWorklist(RV.getNode());
   8725       return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
   8726     }
   8727   }
   8728 
   8729   // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
   8730   if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
   8731     if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
   8732       // Both can be negated for free, check to see if at least one is cheaper
   8733       // negated.
   8734       if (LHSNeg == 2 || RHSNeg == 2)
   8735         return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
   8736                            GetNegatedExpression(N0, DAG, LegalOperations),
   8737                            GetNegatedExpression(N1, DAG, LegalOperations),
   8738                            Flags);
   8739     }
   8740   }
   8741 
   8742   if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
   8743     return CombineRepeatedDivisors;
   8744 
   8745   return SDValue();
   8746 }
   8747 
   8748 SDValue DAGCombiner::visitFREM(SDNode *N) {
   8749   SDValue N0 = N->getOperand(0);
   8750   SDValue N1 = N->getOperand(1);
   8751   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   8752   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   8753   EVT VT = N->getValueType(0);
   8754 
   8755   // fold (frem c1, c2) -> fmod(c1,c2)
   8756   if (N0CFP && N1CFP)
   8757     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
   8758                        &cast<BinaryWithFlagsSDNode>(N)->Flags);
   8759 
   8760   return SDValue();
   8761 }
   8762 
   8763 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
   8764   if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
   8765     return SDValue();
   8766 
   8767   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
   8768   // For now, create a Flags object for use with all unsafe math transforms.
   8769   SDNodeFlags Flags;
   8770   Flags.setUnsafeAlgebra(true);
   8771 
   8772   // Compute this as X * (1/sqrt(X)) = X * (X ** -0.5)
   8773   SDValue RV = BuildRsqrtEstimate(N->getOperand(0), &Flags);
   8774   if (!RV)
   8775     return SDValue();
   8776 
   8777   EVT VT = RV.getValueType();
   8778   SDLoc DL(N);
   8779   RV = DAG.getNode(ISD::FMUL, DL, VT, N->getOperand(0), RV, &Flags);
   8780   AddToWorklist(RV.getNode());
   8781 
   8782   // Unfortunately, RV is now NaN if the input was exactly 0.
   8783   // Select out this case and force the answer to 0.
   8784   SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
   8785   EVT CCVT = getSetCCResultType(VT);
   8786   SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, N->getOperand(0), Zero, ISD::SETEQ);
   8787   AddToWorklist(ZeroCmp.getNode());
   8788   AddToWorklist(RV.getNode());
   8789 
   8790   return DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT,
   8791                      ZeroCmp, Zero, RV);
   8792 }
   8793 
   8794 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
   8795   // copysign(x, fp_extend(y)) -> copysign(x, y)
   8796   // copysign(x, fp_round(y)) -> copysign(x, y)
   8797   // Do not optimize out type conversion of f128 type yet.
   8798   // For some target like x86_64, configuration is changed
   8799   // to keep one f128 value in one SSE register, but
   8800   // instruction selection cannot handle FCOPYSIGN on
   8801   // SSE registers yet.
   8802   SDValue N1 = N->getOperand(1);
   8803   EVT N1VT = N1->getValueType(0);
   8804   EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
   8805   return (N1.getOpcode() == ISD::FP_EXTEND ||
   8806           N1.getOpcode() == ISD::FP_ROUND) &&
   8807          (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
   8808 }
   8809 
   8810 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   8811   SDValue N0 = N->getOperand(0);
   8812   SDValue N1 = N->getOperand(1);
   8813   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   8814   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   8815   EVT VT = N->getValueType(0);
   8816 
   8817   if (N0CFP && N1CFP)  // Constant fold
   8818     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
   8819 
   8820   if (N1CFP) {
   8821     const APFloat& V = N1CFP->getValueAPF();
   8822     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
   8823     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
   8824     if (!V.isNegative()) {
   8825       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
   8826         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   8827     } else {
   8828       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   8829         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
   8830                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
   8831     }
   8832   }
   8833 
   8834   // copysign(fabs(x), y) -> copysign(x, y)
   8835   // copysign(fneg(x), y) -> copysign(x, y)
   8836   // copysign(copysign(x,z), y) -> copysign(x, y)
   8837   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
   8838       N0.getOpcode() == ISD::FCOPYSIGN)
   8839     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   8840                        N0.getOperand(0), N1);
   8841 
   8842   // copysign(x, abs(y)) -> abs(x)
   8843   if (N1.getOpcode() == ISD::FABS)
   8844     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   8845 
   8846   // copysign(x, copysign(y,z)) -> copysign(x, z)
   8847   if (N1.getOpcode() == ISD::FCOPYSIGN)
   8848     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   8849                        N0, N1.getOperand(1));
   8850 
   8851   // copysign(x, fp_extend(y)) -> copysign(x, y)
   8852   // copysign(x, fp_round(y)) -> copysign(x, y)
   8853   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
   8854     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   8855                        N0, N1.getOperand(0));
   8856 
   8857   return SDValue();
   8858 }
   8859 
   8860 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
   8861   SDValue N0 = N->getOperand(0);
   8862   EVT VT = N->getValueType(0);
   8863   EVT OpVT = N0.getValueType();
   8864 
   8865   // fold (sint_to_fp c1) -> c1fp
   8866   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   8867       // ...but only if the target supports immediate floating-point values
   8868       (!LegalOperations ||
   8869        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
   8870     return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   8871 
   8872   // If the input is a legal type, and SINT_TO_FP is not legal on this target,
   8873   // but UINT_TO_FP is legal on this target, try to convert.
   8874   if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
   8875       TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
   8876     // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
   8877     if (DAG.SignBitIsZero(N0))
   8878       return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   8879   }
   8880 
   8881   // The next optimizations are desirable only if SELECT_CC can be lowered.
   8882   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
   8883     // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
   8884     if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
   8885         !VT.isVector() &&
   8886         (!LegalOperations ||
   8887          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
   8888       SDLoc DL(N);
   8889       SDValue Ops[] =
   8890         { N0.getOperand(0), N0.getOperand(1),
   8891           DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   8892           N0.getOperand(2) };
   8893       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   8894     }
   8895 
   8896     // fold (sint_to_fp (zext (setcc x, y, cc))) ->
   8897     //      (select_cc x, y, 1.0, 0.0,, cc)
   8898     if (N0.getOpcode() == ISD::ZERO_EXTEND &&
   8899         N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
   8900         (!LegalOperations ||
   8901          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
   8902       SDLoc DL(N);
   8903       SDValue Ops[] =
   8904         { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
   8905           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   8906           N0.getOperand(0).getOperand(2) };
   8907       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   8908     }
   8909   }
   8910 
   8911   return SDValue();
   8912 }
   8913 
   8914 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   8915   SDValue N0 = N->getOperand(0);
   8916   EVT VT = N->getValueType(0);
   8917   EVT OpVT = N0.getValueType();
   8918 
   8919   // fold (uint_to_fp c1) -> c1fp
   8920   if (isConstantIntBuildVectorOrConstantInt(N0) &&
   8921       // ...but only if the target supports immediate floating-point values
   8922       (!LegalOperations ||
   8923        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
   8924     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   8925 
   8926   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
   8927   // but SINT_TO_FP is legal on this target, try to convert.
   8928   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
   8929       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
   8930     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
   8931     if (DAG.SignBitIsZero(N0))
   8932       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   8933   }
   8934 
   8935   // The next optimizations are desirable only if SELECT_CC can be lowered.
   8936   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
   8937     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
   8938 
   8939     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
   8940         (!LegalOperations ||
   8941          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
   8942       SDLoc DL(N);
   8943       SDValue Ops[] =
   8944         { N0.getOperand(0), N0.getOperand(1),
   8945           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   8946           N0.getOperand(2) };
   8947       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   8948     }
   8949   }
   8950 
   8951   return SDValue();
   8952 }
   8953 
   8954 // Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
   8955 static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
   8956   SDValue N0 = N->getOperand(0);
   8957   EVT VT = N->getValueType(0);
   8958 
   8959   if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
   8960     return SDValue();
   8961 
   8962   SDValue Src = N0.getOperand(0);
   8963   EVT SrcVT = Src.getValueType();
   8964   bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
   8965   bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;
   8966 
   8967   // We can safely assume the conversion won't overflow the output range,
   8968   // because (for example) (uint8_t)18293.f is undefined behavior.
   8969 
   8970   // Since we can assume the conversion won't overflow, our decision as to
   8971   // whether the input will fit in the float should depend on the minimum
   8972   // of the input range and output range.
   8973 
   8974   // This means this is also safe for a signed input and unsigned output, since
   8975   // a negative input would lead to undefined behavior.
   8976   unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
   8977   unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
   8978   unsigned ActualSize = std::min(InputSize, OutputSize);
   8979   const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());
   8980 
   8981   // We can only fold away the float conversion if the input range can be
   8982   // represented exactly in the float range.
   8983   if (APFloat::semanticsPrecision(sem) >= ActualSize) {
   8984     if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
   8985       unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
   8986                                                        : ISD::ZERO_EXTEND;
   8987       return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
   8988     }
   8989     if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
   8990       return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
   8991     if (SrcVT == VT)
   8992       return Src;
   8993     return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Src);
   8994   }
   8995   return SDValue();
   8996 }
   8997 
   8998 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
   8999   SDValue N0 = N->getOperand(0);
   9000   EVT VT = N->getValueType(0);
   9001 
   9002   // fold (fp_to_sint c1fp) -> c1
   9003   if (isConstantFPBuildVectorOrConstantFP(N0))
   9004     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
   9005 
   9006   return FoldIntToFPToInt(N, DAG);
   9007 }
   9008 
   9009 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
   9010   SDValue N0 = N->getOperand(0);
   9011   EVT VT = N->getValueType(0);
   9012 
   9013   // fold (fp_to_uint c1fp) -> c1
   9014   if (isConstantFPBuildVectorOrConstantFP(N0))
   9015     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
   9016 
   9017   return FoldIntToFPToInt(N, DAG);
   9018 }
   9019 
   9020 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
   9021   SDValue N0 = N->getOperand(0);
   9022   SDValue N1 = N->getOperand(1);
   9023   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   9024   EVT VT = N->getValueType(0);
   9025 
   9026   // fold (fp_round c1fp) -> c1fp
   9027   if (N0CFP)
   9028     return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);
   9029 
   9030   // fold (fp_round (fp_extend x)) -> x
   9031   if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
   9032     return N0.getOperand(0);
   9033 
   9034   // fold (fp_round (fp_round x)) -> (fp_round x)
   9035   if (N0.getOpcode() == ISD::FP_ROUND) {
   9036     const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
   9037     const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;
   9038 
   9039     // Skip this folding if it results in an fp_round from f80 to f16.
   9040     //
   9041     // f80 to f16 always generates an expensive (and as yet, unimplemented)
   9042     // libcall to __truncxfhf2 instead of selecting native f16 conversion
   9043     // instructions from f32 or f64.  Moreover, the first (value-preserving)
   9044     // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
   9045     // x86.
   9046     if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
   9047       return SDValue();
   9048 
   9049     // If the first fp_round isn't a value preserving truncation, it might
   9050     // introduce a tie in the second fp_round, that wouldn't occur in the
   9051     // single-step fp_round we want to fold to.
   9052     // In other words, double rounding isn't the same as rounding.
   9053     // Also, this is a value preserving truncation iff both fp_round's are.
   9054     if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
   9055       SDLoc DL(N);
   9056       return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
   9057                          DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
   9058     }
   9059   }
   9060 
   9061   // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
   9062   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
   9063     SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
   9064                               N0.getOperand(0), N1);
   9065     AddToWorklist(Tmp.getNode());
   9066     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   9067                        Tmp, N0.getOperand(1));
   9068   }
   9069 
   9070   return SDValue();
   9071 }
   9072 
   9073 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
   9074   SDValue N0 = N->getOperand(0);
   9075   EVT VT = N->getValueType(0);
   9076   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
   9077   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   9078 
   9079   // fold (fp_round_inreg c1fp) -> c1fp
   9080   if (N0CFP && isTypeLegal(EVT)) {
   9081     SDLoc DL(N);
   9082     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
   9083     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
   9084   }
   9085 
   9086   return SDValue();
   9087 }
   9088 
   9089 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
   9090   SDValue N0 = N->getOperand(0);
   9091   EVT VT = N->getValueType(0);
   9092 
   9093   // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
   9094   if (N->hasOneUse() &&
   9095       N->use_begin()->getOpcode() == ISD::FP_ROUND)
   9096     return SDValue();
   9097 
   9098   // fold (fp_extend c1fp) -> c1fp
   9099   if (isConstantFPBuildVectorOrConstantFP(N0))
   9100     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);
   9101 
   9102   // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
   9103   if (N0.getOpcode() == ISD::FP16_TO_FP &&
   9104       TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
   9105     return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));
   9106 
   9107   // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
   9108   // value of X.
   9109   if (N0.getOpcode() == ISD::FP_ROUND
   9110       && N0.getNode()->getConstantOperandVal(1) == 1) {
   9111     SDValue In = N0.getOperand(0);
   9112     if (In.getValueType() == VT) return In;
   9113     if (VT.bitsLT(In.getValueType()))
   9114       return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
   9115                          In, N0.getOperand(1));
   9116     return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
   9117   }
   9118 
   9119   // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
   9120   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   9121        TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
   9122     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   9123     SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
   9124                                      LN0->getChain(),
   9125                                      LN0->getBasePtr(), N0.getValueType(),
   9126                                      LN0->getMemOperand());
   9127     CombineTo(N, ExtLoad);
   9128     CombineTo(N0.getNode(),
   9129               DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
   9130                           N0.getValueType(), ExtLoad,
   9131                           DAG.getIntPtrConstant(1, SDLoc(N0))),
   9132               ExtLoad.getValue(1));
   9133     return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   9134   }
   9135 
   9136   return SDValue();
   9137 }
   9138 
   9139 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
   9140   SDValue N0 = N->getOperand(0);
   9141   EVT VT = N->getValueType(0);
   9142 
   9143   // fold (fceil c1) -> fceil(c1)
   9144   if (isConstantFPBuildVectorOrConstantFP(N0))
   9145     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
   9146 
   9147   return SDValue();
   9148 }
   9149 
   9150 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
   9151   SDValue N0 = N->getOperand(0);
   9152   EVT VT = N->getValueType(0);
   9153 
   9154   // fold (ftrunc c1) -> ftrunc(c1)
   9155   if (isConstantFPBuildVectorOrConstantFP(N0))
   9156     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
   9157 
   9158   return SDValue();
   9159 }
   9160 
   9161 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
   9162   SDValue N0 = N->getOperand(0);
   9163   EVT VT = N->getValueType(0);
   9164 
   9165   // fold (ffloor c1) -> ffloor(c1)
   9166   if (isConstantFPBuildVectorOrConstantFP(N0))
   9167     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
   9168 
   9169   return SDValue();
   9170 }
   9171 
   9172 // FIXME: FNEG and FABS have a lot in common; refactor.
   9173 SDValue DAGCombiner::visitFNEG(SDNode *N) {
   9174   SDValue N0 = N->getOperand(0);
   9175   EVT VT = N->getValueType(0);
   9176 
   9177   // Constant fold FNEG.
   9178   if (isConstantFPBuildVectorOrConstantFP(N0))
   9179     return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);
   9180 
   9181   if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
   9182                          &DAG.getTarget().Options))
   9183     return GetNegatedExpression(N0, DAG, LegalOperations);
   9184 
   9185   // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
   9186   // constant pool values.
   9187   if (!TLI.isFNegFree(VT) &&
   9188       N0.getOpcode() == ISD::BITCAST &&
   9189       N0.getNode()->hasOneUse()) {
   9190     SDValue Int = N0.getOperand(0);
   9191     EVT IntVT = Int.getValueType();
   9192     if (IntVT.isInteger() && !IntVT.isVector()) {
   9193       APInt SignMask;
   9194       if (N0.getValueType().isVector()) {
   9195         // For a vector, get a mask such as 0x80... per scalar element
   9196         // and splat it.
   9197         SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
   9198         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
   9199       } else {
   9200         // For a scalar, just generate 0x80...
   9201         SignMask = APInt::getSignBit(IntVT.getSizeInBits());
   9202       }
   9203       SDLoc DL0(N0);
   9204       Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
   9205                         DAG.getConstant(SignMask, DL0, IntVT));
   9206       AddToWorklist(Int.getNode());
   9207       return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int);
   9208     }
   9209   }
   9210 
   9211   // (fneg (fmul c, x)) -> (fmul -c, x)
   9212   if (N0.getOpcode() == ISD::FMUL &&
   9213       (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
   9214     ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
   9215     if (CFP1) {
   9216       APFloat CVal = CFP1->getValueAPF();
   9217       CVal.changeSign();
   9218       if (Level >= AfterLegalizeDAG &&
   9219           (TLI.isFPImmLegal(CVal, VT) ||
   9220            TLI.isOperationLegal(ISD::ConstantFP, VT)))
   9221         return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
   9222                            DAG.getNode(ISD::FNEG, SDLoc(N), VT,
   9223                                        N0.getOperand(1)),
   9224                            &cast<BinaryWithFlagsSDNode>(N0)->Flags);
   9225     }
   9226   }
   9227 
   9228   return SDValue();
   9229 }
   9230 
   9231 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
   9232   SDValue N0 = N->getOperand(0);
   9233   SDValue N1 = N->getOperand(1);
   9234   EVT VT = N->getValueType(0);
   9235   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   9236   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   9237 
   9238   if (N0CFP && N1CFP) {
   9239     const APFloat &C0 = N0CFP->getValueAPF();
   9240     const APFloat &C1 = N1CFP->getValueAPF();
   9241     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
   9242   }
   9243 
   9244   // Canonicalize to constant on RHS.
   9245   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   9246      !isConstantFPBuildVectorOrConstantFP(N1))
   9247     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
   9248 
   9249   return SDValue();
   9250 }
   9251 
   9252 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
   9253   SDValue N0 = N->getOperand(0);
   9254   SDValue N1 = N->getOperand(1);
   9255   EVT VT = N->getValueType(0);
   9256   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   9257   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   9258 
   9259   if (N0CFP && N1CFP) {
   9260     const APFloat &C0 = N0CFP->getValueAPF();
   9261     const APFloat &C1 = N1CFP->getValueAPF();
   9262     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
   9263   }
   9264 
   9265   // Canonicalize to constant on RHS.
   9266   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   9267      !isConstantFPBuildVectorOrConstantFP(N1))
   9268     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
   9269 
   9270   return SDValue();
   9271 }
   9272 
   9273 SDValue DAGCombiner::visitFABS(SDNode *N) {
   9274   SDValue N0 = N->getOperand(0);
   9275   EVT VT = N->getValueType(0);
   9276 
   9277   // fold (fabs c1) -> fabs(c1)
   9278   if (isConstantFPBuildVectorOrConstantFP(N0))
   9279     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   9280 
   9281   // fold (fabs (fabs x)) -> (fabs x)
   9282   if (N0.getOpcode() == ISD::FABS)
   9283     return N->getOperand(0);
   9284 
   9285   // fold (fabs (fneg x)) -> (fabs x)
   9286   // fold (fabs (fcopysign x, y)) -> (fabs x)
   9287   if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
   9288     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));
   9289 
   9290   // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
   9291   // constant pool values.
   9292   if (!TLI.isFAbsFree(VT) &&
   9293       N0.getOpcode() == ISD::BITCAST &&
   9294       N0.getNode()->hasOneUse()) {
   9295     SDValue Int = N0.getOperand(0);
   9296     EVT IntVT = Int.getValueType();
   9297     if (IntVT.isInteger() && !IntVT.isVector()) {
   9298       APInt SignMask;
   9299       if (N0.getValueType().isVector()) {
   9300         // For a vector, get a mask such as 0x7f... per scalar element
   9301         // and splat it.
   9302         SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
   9303         SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
   9304       } else {
   9305         // For a scalar, just generate 0x7f...
   9306         SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
   9307       }
   9308       SDLoc DL(N0);
   9309       Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
   9310                         DAG.getConstant(SignMask, DL, IntVT));
   9311       AddToWorklist(Int.getNode());
   9312       return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int);
   9313     }
   9314   }
   9315 
   9316   return SDValue();
   9317 }
   9318 
   9319 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
   9320   SDValue Chain = N->getOperand(0);
   9321   SDValue N1 = N->getOperand(1);
   9322   SDValue N2 = N->getOperand(2);
   9323 
   9324   // If N is a constant we could fold this into a fallthrough or unconditional
   9325   // branch. However that doesn't happen very often in normal code, because
   9326   // Instcombine/SimplifyCFG should have handled the available opportunities.
   9327   // If we did this folding here, it would be necessary to update the
   9328   // MachineBasicBlock CFG, which is awkward.
   9329 
   9330   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
   9331   // on the target.
   9332   if (N1.getOpcode() == ISD::SETCC &&
   9333       TLI.isOperationLegalOrCustom(ISD::BR_CC,
   9334                                    N1.getOperand(0).getValueType())) {
   9335     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   9336                        Chain, N1.getOperand(2),
   9337                        N1.getOperand(0), N1.getOperand(1), N2);
   9338   }
   9339 
   9340   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
   9341       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
   9342        (N1.getOperand(0).hasOneUse() &&
   9343         N1.getOperand(0).getOpcode() == ISD::SRL))) {
   9344     SDNode *Trunc = nullptr;
   9345     if (N1.getOpcode() == ISD::TRUNCATE) {
   9346       // Look pass the truncate.
   9347       Trunc = N1.getNode();
   9348       N1 = N1.getOperand(0);
   9349     }
   9350 
   9351     // Match this pattern so that we can generate simpler code:
   9352     //
   9353     //   %a = ...
   9354     //   %b = and i32 %a, 2
   9355     //   %c = srl i32 %b, 1
   9356     //   brcond i32 %c ...
   9357     //
   9358     // into
   9359     //
   9360     //   %a = ...
   9361     //   %b = and i32 %a, 2
   9362     //   %c = setcc eq %b, 0
   9363     //   brcond %c ...
   9364     //
   9365     // This applies only when the AND constant value has one bit set and the
   9366     // SRL constant is equal to the log2 of the AND constant. The back-end is
   9367     // smart enough to convert the result into a TEST/JMP sequence.
   9368     SDValue Op0 = N1.getOperand(0);
   9369     SDValue Op1 = N1.getOperand(1);
   9370 
   9371     if (Op0.getOpcode() == ISD::AND &&
   9372         Op1.getOpcode() == ISD::Constant) {
   9373       SDValue AndOp1 = Op0.getOperand(1);
   9374 
   9375       if (AndOp1.getOpcode() == ISD::Constant) {
   9376         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
   9377 
   9378         if (AndConst.isPowerOf2() &&
   9379             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
   9380           SDLoc DL(N);
   9381           SDValue SetCC =
   9382             DAG.getSetCC(DL,
   9383                          getSetCCResultType(Op0.getValueType()),
   9384                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
   9385                          ISD::SETNE);
   9386 
   9387           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
   9388                                           MVT::Other, Chain, SetCC, N2);
   9389           // Don't add the new BRCond into the worklist or else SimplifySelectCC
   9390           // will convert it back to (X & C1) >> C2.
   9391           CombineTo(N, NewBRCond, false);
   9392           // Truncate is dead.
   9393           if (Trunc)
   9394             deleteAndRecombine(Trunc);
   9395           // Replace the uses of SRL with SETCC
   9396           WorklistRemover DeadNodes(*this);
   9397           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
   9398           deleteAndRecombine(N1.getNode());
   9399           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   9400         }
   9401       }
   9402     }
   9403 
   9404     if (Trunc)
   9405       // Restore N1 if the above transformation doesn't match.
   9406       N1 = N->getOperand(1);
   9407   }
   9408 
   9409   // Transform br(xor(x, y)) -> br(x != y)
   9410   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
   9411   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
   9412     SDNode *TheXor = N1.getNode();
   9413     SDValue Op0 = TheXor->getOperand(0);
   9414     SDValue Op1 = TheXor->getOperand(1);
   9415     if (Op0.getOpcode() == Op1.getOpcode()) {
   9416       // Avoid missing important xor optimizations.
   9417       if (SDValue Tmp = visitXOR(TheXor)) {
   9418         if (Tmp.getNode() != TheXor) {
   9419           DEBUG(dbgs() << "\nReplacing.8 ";
   9420                 TheXor->dump(&DAG);
   9421                 dbgs() << "\nWith: ";
   9422                 Tmp.getNode()->dump(&DAG);
   9423                 dbgs() << '\n');
   9424           WorklistRemover DeadNodes(*this);
   9425           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
   9426           deleteAndRecombine(TheXor);
   9427           return DAG.getNode(ISD::BRCOND, SDLoc(N),
   9428                              MVT::Other, Chain, Tmp, N2);
   9429         }
   9430 
   9431         // visitXOR has changed XOR's operands or replaced the XOR completely,
   9432         // bail out.
   9433         return SDValue(N, 0);
   9434       }
   9435     }
   9436 
   9437     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
   9438       bool Equal = false;
   9439       if (isOneConstant(Op0) && Op0.hasOneUse() &&
   9440           Op0.getOpcode() == ISD::XOR) {
   9441         TheXor = Op0.getNode();
   9442         Equal = true;
   9443       }
   9444 
   9445       EVT SetCCVT = N1.getValueType();
   9446       if (LegalTypes)
   9447         SetCCVT = getSetCCResultType(SetCCVT);
   9448       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
   9449                                    SetCCVT,
   9450                                    Op0, Op1,
   9451                                    Equal ? ISD::SETEQ : ISD::SETNE);
   9452       // Replace the uses of XOR with SETCC
   9453       WorklistRemover DeadNodes(*this);
   9454       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
   9455       deleteAndRecombine(N1.getNode());
   9456       return DAG.getNode(ISD::BRCOND, SDLoc(N),
   9457                          MVT::Other, Chain, SetCC, N2);
   9458     }
   9459   }
   9460 
   9461   return SDValue();
   9462 }
   9463 
   9464 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
   9465 //
   9466 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
   9467   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
   9468   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
   9469 
   9470   // If N is a constant we could fold this into a fallthrough or unconditional
   9471   // branch. However that doesn't happen very often in normal code, because
   9472   // Instcombine/SimplifyCFG should have handled the available opportunities.
   9473   // If we did this folding here, it would be necessary to update the
   9474   // MachineBasicBlock CFG, which is awkward.
   9475 
   9476   // Use SimplifySetCC to simplify SETCC's.
   9477   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
   9478                                CondLHS, CondRHS, CC->get(), SDLoc(N),
   9479                                false);
   9480   if (Simp.getNode()) AddToWorklist(Simp.getNode());
   9481 
   9482   // fold to a simpler setcc
   9483   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
   9484     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   9485                        N->getOperand(0), Simp.getOperand(2),
   9486                        Simp.getOperand(0), Simp.getOperand(1),
   9487                        N->getOperand(4));
   9488 
   9489   return SDValue();
   9490 }
   9491 
   9492 /// Return true if 'Use' is a load or a store that uses N as its base pointer
   9493 /// and that N may be folded in the load / store addressing mode.
   9494 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
   9495                                     SelectionDAG &DAG,
   9496                                     const TargetLowering &TLI) {
   9497   EVT VT;
   9498   unsigned AS;
   9499 
   9500   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
   9501     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
   9502       return false;
   9503     VT = LD->getMemoryVT();
   9504     AS = LD->getAddressSpace();
   9505   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
   9506     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
   9507       return false;
   9508     VT = ST->getMemoryVT();
   9509     AS = ST->getAddressSpace();
   9510   } else
   9511     return false;
   9512 
   9513   TargetLowering::AddrMode AM;
   9514   if (N->getOpcode() == ISD::ADD) {
   9515     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   9516     if (Offset)
   9517       // [reg +/- imm]
   9518       AM.BaseOffs = Offset->getSExtValue();
   9519     else
   9520       // [reg +/- reg]
   9521       AM.Scale = 1;
   9522   } else if (N->getOpcode() == ISD::SUB) {
   9523     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   9524     if (Offset)
   9525       // [reg +/- imm]
   9526       AM.BaseOffs = -Offset->getSExtValue();
   9527     else
   9528       // [reg +/- reg]
   9529       AM.Scale = 1;
   9530   } else
   9531     return false;
   9532 
   9533   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
   9534                                    VT.getTypeForEVT(*DAG.getContext()), AS);
   9535 }
   9536 
   9537 /// Try turning a load/store into a pre-indexed load/store when the base
   9538 /// pointer is an add or subtract and it has other uses besides the load/store.
   9539 /// After the transformation, the new indexed load/store has effectively folded
   9540 /// the add/subtract in and all of its other uses are redirected to the
   9541 /// new load/store.
   9542 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
   9543   if (Level < AfterLegalizeDAG)
   9544     return false;
   9545 
   9546   bool isLoad = true;
   9547   SDValue Ptr;
   9548   EVT VT;
   9549   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
   9550     if (LD->isIndexed())
   9551       return false;
   9552     VT = LD->getMemoryVT();
   9553     if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
   9554         !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
   9555       return false;
   9556     Ptr = LD->getBasePtr();
   9557   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
   9558     if (ST->isIndexed())
   9559       return false;
   9560     VT = ST->getMemoryVT();
   9561     if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
   9562         !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
   9563       return false;
   9564     Ptr = ST->getBasePtr();
   9565     isLoad = false;
   9566   } else {
   9567     return false;
   9568   }
   9569 
   9570   // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
   9571   // out.  There is no reason to make this a preinc/predec.
   9572   if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
   9573       Ptr.getNode()->hasOneUse())
   9574     return false;
   9575 
   9576   // Ask the target to do addressing mode selection.
   9577   SDValue BasePtr;
   9578   SDValue Offset;
   9579   ISD::MemIndexedMode AM = ISD::UNINDEXED;
   9580   if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
   9581     return false;
   9582 
   9583   // Backends without true r+i pre-indexed forms may need to pass a
   9584   // constant base with a variable offset so that constant coercion
   9585   // will work with the patterns in canonical form.
   9586   bool Swapped = false;
   9587   if (isa<ConstantSDNode>(BasePtr)) {
   9588     std::swap(BasePtr, Offset);
   9589     Swapped = true;
   9590   }
   9591 
   9592   // Don't create a indexed load / store with zero offset.
   9593   if (isNullConstant(Offset))
   9594     return false;
   9595 
   9596   // Try turning it into a pre-indexed load / store except when:
   9597   // 1) The new base ptr is a frame index.
   9598   // 2) If N is a store and the new base ptr is either the same as or is a
   9599   //    predecessor of the value being stored.
   9600   // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
   9601   //    that would create a cycle.
   9602   // 4) All uses are load / store ops that use it as old base ptr.
   9603 
   9604   // Check #1.  Preinc'ing a frame index would require copying the stack pointer
   9605   // (plus the implicit offset) to a register to preinc anyway.
   9606   if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
   9607     return false;
   9608 
   9609   // Check #2.
   9610   if (!isLoad) {
   9611     SDValue Val = cast<StoreSDNode>(N)->getValue();
   9612     if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
   9613       return false;
   9614   }
   9615 
   9616   // If the offset is a constant, there may be other adds of constants that
   9617   // can be folded with this one. We should do this to avoid having to keep
   9618   // a copy of the original base pointer.
   9619   SmallVector<SDNode *, 16> OtherUses;
   9620   if (isa<ConstantSDNode>(Offset))
   9621     for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
   9622                               UE = BasePtr.getNode()->use_end();
   9623          UI != UE; ++UI) {
   9624       SDUse &Use = UI.getUse();
   9625       // Skip the use that is Ptr and uses of other results from BasePtr's
   9626       // node (important for nodes that return multiple results).
   9627       if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
   9628         continue;
   9629 
   9630       if (Use.getUser()->isPredecessorOf(N))
   9631         continue;
   9632 
   9633       if (Use.getUser()->getOpcode() != ISD::ADD &&
   9634           Use.getUser()->getOpcode() != ISD::SUB) {
   9635         OtherUses.clear();
   9636         break;
   9637       }
   9638 
   9639       SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
   9640       if (!isa<ConstantSDNode>(Op1)) {
   9641         OtherUses.clear();
   9642         break;
   9643       }
   9644 
   9645       // FIXME: In some cases, we can be smarter about this.
   9646       if (Op1.getValueType() != Offset.getValueType()) {
   9647         OtherUses.clear();
   9648         break;
   9649       }
   9650 
   9651       OtherUses.push_back(Use.getUser());
   9652     }
   9653 
   9654   if (Swapped)
   9655     std::swap(BasePtr, Offset);
   9656 
   9657   // Now check for #3 and #4.
   9658   bool RealUse = false;
   9659 
   9660   // Caches for hasPredecessorHelper
   9661   SmallPtrSet<const SDNode *, 32> Visited;
   9662   SmallVector<const SDNode *, 16> Worklist;
   9663 
   9664   for (SDNode *Use : Ptr.getNode()->uses()) {
   9665     if (Use == N)
   9666       continue;
   9667     if (N->hasPredecessorHelper(Use, Visited, Worklist))
   9668       return false;
   9669 
   9670     // If Ptr may be folded in addressing mode of other use, then it's
   9671     // not profitable to do this transformation.
   9672     if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
   9673       RealUse = true;
   9674   }
   9675 
   9676   if (!RealUse)
   9677     return false;
   9678 
   9679   SDValue Result;
   9680   if (isLoad)
   9681     Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
   9682                                 BasePtr, Offset, AM);
   9683   else
   9684     Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
   9685                                  BasePtr, Offset, AM);
   9686   ++PreIndexedNodes;
   9687   ++NodesCombined;
   9688   DEBUG(dbgs() << "\nReplacing.4 ";
   9689         N->dump(&DAG);
   9690         dbgs() << "\nWith: ";
   9691         Result.getNode()->dump(&DAG);
   9692         dbgs() << '\n');
   9693   WorklistRemover DeadNodes(*this);
   9694   if (isLoad) {
   9695     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
   9696     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
   9697   } else {
   9698     DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
   9699   }
   9700 
   9701   // Finally, since the node is now dead, remove it from the graph.
   9702   deleteAndRecombine(N);
   9703 
   9704   if (Swapped)
   9705     std::swap(BasePtr, Offset);
   9706 
   9707   // Replace other uses of BasePtr that can be updated to use Ptr
   9708   for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
   9709     unsigned OffsetIdx = 1;
   9710     if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
   9711       OffsetIdx = 0;
   9712     assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
   9713            BasePtr.getNode() && "Expected BasePtr operand");
   9714 
   9715     // We need to replace ptr0 in the following expression:
   9716     //   x0 * offset0 + y0 * ptr0 = t0
   9717     // knowing that
   9718     //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
   9719     //
   9720     // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
   9721     // indexed load/store and the expresion that needs to be re-written.
   9722     //
   9723     // Therefore, we have:
   9724     //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1
   9725 
   9726     ConstantSDNode *CN =
   9727       cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
   9728     int X0, X1, Y0, Y1;
   9729     APInt Offset0 = CN->getAPIntValue();
   9730     APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();
   9731 
   9732     X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
   9733     Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
   9734     X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
   9735     Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;
   9736 
   9737     unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;
   9738 
   9739     APInt CNV = Offset0;
   9740     if (X0 < 0) CNV = -CNV;
   9741     if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
   9742     else CNV = CNV - Offset1;
   9743 
   9744     SDLoc DL(OtherUses[i]);
   9745 
   9746     // We can now generate the new expression.
   9747     SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
   9748     SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);
   9749 
   9750     SDValue NewUse = DAG.getNode(Opcode,
   9751                                  DL,
   9752                                  OtherUses[i]->getValueType(0), NewOp1, NewOp2);
   9753     DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
   9754     deleteAndRecombine(OtherUses[i]);
   9755   }
   9756 
   9757   // Replace the uses of Ptr with uses of the updated base value.
   9758   DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
   9759   deleteAndRecombine(Ptr.getNode());
   9760 
   9761   return true;
   9762 }
   9763 
   9764 /// Try to combine a load/store with a add/sub of the base pointer node into a
   9765 /// post-indexed load/store. The transformation folded the add/subtract into the
   9766 /// new indexed load/store effectively and all of its uses are redirected to the
   9767 /// new load/store.
   9768 bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
   9769   if (Level < AfterLegalizeDAG)
   9770     return false;
   9771 
   9772   bool isLoad = true;
   9773   SDValue Ptr;
   9774   EVT VT;
   9775   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
   9776     if (LD->isIndexed())
   9777       return false;
   9778     VT = LD->getMemoryVT();
   9779     if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
   9780         !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
   9781       return false;
   9782     Ptr = LD->getBasePtr();
   9783   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
   9784     if (ST->isIndexed())
   9785       return false;
   9786     VT = ST->getMemoryVT();
   9787     if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
   9788         !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
   9789       return false;
   9790     Ptr = ST->getBasePtr();
   9791     isLoad = false;
   9792   } else {
   9793     return false;
   9794   }
   9795 
   9796   if (Ptr.getNode()->hasOneUse())
   9797     return false;
   9798 
   9799   for (SDNode *Op : Ptr.getNode()->uses()) {
   9800     if (Op == N ||
   9801         (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
   9802       continue;
   9803 
   9804     SDValue BasePtr;
   9805     SDValue Offset;
   9806     ISD::MemIndexedMode AM = ISD::UNINDEXED;
   9807     if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
   9808       // Don't create a indexed load / store with zero offset.
   9809       if (isNullConstant(Offset))
   9810         continue;
   9811 
   9812       // Try turning it into a post-indexed load / store except when
   9813       // 1) All uses are load / store ops that use it as base ptr (and
   9814       //    it may be folded as addressing mmode).
   9815       // 2) Op must be independent of N, i.e. Op is neither a predecessor
   9816       //    nor a successor of N. Otherwise, if Op is folded that would
   9817       //    create a cycle.
   9818 
   9819       if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
   9820         continue;
   9821 
   9822       // Check for #1.
   9823       bool TryNext = false;
   9824       for (SDNode *Use : BasePtr.getNode()->uses()) {
   9825         if (Use == Ptr.getNode())
   9826           continue;
   9827 
   9828         // If all the uses are load / store addresses, then don't do the
   9829         // transformation.
   9830         if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
   9831           bool RealUse = false;
   9832           for (SDNode *UseUse : Use->uses()) {
   9833             if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
   9834               RealUse = true;
   9835           }
   9836 
   9837           if (!RealUse) {
   9838             TryNext = true;
   9839             break;
   9840           }
   9841         }
   9842       }
   9843 
   9844       if (TryNext)
   9845         continue;
   9846 
   9847       // Check for #2
   9848       if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
   9849         SDValue Result = isLoad
   9850           ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
   9851                                BasePtr, Offset, AM)
   9852           : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
   9853                                 BasePtr, Offset, AM);
   9854         ++PostIndexedNodes;
   9855         ++NodesCombined;
   9856         DEBUG(dbgs() << "\nReplacing.5 ";
   9857               N->dump(&DAG);
   9858               dbgs() << "\nWith: ";
   9859               Result.getNode()->dump(&DAG);
   9860               dbgs() << '\n');
   9861         WorklistRemover DeadNodes(*this);
   9862         if (isLoad) {
   9863           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
   9864           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
   9865         } else {
   9866           DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
   9867         }
   9868 
   9869         // Finally, since the node is now dead, remove it from the graph.
   9870         deleteAndRecombine(N);
   9871 
   9872         // Replace the uses of Use with uses of the updated base value.
   9873         DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
   9874                                       Result.getValue(isLoad ? 1 : 0));
   9875         deleteAndRecombine(Op);
   9876         return true;
   9877       }
   9878     }
   9879   }
   9880 
   9881   return false;
   9882 }
   9883 
   9884 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
   9885 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
   9886   ISD::MemIndexedMode AM = LD->getAddressingMode();
   9887   assert(AM != ISD::UNINDEXED);
   9888   SDValue BP = LD->getOperand(1);
   9889   SDValue Inc = LD->getOperand(2);
   9890 
   9891   // Some backends use TargetConstants for load offsets, but don't expect
   9892   // TargetConstants in general ADD nodes. We can convert these constants into
   9893   // regular Constants (if the constant is not opaque).
   9894   assert((Inc.getOpcode() != ISD::TargetConstant ||
   9895           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
   9896          "Cannot split out indexing using opaque target constants");
   9897   if (Inc.getOpcode() == ISD::TargetConstant) {
   9898     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
   9899     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
   9900                           ConstInc->getValueType(0));
   9901   }
   9902 
   9903   unsigned Opc =
   9904       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
   9905   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
   9906 }
   9907 
   9908 SDValue DAGCombiner::visitLOAD(SDNode *N) {
   9909   LoadSDNode *LD  = cast<LoadSDNode>(N);
   9910   SDValue Chain = LD->getChain();
   9911   SDValue Ptr   = LD->getBasePtr();
   9912 
   9913   // If load is not volatile and there are no uses of the loaded value (and
   9914   // the updated indexed value in case of indexed loads), change uses of the
   9915   // chain value into uses of the chain input (i.e. delete the dead load).
   9916   if (!LD->isVolatile()) {
   9917     if (N->getValueType(1) == MVT::Other) {
   9918       // Unindexed loads.
   9919       if (!N->hasAnyUseOfValue(0)) {
   9920         // It's not safe to use the two value CombineTo variant here. e.g.
   9921         // v1, chain2 = load chain1, loc
   9922         // v2, chain3 = load chain2, loc
   9923         // v3         = add v2, c
   9924         // Now we replace use of chain2 with chain1.  This makes the second load
   9925         // isomorphic to the one we are deleting, and thus makes this load live.
   9926         DEBUG(dbgs() << "\nReplacing.6 ";
   9927               N->dump(&DAG);
   9928               dbgs() << "\nWith chain: ";
   9929               Chain.getNode()->dump(&DAG);
   9930               dbgs() << "\n");
   9931         WorklistRemover DeadNodes(*this);
   9932         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
   9933 
   9934         if (N->use_empty())
   9935           deleteAndRecombine(N);
   9936 
   9937         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   9938       }
   9939     } else {
   9940       // Indexed loads.
   9941       assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
   9942 
   9943       // If this load has an opaque TargetConstant offset, then we cannot split
   9944       // the indexing into an add/sub directly (that TargetConstant may not be
   9945       // valid for a different type of node, and we cannot convert an opaque
   9946       // target constant into a regular constant).
   9947       bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
   9948                        cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();
   9949 
   9950       if (!N->hasAnyUseOfValue(0) &&
   9951           ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
   9952         SDValue Undef = DAG.getUNDEF(N->getValueType(0));
   9953         SDValue Index;
   9954         if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
   9955           Index = SplitIndexingFromLoad(LD);
   9956           // Try to fold the base pointer arithmetic into subsequent loads and
   9957           // stores.
   9958           AddUsersToWorklist(N);
   9959         } else
   9960           Index = DAG.getUNDEF(N->getValueType(1));
   9961         DEBUG(dbgs() << "\nReplacing.7 ";
   9962               N->dump(&DAG);
   9963               dbgs() << "\nWith: ";
   9964               Undef.getNode()->dump(&DAG);
   9965               dbgs() << " and 2 other values\n");
   9966         WorklistRemover DeadNodes(*this);
   9967         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
   9968         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
   9969         DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
   9970         deleteAndRecombine(N);
   9971         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   9972       }
   9973     }
   9974   }
   9975 
   9976   // If this load is directly stored, replace the load value with the stored
   9977   // value.
   9978   // TODO: Handle store large -> read small portion.
   9979   // TODO: Handle TRUNCSTORE/LOADEXT
   9980   if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
   9981     if (ISD::isNON_TRUNCStore(Chain.getNode())) {
   9982       StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
   9983       if (PrevST->getBasePtr() == Ptr &&
   9984           PrevST->getValue().getValueType() == N->getValueType(0))
   9985       return CombineTo(N, Chain.getOperand(1), Chain);
   9986     }
   9987   }
   9988 
   9989   // Try to infer better alignment information than the load already has.
   9990   if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
   9991     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
   9992       if (Align > LD->getMemOperand()->getBaseAlignment()) {
   9993         SDValue NewLoad =
   9994                DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
   9995                               LD->getValueType(0),
   9996                               Chain, Ptr, LD->getPointerInfo(),
   9997                               LD->getMemoryVT(),
   9998                               LD->isVolatile(), LD->isNonTemporal(),
   9999                               LD->isInvariant(), Align, LD->getAAInfo());
   10000         if (NewLoad.getNode() != N)
   10001           return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
   10002       }
   10003     }
   10004   }
   10005 
   10006   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
   10007                                                   : DAG.getSubtarget().useAA();
   10008 #ifndef NDEBUG
   10009   if (CombinerAAOnlyFunc.getNumOccurrences() &&
   10010       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
   10011     UseAA = false;
   10012 #endif
   10013   if (UseAA && LD->isUnindexed()) {
   10014     // Walk up chain skipping non-aliasing memory nodes.
   10015     SDValue BetterChain = FindBetterChain(N, Chain);
   10016 
   10017     // If there is a better chain.
   10018     if (Chain != BetterChain) {
   10019       SDValue ReplLoad;
   10020 
   10021       // Replace the chain to void dependency.
   10022       if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
   10023         ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
   10024                                BetterChain, Ptr, LD->getMemOperand());
   10025       } else {
   10026         ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
   10027                                   LD->getValueType(0),
   10028                                   BetterChain, Ptr, LD->getMemoryVT(),
   10029                                   LD->getMemOperand());
   10030       }
   10031 
   10032       // Create token factor to keep old chain connected.
   10033       SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
   10034                                   MVT::Other, Chain, ReplLoad.getValue(1));
   10035 
   10036       // Make sure the new and old chains are cleaned up.
   10037       AddToWorklist(Token.getNode());
   10038 
   10039       // Replace uses with load result and token factor. Don't add users
   10040       // to work list.
   10041       return CombineTo(N, ReplLoad.getValue(0), Token, false);
   10042     }
   10043   }
   10044 
   10045   // Try transforming N to an indexed load.
   10046   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
   10047     return SDValue(N, 0);
   10048 
   10049   // Try to slice up N to more direct loads if the slices are mapped to
   10050   // different register banks or pairing can take place.
   10051   if (SliceUpLoad(N))
   10052     return SDValue(N, 0);
   10053 
   10054   return SDValue();
   10055 }
   10056 
   10057 namespace {
   10058 /// \brief Helper structure used to slice a load in smaller loads.
   10059 /// Basically a slice is obtained from the following sequence:
   10060 /// Origin = load Ty1, Base
   10061 /// Shift = srl Ty1 Origin, CstTy Amount
   10062 /// Inst = trunc Shift to Ty2
   10063 ///
   10064 /// Then, it will be rewriten into:
   10065 /// Slice = load SliceTy, Base + SliceOffset
   10066 /// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
   10067 ///
   10068 /// SliceTy is deduced from the number of bits that are actually used to
   10069 /// build Inst.
   10070 struct LoadedSlice {
   10071   /// \brief Helper structure used to compute the cost of a slice.
   10072   struct Cost {
   10073     /// Are we optimizing for code size.
   10074     bool ForCodeSize;
   10075     /// Various cost.
   10076     unsigned Loads;
   10077     unsigned Truncates;
   10078     unsigned CrossRegisterBanksCopies;
   10079     unsigned ZExts;
   10080     unsigned Shift;
   10081 
   10082     Cost(bool ForCodeSize = false)
   10083         : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
   10084           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}
   10085 
   10086     /// \brief Get the cost of one isolated slice.
   10087     Cost(const LoadedSlice &LS, bool ForCodeSize = false)
   10088         : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
   10089           CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
   10090       EVT TruncType = LS.Inst->getValueType(0);
   10091       EVT LoadedType = LS.getLoadedType();
   10092       if (TruncType != LoadedType &&
   10093           !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
   10094         ZExts = 1;
   10095     }
   10096 
   10097     /// \brief Account for slicing gain in the current cost.
   10098     /// Slicing provide a few gains like removing a shift or a
   10099     /// truncate. This method allows to grow the cost of the original
   10100     /// load with the gain from this slice.
   10101     void addSliceGain(const LoadedSlice &LS) {
   10102       // Each slice saves a truncate.
   10103       const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
   10104       if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
   10105                               LS.Inst->getValueType(0)))
   10106         ++Truncates;
   10107       // If there is a shift amount, this slice gets rid of it.
   10108       if (LS.Shift)
   10109         ++Shift;
   10110       // If this slice can merge a cross register bank copy, account for it.
   10111       if (LS.canMergeExpensiveCrossRegisterBankCopy())
   10112         ++CrossRegisterBanksCopies;
   10113     }
   10114 
   10115     Cost &operator+=(const Cost &RHS) {
   10116       Loads += RHS.Loads;
   10117       Truncates += RHS.Truncates;
   10118       CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
   10119       ZExts += RHS.ZExts;
   10120       Shift += RHS.Shift;
   10121       return *this;
   10122     }
   10123 
   10124     bool operator==(const Cost &RHS) const {
   10125       return Loads == RHS.Loads && Truncates == RHS.Truncates &&
   10126              CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
   10127              ZExts == RHS.ZExts && Shift == RHS.Shift;
   10128     }
   10129 
   10130     bool operator!=(const Cost &RHS) const { return !(*this == RHS); }
   10131 
   10132     bool operator<(const Cost &RHS) const {
   10133       // Assume cross register banks copies are as expensive as loads.
   10134       // FIXME: Do we want some more target hooks?
   10135       unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
   10136       unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
   10137       // Unless we are optimizing for code size, consider the
   10138       // expensive operation first.
   10139       if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
   10140         return ExpensiveOpsLHS < ExpensiveOpsRHS;
   10141       return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
   10142              (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
   10143     }
   10144 
   10145     bool operator>(const Cost &RHS) const { return RHS < *this; }
   10146 
   10147     bool operator<=(const Cost &RHS) const { return !(RHS < *this); }
   10148 
   10149     bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
   10150   };
   10151   // The last instruction that represent the slice. This should be a
   10152   // truncate instruction.
   10153   SDNode *Inst;
   10154   // The original load instruction.
   10155   LoadSDNode *Origin;
   10156   // The right shift amount in bits from the original load.
   10157   unsigned Shift;
   10158   // The DAG from which Origin came from.
   10159   // This is used to get some contextual information about legal types, etc.
   10160   SelectionDAG *DAG;
   10161 
   10162   LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
   10163               unsigned Shift = 0, SelectionDAG *DAG = nullptr)
   10164       : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}
   10165 
   10166   /// \brief Get the bits used in a chunk of bits \p BitWidth large.
   10167   /// \return Result is \p BitWidth and has used bits set to 1 and
   10168   ///         not used bits set to 0.
   10169   APInt getUsedBits() const {
   10170     // Reproduce the trunc(lshr) sequence:
   10171     // - Start from the truncated value.
   10172     // - Zero extend to the desired bit width.
   10173     // - Shift left.
   10174     assert(Origin && "No original load to compare against.");
   10175     unsigned BitWidth = Origin->getValueSizeInBits(0);
   10176     assert(Inst && "This slice is not bound to an instruction");
   10177     assert(Inst->getValueSizeInBits(0) <= BitWidth &&
   10178            "Extracted slice is bigger than the whole type!");
   10179     APInt UsedBits(Inst->getValueSizeInBits(0), 0);
   10180     UsedBits.setAllBits();
   10181     UsedBits = UsedBits.zext(BitWidth);
   10182     UsedBits <<= Shift;
   10183     return UsedBits;
   10184   }
   10185 
   10186   /// \brief Get the size of the slice to be loaded in bytes.
   10187   unsigned getLoadedSize() const {
   10188     unsigned SliceSize = getUsedBits().countPopulation();
   10189     assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
   10190     return SliceSize / 8;
   10191   }
   10192 
   10193   /// \brief Get the type that will be loaded for this slice.
   10194   /// Note: This may not be the final type for the slice.
   10195   EVT getLoadedType() const {
   10196     assert(DAG && "Missing context");
   10197     LLVMContext &Ctxt = *DAG->getContext();
   10198     return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
   10199   }
   10200 
   10201   /// \brief Get the alignment of the load used for this slice.
   10202   unsigned getAlignment() const {
   10203     unsigned Alignment = Origin->getAlignment();
   10204     unsigned Offset = getOffsetFromBase();
   10205     if (Offset != 0)
   10206       Alignment = MinAlign(Alignment, Alignment + Offset);
   10207     return Alignment;
   10208   }
   10209 
   10210   /// \brief Check if this slice can be rewritten with legal operations.
   10211   bool isLegal() const {
   10212     // An invalid slice is not legal.
   10213     if (!Origin || !Inst || !DAG)
   10214       return false;
   10215 
   10216     // Offsets are for indexed load only, we do not handle that.
   10217     if (Origin->getOffset().getOpcode() != ISD::UNDEF)
   10218       return false;
   10219 
   10220     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
   10221 
   10222     // Check that the type is legal.
   10223     EVT SliceType = getLoadedType();
   10224     if (!TLI.isTypeLegal(SliceType))
   10225       return false;
   10226 
   10227     // Check that the load is legal for this type.
   10228     if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
   10229       return false;
   10230 
   10231     // Check that the offset can be computed.
   10232     // 1. Check its type.
   10233     EVT PtrType = Origin->getBasePtr().getValueType();
   10234     if (PtrType == MVT::Untyped || PtrType.isExtended())
   10235       return false;
   10236 
   10237     // 2. Check that it fits in the immediate.
   10238     if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
   10239       return false;
   10240 
   10241     // 3. Check that the computation is legal.
   10242     if (!TLI.isOperationLegal(ISD::ADD, PtrType))
   10243       return false;
   10244 
   10245     // Check that the zext is legal if it needs one.
   10246     EVT TruncateType = Inst->getValueType(0);
   10247     if (TruncateType != SliceType &&
   10248         !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
   10249       return false;
   10250 
   10251     return true;
   10252   }
   10253 
   10254   /// \brief Get the offset in bytes of this slice in the original chunk of
   10255   /// bits.
   10256   /// \pre DAG != nullptr.
   10257   uint64_t getOffsetFromBase() const {
   10258     assert(DAG && "Missing context.");
   10259     bool IsBigEndian = DAG->getDataLayout().isBigEndian();
   10260     assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
   10261     uint64_t Offset = Shift / 8;
   10262     unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
   10263     assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
   10264            "The size of the original loaded type is not a multiple of a"
   10265            " byte.");
   10266     // If Offset is bigger than TySizeInBytes, it means we are loading all
   10267     // zeros. This should have been optimized before in the process.
   10268     assert(TySizeInBytes > Offset &&
   10269            "Invalid shift amount for given loaded size");
   10270     if (IsBigEndian)
   10271       Offset = TySizeInBytes - Offset - getLoadedSize();
   10272     return Offset;
   10273   }
   10274 
   10275   /// \brief Generate the sequence of instructions to load the slice
   10276   /// represented by this object and redirect the uses of this slice to
   10277   /// this new sequence of instructions.
   10278   /// \pre this->Inst && this->Origin are valid Instructions and this
   10279   /// object passed the legal check: LoadedSlice::isLegal returned true.
   10280   /// \return The last instruction of the sequence used to load the slice.
   10281   SDValue loadSlice() const {
   10282     assert(Inst && Origin && "Unable to replace a non-existing slice.");
   10283     const SDValue &OldBaseAddr = Origin->getBasePtr();
   10284     SDValue BaseAddr = OldBaseAddr;
   10285     // Get the offset in that chunk of bytes w.r.t. the endianess.
   10286     int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
   10287     assert(Offset >= 0 && "Offset too big to fit in int64_t!");
   10288     if (Offset) {
   10289       // BaseAddr = BaseAddr + Offset.
   10290       EVT ArithType = BaseAddr.getValueType();
   10291       SDLoc DL(Origin);
   10292       BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
   10293                               DAG->getConstant(Offset, DL, ArithType));
   10294     }
   10295 
   10296     // Create the type of the loaded slice according to its size.
   10297     EVT SliceType = getLoadedType();
   10298 
   10299     // Create the load for the slice.
   10300     SDValue LastInst = DAG->getLoad(
   10301         SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
   10302         Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
   10303         Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
   10304     // If the final type is not the same as the loaded type, this means that
   10305     // we have to pad with zero. Create a zero extend for that.
   10306     EVT FinalType = Inst->getValueType(0);
   10307     if (SliceType != FinalType)
   10308       LastInst =
   10309           DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
   10310     return LastInst;
   10311   }
   10312 
   10313   /// \brief Check if this slice can be merged with an expensive cross register
   10314   /// bank copy. E.g.,
   10315   /// i = load i32
   10316   /// f = bitcast i32 i to float
   10317   bool canMergeExpensiveCrossRegisterBankCopy() const {
   10318     if (!Inst || !Inst->hasOneUse())
   10319       return false;
   10320     SDNode *Use = *Inst->use_begin();
   10321     if (Use->getOpcode() != ISD::BITCAST)
   10322       return false;
   10323     assert(DAG && "Missing context");
   10324     const TargetLowering &TLI = DAG->getTargetLoweringInfo();
   10325     EVT ResVT = Use->getValueType(0);
   10326     const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
   10327     const TargetRegisterClass *ArgRC =
   10328         TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
   10329     if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
   10330       return false;
   10331 
   10332     // At this point, we know that we perform a cross-register-bank copy.
   10333     // Check if it is expensive.
   10334     const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
   10335     // Assume bitcasts are cheap, unless both register classes do not
   10336     // explicitly share a common sub class.
   10337     if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
   10338       return false;
   10339 
   10340     // Check if it will be merged with the load.
   10341     // 1. Check the alignment constraint.
   10342     unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
   10343         ResVT.getTypeForEVT(*DAG->getContext()));
   10344 
   10345     if (RequiredAlignment > getAlignment())
   10346       return false;
   10347 
   10348     // 2. Check that the load is a legal operation for that type.
   10349     if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
   10350       return false;
   10351 
   10352     // 3. Check that we do not have a zext in the way.
   10353     if (Inst->getValueType(0) != getLoadedType())
   10354       return false;
   10355 
   10356     return true;
   10357   }
   10358 };
   10359 }
   10360 
   10361 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
   10362 /// \p UsedBits looks like 0..0 1..1 0..0.
   10363 static bool areUsedBitsDense(const APInt &UsedBits) {
   10364   // If all the bits are one, this is dense!
   10365   if (UsedBits.isAllOnesValue())
   10366     return true;
   10367 
   10368   // Get rid of the unused bits on the right.
   10369   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
   10370   // Get rid of the unused bits on the left.
   10371   if (NarrowedUsedBits.countLeadingZeros())
   10372     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
   10373   // Check that the chunk of bits is completely used.
   10374   return NarrowedUsedBits.isAllOnesValue();
   10375 }
   10376 
   10377 /// \brief Check whether or not \p First and \p Second are next to each other
   10378 /// in memory. This means that there is no hole between the bits loaded
   10379 /// by \p First and the bits loaded by \p Second.
   10380 static bool areSlicesNextToEachOther(const LoadedSlice &First,
   10381                                      const LoadedSlice &Second) {
   10382   assert(First.Origin == Second.Origin && First.Origin &&
   10383          "Unable to match different memory origins.");
   10384   APInt UsedBits = First.getUsedBits();
   10385   assert((UsedBits & Second.getUsedBits()) == 0 &&
   10386          "Slices are not supposed to overlap.");
   10387   UsedBits |= Second.getUsedBits();
   10388   return areUsedBitsDense(UsedBits);
   10389 }
   10390 
   10391 /// \brief Adjust the \p GlobalLSCost according to the target
   10392 /// paring capabilities and the layout of the slices.
   10393 /// \pre \p GlobalLSCost should account for at least as many loads as
   10394 /// there is in the slices in \p LoadedSlices.
   10395 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   10396                                  LoadedSlice::Cost &GlobalLSCost) {
   10397   unsigned NumberOfSlices = LoadedSlices.size();
   10398   // If there is less than 2 elements, no pairing is possible.
   10399   if (NumberOfSlices < 2)
   10400     return;
   10401 
   10402   // Sort the slices so that elements that are likely to be next to each
   10403   // other in memory are next to each other in the list.
   10404   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
   10405             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
   10406     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
   10407     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
   10408   });
   10409   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
   10410   // First (resp. Second) is the first (resp. Second) potentially candidate
   10411   // to be placed in a paired load.
   10412   const LoadedSlice *First = nullptr;
   10413   const LoadedSlice *Second = nullptr;
   10414   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
   10415                 // Set the beginning of the pair.
   10416                                                            First = Second) {
   10417 
   10418     Second = &LoadedSlices[CurrSlice];
   10419 
   10420     // If First is NULL, it means we start a new pair.
   10421     // Get to the next slice.
   10422     if (!First)
   10423       continue;
   10424 
   10425     EVT LoadedType = First->getLoadedType();
   10426 
   10427     // If the types of the slices are different, we cannot pair them.
   10428     if (LoadedType != Second->getLoadedType())
   10429       continue;
   10430 
   10431     // Check if the target supplies paired loads for this type.
   10432     unsigned RequiredAlignment = 0;
   10433     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
   10434       // move to the next pair, this type is hopeless.
   10435       Second = nullptr;
   10436       continue;
   10437     }
   10438     // Check if we meet the alignment requirement.
   10439     if (RequiredAlignment > First->getAlignment())
   10440       continue;
   10441 
   10442     // Check that both loads are next to each other in memory.
   10443     if (!areSlicesNextToEachOther(*First, *Second))
   10444       continue;
   10445 
   10446     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
   10447     --GlobalLSCost.Loads;
   10448     // Move to the next pair.
   10449     Second = nullptr;
   10450   }
   10451 }
   10452 
   10453 /// \brief Check the profitability of all involved LoadedSlice.
   10454 /// Currently, it is considered profitable if there is exactly two
   10455 /// involved slices (1) which are (2) next to each other in memory, and
   10456 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
   10457 ///
   10458 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
   10459 /// the elements themselves.
   10460 ///
   10461 /// FIXME: When the cost model will be mature enough, we can relax
   10462 /// constraints (1) and (2).
   10463 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   10464                                 const APInt &UsedBits, bool ForCodeSize) {
   10465   unsigned NumberOfSlices = LoadedSlices.size();
   10466   if (StressLoadSlicing)
   10467     return NumberOfSlices > 1;
   10468 
   10469   // Check (1).
   10470   if (NumberOfSlices != 2)
   10471     return false;
   10472 
   10473   // Check (2).
   10474   if (!areUsedBitsDense(UsedBits))
   10475     return false;
   10476 
   10477   // Check (3).
   10478   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
   10479   // The original code has one big load.
   10480   OrigCost.Loads = 1;
   10481   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
   10482     const LoadedSlice &LS = LoadedSlices[CurrSlice];
   10483     // Accumulate the cost of all the slices.
   10484     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
   10485     GlobalSlicingCost += SliceCost;
   10486 
   10487     // Account as cost in the original configuration the gain obtained
   10488     // with the current slices.
   10489     OrigCost.addSliceGain(LS);
   10490   }
   10491 
   10492   // If the target supports paired load, adjust the cost accordingly.
   10493   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
   10494   return OrigCost > GlobalSlicingCost;
   10495 }
   10496 
   10497 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
   10498 /// operations, split it in the various pieces being extracted.
   10499 ///
   10500 /// This sort of thing is introduced by SROA.
   10501 /// This slicing takes care not to insert overlapping loads.
   10502 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
   10503 bool DAGCombiner::SliceUpLoad(SDNode *N) {
   10504   if (Level < AfterLegalizeDAG)
   10505     return false;
   10506 
   10507   LoadSDNode *LD = cast<LoadSDNode>(N);
   10508   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
   10509       !LD->getValueType(0).isInteger())
   10510     return false;
   10511 
   10512   // Keep track of already used bits to detect overlapping values.
   10513   // In that case, we will just abort the transformation.
   10514   APInt UsedBits(LD->getValueSizeInBits(0), 0);
   10515 
   10516   SmallVector<LoadedSlice, 4> LoadedSlices;
   10517 
   10518   // Check if this load is used as several smaller chunks of bits.
   10519   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
   10520   // of computation for each trunc.
   10521   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
   10522        UI != UIEnd; ++UI) {
   10523     // Skip the uses of the chain.
   10524     if (UI.getUse().getResNo() != 0)
   10525       continue;
   10526 
   10527     SDNode *User = *UI;
   10528     unsigned Shift = 0;
   10529 
   10530     // Check if this is a trunc(lshr).
   10531     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
   10532         isa<ConstantSDNode>(User->getOperand(1))) {
   10533       Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
   10534       User = *User->use_begin();
   10535     }
   10536 
   10537     // At this point, User is a Truncate, iff we encountered, trunc or
   10538     // trunc(lshr).
   10539     if (User->getOpcode() != ISD::TRUNCATE)
   10540       return false;
   10541 
   10542     // The width of the type must be a power of 2 and greater than 8-bits.
   10543     // Otherwise the load cannot be represented in LLVM IR.
   10544     // Moreover, if we shifted with a non-8-bits multiple, the slice
   10545     // will be across several bytes. We do not support that.
   10546     unsigned Width = User->getValueSizeInBits(0);
   10547     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
   10548       return 0;
   10549 
   10550     // Build the slice for this chain of computations.
   10551     LoadedSlice LS(User, LD, Shift, &DAG);
   10552     APInt CurrentUsedBits = LS.getUsedBits();
   10553 
   10554     // Check if this slice overlaps with another.
   10555     if ((CurrentUsedBits & UsedBits) != 0)
   10556       return false;
   10557     // Update the bits used globally.
   10558     UsedBits |= CurrentUsedBits;
   10559 
   10560     // Check if the new slice would be legal.
   10561     if (!LS.isLegal())
   10562       return false;
   10563 
   10564     // Record the slice.
   10565     LoadedSlices.push_back(LS);
   10566   }
   10567 
   10568   // Abort slicing if it does not seem to be profitable.
   10569   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
   10570     return false;
   10571 
   10572   ++SlicedLoads;
   10573 
   10574   // Rewrite each chain to use an independent load.
   10575   // By construction, each chain can be represented by a unique load.
   10576 
   10577   // Prepare the argument for the new token factor for all the slices.
   10578   SmallVector<SDValue, 8> ArgChains;
   10579   for (SmallVectorImpl<LoadedSlice>::const_iterator
   10580            LSIt = LoadedSlices.begin(),
   10581            LSItEnd = LoadedSlices.end();
   10582        LSIt != LSItEnd; ++LSIt) {
   10583     SDValue SliceInst = LSIt->loadSlice();
   10584     CombineTo(LSIt->Inst, SliceInst, true);
   10585     if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
   10586       SliceInst = SliceInst.getOperand(0);
   10587     assert(SliceInst->getOpcode() == ISD::LOAD &&
   10588            "It takes more than a zext to get to the loaded slice!!");
   10589     ArgChains.push_back(SliceInst.getValue(1));
   10590   }
   10591 
   10592   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
   10593                               ArgChains);
   10594   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
   10595   return true;
   10596 }
   10597 
   10598 /// Check to see if V is (and load (ptr), imm), where the load is having
   10599 /// specific bytes cleared out.  If so, return the byte size being masked out
   10600 /// and the shift amount.
   10601 static std::pair<unsigned, unsigned>
   10602 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   10603   std::pair<unsigned, unsigned> Result(0, 0);
   10604 
   10605   // Check for the structure we're looking for.
   10606   if (V->getOpcode() != ISD::AND ||
   10607       !isa<ConstantSDNode>(V->getOperand(1)) ||
   10608       !ISD::isNormalLoad(V->getOperand(0).getNode()))
   10609     return Result;
   10610 
   10611   // Check the chain and pointer.
   10612   LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
   10613   if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.
   10614 
   10615   // The store should be chained directly to the load or be an operand of a
   10616   // tokenfactor.
   10617   if (LD == Chain.getNode())
   10618     ; // ok.
   10619   else if (Chain->getOpcode() != ISD::TokenFactor)
   10620     return Result; // Fail.
   10621   else {
   10622     bool isOk = false;
   10623     for (const SDValue &ChainOp : Chain->op_values())
   10624       if (ChainOp.getNode() == LD) {
   10625         isOk = true;
   10626         break;
   10627       }
   10628     if (!isOk) return Result;
   10629   }
   10630 
   10631   // This only handles simple types.
   10632   if (V.getValueType() != MVT::i16 &&
   10633       V.getValueType() != MVT::i32 &&
   10634       V.getValueType() != MVT::i64)
   10635     return Result;
   10636 
   10637   // Check the constant mask.  Invert it so that the bits being masked out are
   10638   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
   10639   // follow the sign bit for uniformity.
   10640   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
   10641   unsigned NotMaskLZ = countLeadingZeros(NotMask);
   10642   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
   10643   unsigned NotMaskTZ = countTrailingZeros(NotMask);
   10644   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
   10645   if (NotMaskLZ == 64) return Result;  // All zero mask.
   10646 
   10647   // See if we have a continuous run of bits.  If so, we have 0*1+0*
   10648   if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
   10649     return Result;
   10650 
   10651   // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
   10652   if (V.getValueType() != MVT::i64 && NotMaskLZ)
   10653     NotMaskLZ -= 64-V.getValueSizeInBits();
   10654 
   10655   unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
   10656   switch (MaskedBytes) {
   10657   case 1:
   10658   case 2:
   10659   case 4: break;
   10660   default: return Result; // All one mask, or 5-byte mask.
   10661   }
   10662 
   10663   // Verify that the first bit starts at a multiple of mask so that the access
   10664   // is aligned the same as the access width.
   10665   if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;
   10666 
   10667   Result.first = MaskedBytes;
   10668   Result.second = NotMaskTZ/8;
   10669   return Result;
   10670 }
   10671 
   10672 
   10673 /// Check to see if IVal is something that provides a value as specified by
   10674 /// MaskInfo. If so, replace the specified store with a narrower store of
   10675 /// truncated IVal.
   10676 static SDNode *
   10677 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
   10678                                 SDValue IVal, StoreSDNode *St,
   10679                                 DAGCombiner *DC) {
   10680   unsigned NumBytes = MaskInfo.first;
   10681   unsigned ByteShift = MaskInfo.second;
   10682   SelectionDAG &DAG = DC->getDAG();
   10683 
   10684   // Check to see if IVal is all zeros in the part being masked in by the 'or'
   10685   // that uses this.  If not, this is not a replacement.
   10686   APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
   10687                                   ByteShift*8, (ByteShift+NumBytes)*8);
   10688   if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;
   10689 
   10690   // Check that it is legal on the target to do this.  It is legal if the new
   10691   // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
   10692   // legalization.
   10693   MVT VT = MVT::getIntegerVT(NumBytes*8);
   10694   if (!DC->isTypeLegal(VT))
   10695     return nullptr;
   10696 
   10697   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
   10698   // shifted by ByteShift and truncated down to NumBytes.
   10699   if (ByteShift) {
   10700     SDLoc DL(IVal);
   10701     IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
   10702                        DAG.getConstant(ByteShift*8, DL,
   10703                                     DC->getShiftAmountTy(IVal.getValueType())));
   10704   }
   10705 
   10706   // Figure out the offset for the store and the alignment of the access.
   10707   unsigned StOffset;
   10708   unsigned NewAlign = St->getAlignment();
   10709 
   10710   if (DAG.getDataLayout().isLittleEndian())
   10711     StOffset = ByteShift;
   10712   else
   10713     StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;
   10714 
   10715   SDValue Ptr = St->getBasePtr();
   10716   if (StOffset) {
   10717     SDLoc DL(IVal);
   10718     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
   10719                       Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
   10720     NewAlign = MinAlign(NewAlign, StOffset);
   10721   }
   10722 
   10723   // Truncate down to the new size.
   10724   IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
   10725 
   10726   ++OpsNarrowed;
   10727   return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
   10728                       St->getPointerInfo().getWithOffset(StOffset),
   10729                       false, false, NewAlign).getNode();
   10730 }
   10731 
   10732 
   10733 /// Look for sequence of load / op / store where op is one of 'or', 'xor', and
   10734 /// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
   10735 /// narrowing the load and store if it would end up being a win for performance
   10736 /// or code size.
   10737 SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
   10738   StoreSDNode *ST  = cast<StoreSDNode>(N);
   10739   if (ST->isVolatile())
   10740     return SDValue();
   10741 
   10742   SDValue Chain = ST->getChain();
   10743   SDValue Value = ST->getValue();
   10744   SDValue Ptr   = ST->getBasePtr();
   10745   EVT VT = Value.getValueType();
   10746 
   10747   if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
   10748     return SDValue();
   10749 
   10750   unsigned Opc = Value.getOpcode();
   10751 
   10752   // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
   10753   // is a byte mask indicating a consecutive number of bytes, check to see if
   10754   // Y is known to provide just those bytes.  If so, we try to replace the
   10755   // load + replace + store sequence with a single (narrower) store, which makes
   10756   // the load dead.
   10757   if (Opc == ISD::OR) {
   10758     std::pair<unsigned, unsigned> MaskedLoad;
   10759     MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
   10760     if (MaskedLoad.first)
   10761       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
   10762                                                   Value.getOperand(1), ST,this))
   10763         return SDValue(NewST, 0);
   10764 
   10765     // Or is commutative, so try swapping X and Y.
   10766     MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
   10767     if (MaskedLoad.first)
   10768       if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
   10769                                                   Value.getOperand(0), ST,this))
   10770         return SDValue(NewST, 0);
   10771   }
   10772 
   10773   if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
   10774       Value.getOperand(1).getOpcode() != ISD::Constant)
   10775     return SDValue();
   10776 
   10777   SDValue N0 = Value.getOperand(0);
   10778   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   10779       Chain == SDValue(N0.getNode(), 1)) {
   10780     LoadSDNode *LD = cast<LoadSDNode>(N0);
   10781     if (LD->getBasePtr() != Ptr ||
   10782         LD->getPointerInfo().getAddrSpace() !=
   10783         ST->getPointerInfo().getAddrSpace())
   10784       return SDValue();
   10785 
   10786     // Find the type to narrow it the load / op / store to.
   10787     SDValue N1 = Value.getOperand(1);
   10788     unsigned BitWidth = N1.getValueSizeInBits();
   10789     APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
   10790     if (Opc == ISD::AND)
   10791       Imm ^= APInt::getAllOnesValue(BitWidth);
   10792     if (Imm == 0 || Imm.isAllOnesValue())
   10793       return SDValue();
   10794     unsigned ShAmt = Imm.countTrailingZeros();
   10795     unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
   10796     unsigned NewBW = NextPowerOf2(MSB - ShAmt);
   10797     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
   10798     // The narrowing should be profitable, the load/store operation should be
   10799     // legal (or custom) and the store size should be equal to the NewVT width.
   10800     while (NewBW < BitWidth &&
   10801            (NewVT.getStoreSizeInBits() != NewBW ||
   10802             !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
   10803             !TLI.isNarrowingProfitable(VT, NewVT))) {
   10804       NewBW = NextPowerOf2(NewBW);
   10805       NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
   10806     }
   10807     if (NewBW >= BitWidth)
   10808       return SDValue();
   10809 
   10810     // If the lsb changed does not start at the type bitwidth boundary,
   10811     // start at the previous one.
   10812     if (ShAmt % NewBW)
   10813       ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
   10814     APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
   10815                                    std::min(BitWidth, ShAmt + NewBW));
   10816     if ((Imm & Mask) == Imm) {
   10817       APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
   10818       if (Opc == ISD::AND)
   10819         NewImm ^= APInt::getAllOnesValue(NewBW);
   10820       uint64_t PtrOff = ShAmt / 8;
   10821       // For big endian targets, we need to adjust the offset to the pointer to
   10822       // load the correct bytes.
   10823       if (DAG.getDataLayout().isBigEndian())
   10824         PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;
   10825 
   10826       unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
   10827       Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
   10828       if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
   10829         return SDValue();
   10830 
   10831       SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
   10832                                    Ptr.getValueType(), Ptr,
   10833                                    DAG.getConstant(PtrOff, SDLoc(LD),
   10834                                                    Ptr.getValueType()));
   10835       SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
   10836                                   LD->getChain(), NewPtr,
   10837                                   LD->getPointerInfo().getWithOffset(PtrOff),
   10838                                   LD->isVolatile(), LD->isNonTemporal(),
   10839                                   LD->isInvariant(), NewAlign,
   10840                                   LD->getAAInfo());
   10841       SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
   10842                                    DAG.getConstant(NewImm, SDLoc(Value),
   10843                                                    NewVT));
   10844       SDValue NewST = DAG.getStore(Chain, SDLoc(N),
   10845                                    NewVal, NewPtr,
   10846                                    ST->getPointerInfo().getWithOffset(PtrOff),
   10847                                    false, false, NewAlign);
   10848 
   10849       AddToWorklist(NewPtr.getNode());
   10850       AddToWorklist(NewLD.getNode());
   10851       AddToWorklist(NewVal.getNode());
   10852       WorklistRemover DeadNodes(*this);
   10853       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
   10854       ++OpsNarrowed;
   10855       return NewST;
   10856     }
   10857   }
   10858 
   10859   return SDValue();
   10860 }
   10861 
   10862 /// For a given floating point load / store pair, if the load value isn't used
   10863 /// by any other operations, then consider transforming the pair to integer
   10864 /// load / store operations if the target deems the transformation profitable.
   10865 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
   10866   StoreSDNode *ST  = cast<StoreSDNode>(N);
   10867   SDValue Chain = ST->getChain();
   10868   SDValue Value = ST->getValue();
   10869   if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
   10870       Value.hasOneUse() &&
   10871       Chain == SDValue(Value.getNode(), 1)) {
   10872     LoadSDNode *LD = cast<LoadSDNode>(Value);
   10873     EVT VT = LD->getMemoryVT();
   10874     if (!VT.isFloatingPoint() ||
   10875         VT != ST->getMemoryVT() ||
   10876         LD->isNonTemporal() ||
   10877         ST->isNonTemporal() ||
   10878         LD->getPointerInfo().getAddrSpace() != 0 ||
   10879         ST->getPointerInfo().getAddrSpace() != 0)
   10880       return SDValue();
   10881 
   10882     EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
   10883     if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
   10884         !TLI.isOperationLegal(ISD::STORE, IntVT) ||
   10885         !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
   10886         !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
   10887       return SDValue();
   10888 
   10889     unsigned LDAlign = LD->getAlignment();
   10890     unsigned STAlign = ST->getAlignment();
   10891     Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
   10892     unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
   10893     if (LDAlign < ABIAlign || STAlign < ABIAlign)
   10894       return SDValue();
   10895 
   10896     SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
   10897                                 LD->getChain(), LD->getBasePtr(),
   10898                                 LD->getPointerInfo(),
   10899                                 false, false, false, LDAlign);
   10900 
   10901     SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
   10902                                  NewLD, ST->getBasePtr(),
   10903                                  ST->getPointerInfo(),
   10904                                  false, false, STAlign);
   10905 
   10906     AddToWorklist(NewLD.getNode());
   10907     AddToWorklist(NewST.getNode());
   10908     WorklistRemover DeadNodes(*this);
   10909     DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
   10910     ++LdStFP2Int;
   10911     return NewST;
   10912   }
   10913 
   10914   return SDValue();
   10915 }
   10916 
   10917 namespace {
   10918 /// Helper struct to parse and store a memory address as base + index + offset.
   10919 /// We ignore sign extensions when it is safe to do so.
   10920 /// The following two expressions are not equivalent. To differentiate we need
   10921 /// to store whether there was a sign extension involved in the index
   10922 /// computation.
   10923 ///  (load (i64 add (i64 copyfromreg %c)
   10924 ///                 (i64 signextend (add (i8 load %index)
   10925 ///                                      (i8 1))))
   10926 /// vs
   10927 ///
   10928 /// (load (i64 add (i64 copyfromreg %c)
   10929 ///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
   10930 ///                                         (i32 1)))))
   10931 struct BaseIndexOffset {
   10932   SDValue Base;
   10933   SDValue Index;
   10934   int64_t Offset;
   10935   bool IsIndexSignExt;
   10936 
   10937   BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}
   10938 
   10939   BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
   10940                   bool IsIndexSignExt) :
   10941     Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}
   10942 
   10943   bool equalBaseIndex(const BaseIndexOffset &Other) {
   10944     return Other.Base == Base && Other.Index == Index &&
   10945       Other.IsIndexSignExt == IsIndexSignExt;
   10946   }
   10947 
   10948   /// Parses tree in Ptr for base, index, offset addresses.
   10949   static BaseIndexOffset match(SDValue Ptr) {
   10950     bool IsIndexSignExt = false;
   10951 
   10952     // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
   10953     // instruction, then it could be just the BASE or everything else we don't
   10954     // know how to handle. Just use Ptr as BASE and give up.
   10955     if (Ptr->getOpcode() != ISD::ADD)
   10956       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
   10957 
   10958     // We know that we have at least an ADD instruction. Try to pattern match
   10959     // the simple case of BASE + OFFSET.
   10960     if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
   10961       int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
   10962       return  BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
   10963                               IsIndexSignExt);
   10964     }
   10965 
   10966     // Inside a loop the current BASE pointer is calculated using an ADD and a
   10967     // MUL instruction. In this case Ptr is the actual BASE pointer.
   10968     // (i64 add (i64 %array_ptr)
   10969     //          (i64 mul (i64 %induction_var)
   10970     //                   (i64 %element_size)))
   10971     if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
   10972       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
   10973 
   10974     // Look at Base + Index + Offset cases.
   10975     SDValue Base = Ptr->getOperand(0);
   10976     SDValue IndexOffset = Ptr->getOperand(1);
   10977 
   10978     // Skip signextends.
   10979     if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
   10980       IndexOffset = IndexOffset->getOperand(0);
   10981       IsIndexSignExt = true;
   10982     }
   10983 
   10984     // Either the case of Base + Index (no offset) or something else.
   10985     if (IndexOffset->getOpcode() != ISD::ADD)
   10986       return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);
   10987 
   10988     // Now we have the case of Base + Index + offset.
   10989     SDValue Index = IndexOffset->getOperand(0);
   10990     SDValue Offset = IndexOffset->getOperand(1);
   10991 
   10992     if (!isa<ConstantSDNode>(Offset))
   10993       return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
   10994 
   10995     // Ignore signextends.
   10996     if (Index->getOpcode() == ISD::SIGN_EXTEND) {
   10997       Index = Index->getOperand(0);
   10998       IsIndexSignExt = true;
   10999     } else IsIndexSignExt = false;
   11000 
   11001     int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
   11002     return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
   11003   }
   11004 };
   11005 } // namespace
   11006 
   11007 // This is a helper function for visitMUL to check the profitability
   11008 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
   11009 // MulNode is the original multiply, AddNode is (add x, c1),
   11010 // and ConstNode is c2.
   11011 //
   11012 // If the (add x, c1) has multiple uses, we could increase
   11013 // the number of adds if we make this transformation.
   11014 // It would only be worth doing this if we can remove a
   11015 // multiply in the process. Check for that here.
   11016 // To illustrate:
   11017 //     (A + c1) * c3
   11018 //     (A + c2) * c3
   11019 // We're checking for cases where we have common "c3 * A" expressions.
   11020 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
   11021                                               SDValue &AddNode,
   11022                                               SDValue &ConstNode) {
   11023   APInt Val;
   11024 
   11025   // If the add only has one use, this would be OK to do.
   11026   if (AddNode.getNode()->hasOneUse())
   11027     return true;
   11028 
   11029   // Walk all the users of the constant with which we're multiplying.
   11030   for (SDNode *Use : ConstNode->uses()) {
   11031 
   11032     if (Use == MulNode) // This use is the one we're on right now. Skip it.
   11033       continue;
   11034 
   11035     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
   11036       SDNode *OtherOp;
   11037       SDNode *MulVar = AddNode.getOperand(0).getNode();
   11038 
   11039       // OtherOp is what we're multiplying against the constant.
   11040       if (Use->getOperand(0) == ConstNode)
   11041         OtherOp = Use->getOperand(1).getNode();
   11042       else
   11043         OtherOp = Use->getOperand(0).getNode();
   11044 
   11045       // Check to see if multiply is with the same operand of our "add".
   11046       //
   11047       //     ConstNode  = CONST
   11048       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
   11049       //     ...
   11050       //     AddNode  = (A + c1)  <-- MulVar is A.
   11051       //         = AddNode * ConstNode   <-- current visiting instruction.
   11052       //
   11053       // If we make this transformation, we will have a common
   11054       // multiply (ConstNode * A) that we can save.
   11055       if (OtherOp == MulVar)
   11056         return true;
   11057 
   11058       // Now check to see if a future expansion will give us a common
   11059       // multiply.
   11060       //
   11061       //     ConstNode  = CONST
   11062       //     AddNode    = (A + c1)
   11063       //     ...   = AddNode * ConstNode <-- current visiting instruction.
   11064       //     ...
   11065       //     OtherOp = (A + c2)
   11066       //     Use     = OtherOp * ConstNode <-- visiting Use.
   11067       //
   11068       // If we make this transformation, we will have a common
   11069       // multiply (CONST * A) after we also do the same transformation
   11070       // to the "t2" instruction.
   11071       if (OtherOp->getOpcode() == ISD::ADD &&
   11072           isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
   11073           OtherOp->getOperand(0).getNode() == MulVar)
   11074         return true;
   11075     }
   11076   }
   11077 
   11078   // Didn't find a case where this would be profitable.
   11079   return false;
   11080 }
   11081 
   11082 SDValue DAGCombiner::getMergedConstantVectorStore(SelectionDAG &DAG,
   11083                                                   SDLoc SL,
   11084                                                   ArrayRef<MemOpLink> Stores,
   11085                                                   SmallVectorImpl<SDValue> &Chains,
   11086                                                   EVT Ty) const {
   11087   SmallVector<SDValue, 8> BuildVector;
   11088 
   11089   for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
   11090     StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
   11091     Chains.push_back(St->getChain());
   11092     BuildVector.push_back(St->getValue());
   11093   }
   11094 
   11095   return DAG.getNode(ISD::BUILD_VECTOR, SL, Ty, BuildVector);
   11096 }
   11097 
   11098 bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
   11099                   SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
   11100                   unsigned NumStores, bool IsConstantSrc, bool UseVector) {
   11101   // Make sure we have something to merge.
   11102   if (NumStores < 2)
   11103     return false;
   11104 
   11105   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
   11106   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   11107   unsigned LatestNodeUsed = 0;
   11108 
   11109   for (unsigned i=0; i < NumStores; ++i) {
   11110     // Find a chain for the new wide-store operand. Notice that some
   11111     // of the store nodes that we found may not be selected for inclusion
   11112     // in the wide store. The chain we use needs to be the chain of the
   11113     // latest store node which is *used* and replaced by the wide store.
   11114     if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
   11115       LatestNodeUsed = i;
   11116   }
   11117 
   11118   SmallVector<SDValue, 8> Chains;
   11119 
   11120   // The latest Node in the DAG.
   11121   LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
   11122   SDLoc DL(StoreNodes[0].MemNode);
   11123 
   11124   SDValue StoredVal;
   11125   if (UseVector) {
   11126     bool IsVec = MemVT.isVector();
   11127     unsigned Elts = NumStores;
   11128     if (IsVec) {
   11129       // When merging vector stores, get the total number of elements.
   11130       Elts *= MemVT.getVectorNumElements();
   11131     }
   11132     // Get the type for the merged vector store.
   11133     EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
   11134     assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
   11135 
   11136     if (IsConstantSrc) {
   11137       StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
   11138     } else {
   11139       SmallVector<SDValue, 8> Ops;
   11140       for (unsigned i = 0; i < NumStores; ++i) {
   11141         StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
   11142         SDValue Val = St->getValue();
   11143         // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
   11144         if (Val.getValueType() != MemVT)
   11145           return false;
   11146         Ops.push_back(Val);
   11147         Chains.push_back(St->getChain());
   11148       }
   11149 
   11150       // Build the extracted vector elements back into a vector.
   11151       StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
   11152                               DL, Ty, Ops);    }
   11153   } else {
   11154     // We should always use a vector store when merging extracted vector
   11155     // elements, so this path implies a store of constants.
   11156     assert(IsConstantSrc && "Merged vector elements should use vector store");
   11157 
   11158     unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
   11159     APInt StoreInt(SizeInBits, 0);
   11160 
   11161     // Construct a single integer constant which is made of the smaller
   11162     // constant inputs.
   11163     bool IsLE = DAG.getDataLayout().isLittleEndian();
   11164     for (unsigned i = 0; i < NumStores; ++i) {
   11165       unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
   11166       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
   11167       Chains.push_back(St->getChain());
   11168 
   11169       SDValue Val = St->getValue();
   11170       StoreInt <<= ElementSizeBytes * 8;
   11171       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
   11172         StoreInt |= C->getAPIntValue().zext(SizeInBits);
   11173       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
   11174         StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
   11175       } else {
   11176         llvm_unreachable("Invalid constant element type");
   11177       }
   11178     }
   11179 
   11180     // Create the new Load and Store operations.
   11181     EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
   11182     StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
   11183   }
   11184 
   11185   assert(!Chains.empty());
   11186 
   11187   SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
   11188   SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
   11189                                   FirstInChain->getBasePtr(),
   11190                                   FirstInChain->getPointerInfo(),
   11191                                   false, false,
   11192                                   FirstInChain->getAlignment());
   11193 
   11194   // Replace the last store with the new store
   11195   CombineTo(LatestOp, NewStore);
   11196   // Erase all other stores.
   11197   for (unsigned i = 0; i < NumStores; ++i) {
   11198     if (StoreNodes[i].MemNode == LatestOp)
   11199       continue;
   11200     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
   11201     // ReplaceAllUsesWith will replace all uses that existed when it was
   11202     // called, but graph optimizations may cause new ones to appear. For
   11203     // example, the case in pr14333 looks like
   11204     //
   11205     //  St's chain -> St -> another store -> X
   11206     //
   11207     // And the only difference from St to the other store is the chain.
   11208     // When we change it's chain to be St's chain they become identical,
   11209     // get CSEed and the net result is that X is now a use of St.
   11210     // Since we know that St is redundant, just iterate.
   11211     while (!St->use_empty())
   11212       DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
   11213     deleteAndRecombine(St);
   11214   }
   11215 
   11216   return true;
   11217 }
   11218 
   11219 void DAGCombiner::getStoreMergeAndAliasCandidates(
   11220     StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
   11221     SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
   11222   // This holds the base pointer, index, and the offset in bytes from the base
   11223   // pointer.
   11224   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
   11225 
   11226   // We must have a base and an offset.
   11227   if (!BasePtr.Base.getNode())
   11228     return;
   11229 
   11230   // Do not handle stores to undef base pointers.
   11231   if (BasePtr.Base.getOpcode() == ISD::UNDEF)
   11232     return;
   11233 
   11234   // Walk up the chain and look for nodes with offsets from the same
   11235   // base pointer. Stop when reaching an instruction with a different kind
   11236   // or instruction which has a different base pointer.
   11237   EVT MemVT = St->getMemoryVT();
   11238   unsigned Seq = 0;
   11239   StoreSDNode *Index = St;
   11240 
   11241 
   11242   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
   11243                                                   : DAG.getSubtarget().useAA();
   11244 
   11245   if (UseAA) {
   11246     // Look at other users of the same chain. Stores on the same chain do not
   11247     // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
   11248     // to be on the same chain, so don't bother looking at adjacent chains.
   11249 
   11250     SDValue Chain = St->getChain();
   11251     for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
   11252       if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
   11253         if (I.getOperandNo() != 0)
   11254           continue;
   11255 
   11256         if (OtherST->isVolatile() || OtherST->isIndexed())
   11257           continue;
   11258 
   11259         if (OtherST->getMemoryVT() != MemVT)
   11260           continue;
   11261 
   11262         BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr());
   11263 
   11264         if (Ptr.equalBaseIndex(BasePtr))
   11265           StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
   11266       }
   11267     }
   11268 
   11269     return;
   11270   }
   11271 
   11272   while (Index) {
   11273     // If the chain has more than one use, then we can't reorder the mem ops.
   11274     if (Index != St && !SDValue(Index, 0)->hasOneUse())
   11275       break;
   11276 
   11277     // Find the base pointer and offset for this memory node.
   11278     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
   11279 
   11280     // Check that the base pointer is the same as the original one.
   11281     if (!Ptr.equalBaseIndex(BasePtr))
   11282       break;
   11283 
   11284     // The memory operands must not be volatile.
   11285     if (Index->isVolatile() || Index->isIndexed())
   11286       break;
   11287 
   11288     // No truncation.
   11289     if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
   11290       if (St->isTruncatingStore())
   11291         break;
   11292 
   11293     // The stored memory type must be the same.
   11294     if (Index->getMemoryVT() != MemVT)
   11295       break;
   11296 
   11297     // We do not allow under-aligned stores in order to prevent
   11298     // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
   11299     // be irrelevant here; what MATTERS is that we not move memory
   11300     // operations that potentially overlap past each-other.
   11301     if (Index->getAlignment() < MemVT.getStoreSize())
   11302       break;
   11303 
   11304     // We found a potential memory operand to merge.
   11305     StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));
   11306 
   11307     // Find the next memory operand in the chain. If the next operand in the
   11308     // chain is a store then move up and continue the scan with the next
   11309     // memory operand. If the next operand is a load save it and use alias
   11310     // information to check if it interferes with anything.
   11311     SDNode *NextInChain = Index->getChain().getNode();
   11312     while (1) {
   11313       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
   11314         // We found a store node. Use it for the next iteration.
   11315         Index = STn;
   11316         break;
   11317       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
   11318         if (Ldn->isVolatile()) {
   11319           Index = nullptr;
   11320           break;
   11321         }
   11322 
   11323         // Save the load node for later. Continue the scan.
   11324         AliasLoadNodes.push_back(Ldn);
   11325         NextInChain = Ldn->getChain().getNode();
   11326         continue;
   11327       } else {
   11328         Index = nullptr;
   11329         break;
   11330       }
   11331     }
   11332   }
   11333 }
   11334 
   11335 bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
   11336   if (OptLevel == CodeGenOpt::None)
   11337     return false;
   11338 
   11339   EVT MemVT = St->getMemoryVT();
   11340   int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
   11341   bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
   11342       Attribute::NoImplicitFloat);
   11343 
   11344   // This function cannot currently deal with non-byte-sized memory sizes.
   11345   if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
   11346     return false;
   11347 
   11348   if (!MemVT.isSimple())
   11349     return false;
   11350 
   11351   // Perform an early exit check. Do not bother looking at stored values that
   11352   // are not constants, loads, or extracted vector elements.
   11353   SDValue StoredVal = St->getValue();
   11354   bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
   11355   bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
   11356                        isa<ConstantFPSDNode>(StoredVal);
   11357   bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
   11358                           StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);
   11359 
   11360   if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
   11361     return false;
   11362 
   11363   // Don't merge vectors into wider vectors if the source data comes from loads.
   11364   // TODO: This restriction can be lifted by using logic similar to the
   11365   // ExtractVecSrc case.
   11366   if (MemVT.isVector() && IsLoadSrc)
   11367     return false;
   11368 
   11369   // Only look at ends of store sequences.
   11370   SDValue Chain = SDValue(St, 0);
   11371   if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
   11372     return false;
   11373 
   11374   // Save the LoadSDNodes that we find in the chain.
   11375   // We need to make sure that these nodes do not interfere with
   11376   // any of the store nodes.
   11377   SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;
   11378 
   11379   // Save the StoreSDNodes that we find in the chain.
   11380   SmallVector<MemOpLink, 8> StoreNodes;
   11381 
   11382   getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);
   11383 
   11384   // Check if there is anything to merge.
   11385   if (StoreNodes.size() < 2)
   11386     return false;
   11387 
   11388   // Sort the memory operands according to their distance from the
   11389   // base pointer.  As a secondary criteria: make sure stores coming
   11390   // later in the code come first in the list. This is important for
   11391   // the non-UseAA case, because we're merging stores into the FINAL
   11392   // store along a chain which potentially contains aliasing stores.
   11393   // Thus, if there are multiple stores to the same address, the last
   11394   // one can be considered for merging but not the others.
   11395   std::sort(StoreNodes.begin(), StoreNodes.end(),
   11396             [](MemOpLink LHS, MemOpLink RHS) {
   11397     return LHS.OffsetFromBase < RHS.OffsetFromBase ||
   11398            (LHS.OffsetFromBase == RHS.OffsetFromBase &&
   11399             LHS.SequenceNum < RHS.SequenceNum);
   11400   });
   11401 
   11402   // Scan the memory operations on the chain and find the first non-consecutive
   11403   // store memory address.
   11404   unsigned LastConsecutiveStore = 0;
   11405   int64_t StartAddress = StoreNodes[0].OffsetFromBase;
   11406   for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {
   11407 
   11408     // Check that the addresses are consecutive starting from the second
   11409     // element in the list of stores.
   11410     if (i > 0) {
   11411       int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
   11412       if (CurrAddress - StartAddress != (ElementSizeBytes * i))
   11413         break;
   11414     }
   11415 
   11416     // Check if this store interferes with any of the loads that we found.
   11417     // If we find a load that alias with this store. Stop the sequence.
   11418     if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
   11419                     [&](LSBaseSDNode* Ldn) {
   11420                       return isAlias(Ldn, StoreNodes[i].MemNode);
   11421                     }))
   11422       break;
   11423 
   11424     // Mark this node as useful.
   11425     LastConsecutiveStore = i;
   11426   }
   11427 
   11428   // The node with the lowest store address.
   11429   LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
   11430   unsigned FirstStoreAS = FirstInChain->getAddressSpace();
   11431   unsigned FirstStoreAlign = FirstInChain->getAlignment();
   11432   LLVMContext &Context = *DAG.getContext();
   11433   const DataLayout &DL = DAG.getDataLayout();
   11434 
   11435   // Store the constants into memory as one consecutive store.
   11436   if (IsConstantSrc) {
   11437     unsigned LastLegalType = 0;
   11438     unsigned LastLegalVectorType = 0;
   11439     bool NonZero = false;
   11440     for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
   11441       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
   11442       SDValue StoredVal = St->getValue();
   11443 
   11444       if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
   11445         NonZero |= !C->isNullValue();
   11446       } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
   11447         NonZero |= !C->getConstantFPValue()->isNullValue();
   11448       } else {
   11449         // Non-constant.
   11450         break;
   11451       }
   11452 
   11453       // Find a legal type for the constant store.
   11454       unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
   11455       EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
   11456       bool IsFast;
   11457       if (TLI.isTypeLegal(StoreTy) &&
   11458           TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   11459                                  FirstStoreAlign, &IsFast) && IsFast) {
   11460         LastLegalType = i+1;
   11461       // Or check whether a truncstore is legal.
   11462       } else if (TLI.getTypeAction(Context, StoreTy) ==
   11463                  TargetLowering::TypePromoteInteger) {
   11464         EVT LegalizedStoredValueTy =
   11465           TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
   11466         if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
   11467             TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
   11468                                    FirstStoreAS, FirstStoreAlign, &IsFast) &&
   11469             IsFast) {
   11470           LastLegalType = i + 1;
   11471         }
   11472       }
   11473 
   11474       // We only use vectors if the constant is known to be zero or the target
   11475       // allows it and the function is not marked with the noimplicitfloat
   11476       // attribute.
   11477       if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
   11478                                                         FirstStoreAS)) &&
   11479           !NoVectors) {
   11480         // Find a legal type for the vector store.
   11481         EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
   11482         if (TLI.isTypeLegal(Ty) &&
   11483             TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
   11484                                    FirstStoreAlign, &IsFast) && IsFast)
   11485           LastLegalVectorType = i + 1;
   11486       }
   11487     }
   11488 
   11489     // Check if we found a legal integer type to store.
   11490     if (LastLegalType == 0 && LastLegalVectorType == 0)
   11491       return false;
   11492 
   11493     bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
   11494     unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;
   11495 
   11496     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
   11497                                            true, UseVector);
   11498   }
   11499 
   11500   // When extracting multiple vector elements, try to store them
   11501   // in one vector store rather than a sequence of scalar stores.
   11502   if (IsExtractVecSrc) {
   11503     unsigned NumStoresToMerge = 0;
   11504     bool IsVec = MemVT.isVector();
   11505     for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
   11506       StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
   11507       unsigned StoreValOpcode = St->getValue().getOpcode();
   11508       // This restriction could be loosened.
   11509       // Bail out if any stored values are not elements extracted from a vector.
   11510       // It should be possible to handle mixed sources, but load sources need
   11511       // more careful handling (see the block of code below that handles
   11512       // consecutive loads).
   11513       if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
   11514           StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
   11515         return false;
   11516 
   11517       // Find a legal type for the vector store.
   11518       unsigned Elts = i + 1;
   11519       if (IsVec) {
   11520         // When merging vector stores, get the total number of elements.
   11521         Elts *= MemVT.getVectorNumElements();
   11522       }
   11523       EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
   11524       bool IsFast;
   11525       if (TLI.isTypeLegal(Ty) &&
   11526           TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
   11527                                  FirstStoreAlign, &IsFast) && IsFast)
   11528         NumStoresToMerge = i + 1;
   11529     }
   11530 
   11531     return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
   11532                                            false, true);
   11533   }
   11534 
   11535   // Below we handle the case of multiple consecutive stores that
   11536   // come from multiple consecutive loads. We merge them into a single
   11537   // wide load and a single wide store.
   11538 
   11539   // Look for load nodes which are used by the stored values.
   11540   SmallVector<MemOpLink, 8> LoadNodes;
   11541 
   11542   // Find acceptable loads. Loads need to have the same chain (token factor),
   11543   // must not be zext, volatile, indexed, and they must be consecutive.
   11544   BaseIndexOffset LdBasePtr;
   11545   for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
   11546     StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
   11547     LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
   11548     if (!Ld) break;
   11549 
   11550     // Loads must only have one use.
   11551     if (!Ld->hasNUsesOfValue(1, 0))
   11552       break;
   11553 
   11554     // The memory operands must not be volatile.
   11555     if (Ld->isVolatile() || Ld->isIndexed())
   11556       break;
   11557 
   11558     // We do not accept ext loads.
   11559     if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
   11560       break;
   11561 
   11562     // The stored memory type must be the same.
   11563     if (Ld->getMemoryVT() != MemVT)
   11564       break;
   11565 
   11566     BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr());
   11567     // If this is not the first ptr that we check.
   11568     if (LdBasePtr.Base.getNode()) {
   11569       // The base ptr must be the same.
   11570       if (!LdPtr.equalBaseIndex(LdBasePtr))
   11571         break;
   11572     } else {
   11573       // Check that all other base pointers are the same as this one.
   11574       LdBasePtr = LdPtr;
   11575     }
   11576 
   11577     // We found a potential memory operand to merge.
   11578     LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
   11579   }
   11580 
   11581   if (LoadNodes.size() < 2)
   11582     return false;
   11583 
   11584   // If we have load/store pair instructions and we only have two values,
   11585   // don't bother.
   11586   unsigned RequiredAlignment;
   11587   if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
   11588       St->getAlignment() >= RequiredAlignment)
   11589     return false;
   11590 
   11591   LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
   11592   unsigned FirstLoadAS = FirstLoad->getAddressSpace();
   11593   unsigned FirstLoadAlign = FirstLoad->getAlignment();
   11594 
   11595   // Scan the memory operations on the chain and find the first non-consecutive
   11596   // load memory address. These variables hold the index in the store node
   11597   // array.
   11598   unsigned LastConsecutiveLoad = 0;
   11599   // This variable refers to the size and not index in the array.
   11600   unsigned LastLegalVectorType = 0;
   11601   unsigned LastLegalIntegerType = 0;
   11602   StartAddress = LoadNodes[0].OffsetFromBase;
   11603   SDValue FirstChain = FirstLoad->getChain();
   11604   for (unsigned i = 1; i < LoadNodes.size(); ++i) {
   11605     // All loads must share the same chain.
   11606     if (LoadNodes[i].MemNode->getChain() != FirstChain)
   11607       break;
   11608 
   11609     int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
   11610     if (CurrAddress - StartAddress != (ElementSizeBytes * i))
   11611       break;
   11612     LastConsecutiveLoad = i;
   11613     // Find a legal type for the vector store.
   11614     EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
   11615     bool IsFastSt, IsFastLd;
   11616     if (TLI.isTypeLegal(StoreTy) &&
   11617         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   11618                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
   11619         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
   11620                                FirstLoadAlign, &IsFastLd) && IsFastLd) {
   11621       LastLegalVectorType = i + 1;
   11622     }
   11623 
   11624     // Find a legal type for the integer store.
   11625     unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
   11626     StoreTy = EVT::getIntegerVT(Context, SizeInBits);
   11627     if (TLI.isTypeLegal(StoreTy) &&
   11628         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
   11629                                FirstStoreAlign, &IsFastSt) && IsFastSt &&
   11630         TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
   11631                                FirstLoadAlign, &IsFastLd) && IsFastLd)
   11632       LastLegalIntegerType = i + 1;
   11633     // Or check whether a truncstore and extload is legal.
   11634     else if (TLI.getTypeAction(Context, StoreTy) ==
   11635              TargetLowering::TypePromoteInteger) {
   11636       EVT LegalizedStoredValueTy =
   11637         TLI.getTypeToTransformTo(Context, StoreTy);
   11638       if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
   11639           TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
   11640           TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
   11641           TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
   11642           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
   11643                                  FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
   11644           IsFastSt &&
   11645           TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
   11646                                  FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
   11647           IsFastLd)
   11648         LastLegalIntegerType = i+1;
   11649     }
   11650   }
   11651 
   11652   // Only use vector types if the vector type is larger than the integer type.
   11653   // If they are the same, use integers.
   11654   bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
   11655   unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);
   11656 
   11657   // We add +1 here because the LastXXX variables refer to location while
   11658   // the NumElem refers to array/index size.
   11659   unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
   11660   NumElem = std::min(LastLegalType, NumElem);
   11661 
   11662   if (NumElem < 2)
   11663     return false;
   11664 
   11665   // Collect the chains from all merged stores.
   11666   SmallVector<SDValue, 8> MergeStoreChains;
   11667   MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());
   11668 
   11669   // The latest Node in the DAG.
   11670   unsigned LatestNodeUsed = 0;
   11671   for (unsigned i=1; i<NumElem; ++i) {
   11672     // Find a chain for the new wide-store operand. Notice that some
   11673     // of the store nodes that we found may not be selected for inclusion
   11674     // in the wide store. The chain we use needs to be the chain of the
   11675     // latest store node which is *used* and replaced by the wide store.
   11676     if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
   11677       LatestNodeUsed = i;
   11678 
   11679     MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
   11680   }
   11681 
   11682   LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
   11683 
   11684   // Find if it is better to use vectors or integers to load and store
   11685   // to memory.
   11686   EVT JointMemOpVT;
   11687   if (UseVectorTy) {
   11688     JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
   11689   } else {
   11690     unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
   11691     JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
   11692   }
   11693 
   11694   SDLoc LoadDL(LoadNodes[0].MemNode);
   11695   SDLoc StoreDL(StoreNodes[0].MemNode);
   11696 
   11697   // The merged loads are required to have the same incoming chain, so
   11698   // using the first's chain is acceptable.
   11699   SDValue NewLoad = DAG.getLoad(
   11700       JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
   11701       FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);
   11702 
   11703   SDValue NewStoreChain =
   11704     DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);
   11705 
   11706   SDValue NewStore = DAG.getStore(
   11707     NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
   11708       FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);
   11709 
   11710   // Transfer chain users from old loads to the new load.
   11711   for (unsigned i = 0; i < NumElem; ++i) {
   11712     LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
   11713     DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
   11714                                   SDValue(NewLoad.getNode(), 1));
   11715   }
   11716 
   11717   // Replace the last store with the new store.
   11718   CombineTo(LatestOp, NewStore);
   11719   // Erase all other stores.
   11720   for (unsigned i = 0; i < NumElem ; ++i) {
   11721     // Remove all Store nodes.
   11722     if (StoreNodes[i].MemNode == LatestOp)
   11723       continue;
   11724     StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
   11725     DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
   11726     deleteAndRecombine(St);
   11727   }
   11728 
   11729   return true;
   11730 }
   11731 
   11732 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
   11733   SDLoc SL(ST);
   11734   SDValue ReplStore;
   11735 
   11736   // Replace the chain to avoid dependency.
   11737   if (ST->isTruncatingStore()) {
   11738     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
   11739                                   ST->getBasePtr(), ST->getMemoryVT(),
   11740                                   ST->getMemOperand());
   11741   } else {
   11742     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
   11743                              ST->getMemOperand());
   11744   }
   11745 
   11746   // Create token to keep both nodes around.
   11747   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
   11748                               MVT::Other, ST->getChain(), ReplStore);
   11749 
   11750   // Make sure the new and old chains are cleaned up.
   11751   AddToWorklist(Token.getNode());
   11752 
   11753   // Don't add users to work list.
   11754   return CombineTo(ST, Token, false);
   11755 }
   11756 
   11757 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
   11758   SDValue Value = ST->getValue();
   11759   if (Value.getOpcode() == ISD::TargetConstantFP)
   11760     return SDValue();
   11761 
   11762   SDLoc DL(ST);
   11763 
   11764   SDValue Chain = ST->getChain();
   11765   SDValue Ptr = ST->getBasePtr();
   11766 
   11767   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
   11768 
   11769   // NOTE: If the original store is volatile, this transform must not increase
   11770   // the number of stores.  For example, on x86-32 an f64 can be stored in one
   11771   // processor operation but an i64 (which is not legal) requires two.  So the
   11772   // transform should not be done in this case.
   11773 
   11774   SDValue Tmp;
   11775   switch (CFP->getSimpleValueType(0).SimpleTy) {
   11776   default:
   11777     llvm_unreachable("Unknown FP type");
   11778   case MVT::f16:    // We don't do this for these yet.
   11779   case MVT::f80:
   11780   case MVT::f128:
   11781   case MVT::ppcf128:
   11782     return SDValue();
   11783   case MVT::f32:
   11784     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
   11785         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   11786       ;
   11787       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
   11788                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
   11789                             MVT::i32);
   11790       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
   11791     }
   11792 
   11793     return SDValue();
   11794   case MVT::f64:
   11795     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
   11796          !ST->isVolatile()) ||
   11797         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
   11798       ;
   11799       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
   11800                             getZExtValue(), SDLoc(CFP), MVT::i64);
   11801       return DAG.getStore(Chain, DL, Tmp,
   11802                           Ptr, ST->getMemOperand());
   11803     }
   11804 
   11805     if (!ST->isVolatile() &&
   11806         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   11807       // Many FP stores are not made apparent until after legalize, e.g. for
   11808       // argument passing.  Since this is so common, custom legalize the
   11809       // 64-bit integer store into two 32-bit stores.
   11810       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
   11811       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
   11812       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
   11813       if (DAG.getDataLayout().isBigEndian())
   11814         std::swap(Lo, Hi);
   11815 
   11816       unsigned Alignment = ST->getAlignment();
   11817       bool isVolatile = ST->isVolatile();
   11818       bool isNonTemporal = ST->isNonTemporal();
   11819       AAMDNodes AAInfo = ST->getAAInfo();
   11820 
   11821       SDValue St0 = DAG.getStore(Chain, DL, Lo,
   11822                                  Ptr, ST->getPointerInfo(),
   11823                                  isVolatile, isNonTemporal,
   11824                                  ST->getAlignment(), AAInfo);
   11825       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   11826                         DAG.getConstant(4, DL, Ptr.getValueType()));
   11827       Alignment = MinAlign(Alignment, 4U);
   11828       SDValue St1 = DAG.getStore(Chain, DL, Hi,
   11829                                  Ptr, ST->getPointerInfo().getWithOffset(4),
   11830                                  isVolatile, isNonTemporal,
   11831                                  Alignment, AAInfo);
   11832       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
   11833                          St0, St1);
   11834     }
   11835 
   11836     return SDValue();
   11837   }
   11838 }
   11839 
   11840 SDValue DAGCombiner::visitSTORE(SDNode *N) {
   11841   StoreSDNode *ST  = cast<StoreSDNode>(N);
   11842   SDValue Chain = ST->getChain();
   11843   SDValue Value = ST->getValue();
   11844   SDValue Ptr   = ST->getBasePtr();
   11845 
   11846   // If this is a store of a bit convert, store the input value if the
   11847   // resultant store does not need a higher alignment than the original.
   11848   if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
   11849       ST->isUnindexed()) {
   11850     unsigned OrigAlign = ST->getAlignment();
   11851     EVT SVT = Value.getOperand(0).getValueType();
   11852     unsigned Align = DAG.getDataLayout().getABITypeAlignment(
   11853         SVT.getTypeForEVT(*DAG.getContext()));
   11854     if (Align <= OrigAlign &&
   11855         ((!LegalOperations && !ST->isVolatile()) ||
   11856          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
   11857       return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
   11858                           Ptr, ST->getPointerInfo(), ST->isVolatile(),
   11859                           ST->isNonTemporal(), OrigAlign,
   11860                           ST->getAAInfo());
   11861   }
   11862 
   11863   // Turn 'store undef, Ptr' -> nothing.
   11864   if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
   11865     return Chain;
   11866 
   11867   // Try to infer better alignment information than the store already has.
   11868   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
   11869     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
   11870       if (Align > ST->getAlignment()) {
   11871         SDValue NewStore =
   11872                DAG.getTruncStore(Chain, SDLoc(N), Value,
   11873                                  Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
   11874                                  ST->isVolatile(), ST->isNonTemporal(), Align,
   11875                                  ST->getAAInfo());
   11876         if (NewStore.getNode() != N)
   11877           return CombineTo(ST, NewStore, true);
   11878       }
   11879     }
   11880   }
   11881 
   11882   // Try transforming a pair floating point load / store ops to integer
   11883   // load / store ops.
   11884   if (SDValue NewST = TransformFPLoadStorePair(N))
   11885     return NewST;
   11886 
   11887   bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
   11888                                                   : DAG.getSubtarget().useAA();
   11889 #ifndef NDEBUG
   11890   if (CombinerAAOnlyFunc.getNumOccurrences() &&
   11891       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
   11892     UseAA = false;
   11893 #endif
   11894   if (UseAA && ST->isUnindexed()) {
   11895     // FIXME: We should do this even without AA enabled. AA will just allow
   11896     // FindBetterChain to work in more situations. The problem with this is that
   11897     // any combine that expects memory operations to be on consecutive chains
   11898     // first needs to be updated to look for users of the same chain.
   11899 
   11900     // Walk up chain skipping non-aliasing memory nodes, on this store and any
   11901     // adjacent stores.
   11902     if (findBetterNeighborChains(ST)) {
   11903       // replaceStoreChain uses CombineTo, which handled all of the worklist
   11904       // manipulation. Return the original node to not do anything else.
   11905       return SDValue(ST, 0);
   11906     }
   11907   }
   11908 
   11909   // Try transforming N to an indexed store.
   11910   if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
   11911     return SDValue(N, 0);
   11912 
   11913   // FIXME: is there such a thing as a truncating indexed store?
   11914   if (ST->isTruncatingStore() && ST->isUnindexed() &&
   11915       Value.getValueType().isInteger()) {
   11916     // See if we can simplify the input to this truncstore with knowledge that
   11917     // only the low bits are being used.  For example:
   11918     // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
   11919     SDValue Shorter =
   11920       GetDemandedBits(Value,
   11921                       APInt::getLowBitsSet(
   11922                         Value.getValueType().getScalarType().getSizeInBits(),
   11923                         ST->getMemoryVT().getScalarType().getSizeInBits()));
   11924     AddToWorklist(Value.getNode());
   11925     if (Shorter.getNode())
   11926       return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
   11927                                Ptr, ST->getMemoryVT(), ST->getMemOperand());
   11928 
   11929     // Otherwise, see if we can simplify the operation with
   11930     // SimplifyDemandedBits, which only works if the value has a single use.
   11931     if (SimplifyDemandedBits(Value,
   11932                         APInt::getLowBitsSet(
   11933                           Value.getValueType().getScalarType().getSizeInBits(),
   11934                           ST->getMemoryVT().getScalarType().getSizeInBits())))
   11935       return SDValue(N, 0);
   11936   }
   11937 
   11938   // If this is a load followed by a store to the same location, then the store
   11939   // is dead/noop.
   11940   if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
   11941     if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
   11942         ST->isUnindexed() && !ST->isVolatile() &&
   11943         // There can't be any side effects between the load and store, such as
   11944         // a call or store.
   11945         Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
   11946       // The store is dead, remove it.
   11947       return Chain;
   11948     }
   11949   }
   11950 
   11951   // If this is a store followed by a store with the same value to the same
   11952   // location, then the store is dead/noop.
   11953   if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
   11954     if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
   11955         ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
   11956         ST1->isUnindexed() && !ST1->isVolatile()) {
   11957       // The store is dead, remove it.
   11958       return Chain;
   11959     }
   11960   }
   11961 
   11962   // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
   11963   // truncating store.  We can do this even if this is already a truncstore.
   11964   if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
   11965       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
   11966       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
   11967                             ST->getMemoryVT())) {
   11968     return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
   11969                              Ptr, ST->getMemoryVT(), ST->getMemOperand());
   11970   }
   11971 
   11972   // Only perform this optimization before the types are legal, because we
   11973   // don't want to perform this optimization on every DAGCombine invocation.
   11974   if (!LegalTypes) {
   11975     bool EverChanged = false;
   11976 
   11977     do {
   11978       // There can be multiple store sequences on the same chain.
   11979       // Keep trying to merge store sequences until we are unable to do so
   11980       // or until we merge the last store on the chain.
   11981       bool Changed = MergeConsecutiveStores(ST);
   11982       EverChanged |= Changed;
   11983       if (!Changed) break;
   11984     } while (ST->getOpcode() != ISD::DELETED_NODE);
   11985 
   11986     if (EverChanged)
   11987       return SDValue(N, 0);
   11988   }
   11989 
   11990   // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
   11991   //
   11992   // Make sure to do this only after attempting to merge stores in order to
   11993   //  avoid changing the types of some subset of stores due to visit order,
   11994   //  preventing their merging.
   11995   if (isa<ConstantFPSDNode>(Value)) {
   11996     if (SDValue NewSt = replaceStoreOfFPConstant(ST))
   11997       return NewSt;
   11998   }
   11999 
   12000   return ReduceLoadOpStoreWidth(N);
   12001 }
   12002 
   12003 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   12004   SDValue InVec = N->getOperand(0);
   12005   SDValue InVal = N->getOperand(1);
   12006   SDValue EltNo = N->getOperand(2);
   12007   SDLoc dl(N);
   12008 
   12009   // If the inserted element is an UNDEF, just use the input vector.
   12010   if (InVal.getOpcode() == ISD::UNDEF)
   12011     return InVec;
   12012 
   12013   EVT VT = InVec.getValueType();
   12014 
   12015   // If we can't generate a legal BUILD_VECTOR, exit
   12016   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   12017     return SDValue();
   12018 
   12019   // Check that we know which element is being inserted
   12020   if (!isa<ConstantSDNode>(EltNo))
   12021     return SDValue();
   12022   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
   12023 
   12024   // Canonicalize insert_vector_elt dag nodes.
   12025   // Example:
   12026   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
   12027   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
   12028   //
   12029   // Do this only if the child insert_vector node has one use; also
   12030   // do this only if indices are both constants and Idx1 < Idx0.
   12031   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
   12032       && isa<ConstantSDNode>(InVec.getOperand(2))) {
   12033     unsigned OtherElt =
   12034       cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
   12035     if (Elt < OtherElt) {
   12036       // Swap nodes.
   12037       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
   12038                                   InVec.getOperand(0), InVal, EltNo);
   12039       AddToWorklist(NewOp.getNode());
   12040       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
   12041                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
   12042     }
   12043   }
   12044 
   12045   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
   12046   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
   12047   // vector elements.
   12048   SmallVector<SDValue, 8> Ops;
   12049   // Do not combine these two vectors if the output vector will not replace
   12050   // the input vector.
   12051   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
   12052     Ops.append(InVec.getNode()->op_begin(),
   12053                InVec.getNode()->op_end());
   12054   } else if (InVec.getOpcode() == ISD::UNDEF) {
   12055     unsigned NElts = VT.getVectorNumElements();
   12056     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
   12057   } else {
   12058     return SDValue();
   12059   }
   12060 
   12061   // Insert the element
   12062   if (Elt < Ops.size()) {
   12063     // All the operands of BUILD_VECTOR must have the same type;
   12064     // we enforce that here.
   12065     EVT OpVT = Ops[0].getValueType();
   12066     if (InVal.getValueType() != OpVT)
   12067       InVal = OpVT.bitsGT(InVal.getValueType()) ?
   12068                 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
   12069                 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
   12070     Ops[Elt] = InVal;
   12071   }
   12072 
   12073   // Return the new vector
   12074   return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
   12075 }
   12076 
   12077 SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
   12078     SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
   12079   EVT ResultVT = EVE->getValueType(0);
   12080   EVT VecEltVT = InVecVT.getVectorElementType();
   12081   unsigned Align = OriginalLoad->getAlignment();
   12082   unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
   12083       VecEltVT.getTypeForEVT(*DAG.getContext()));
   12084 
   12085   if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
   12086     return SDValue();
   12087 
   12088   Align = NewAlign;
   12089 
   12090   SDValue NewPtr = OriginalLoad->getBasePtr();
   12091   SDValue Offset;
   12092   EVT PtrType = NewPtr.getValueType();
   12093   MachinePointerInfo MPI;
   12094   SDLoc DL(EVE);
   12095   if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
   12096     int Elt = ConstEltNo->getZExtValue();
   12097     unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
   12098     Offset = DAG.getConstant(PtrOff, DL, PtrType);
   12099     MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
   12100   } else {
   12101     Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
   12102     Offset = DAG.getNode(
   12103         ISD::MUL, DL, PtrType, Offset,
   12104         DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
   12105     MPI = OriginalLoad->getPointerInfo();
   12106   }
   12107   NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);
   12108 
   12109   // The replacement we need to do here is a little tricky: we need to
   12110   // replace an extractelement of a load with a load.
   12111   // Use ReplaceAllUsesOfValuesWith to do the replacement.
   12112   // Note that this replacement assumes that the extractvalue is the only
   12113   // use of the load; that's okay because we don't want to perform this
   12114   // transformation in other cases anyway.
   12115   SDValue Load;
   12116   SDValue Chain;
   12117   if (ResultVT.bitsGT(VecEltVT)) {
   12118     // If the result type of vextract is wider than the load, then issue an
   12119     // extending load instead.
   12120     ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
   12121                                                   VecEltVT)
   12122                                    ? ISD::ZEXTLOAD
   12123                                    : ISD::EXTLOAD;
   12124     Load = DAG.getExtLoad(
   12125         ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
   12126         VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
   12127         OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
   12128     Chain = Load.getValue(1);
   12129   } else {
   12130     Load = DAG.getLoad(
   12131         VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
   12132         OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
   12133         OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
   12134     Chain = Load.getValue(1);
   12135     if (ResultVT.bitsLT(VecEltVT))
   12136       Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
   12137     else
   12138       Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
   12139   }
   12140   WorklistRemover DeadNodes(*this);
   12141   SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
   12142   SDValue To[] = { Load, Chain };
   12143   DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
   12144   // Since we're explicitly calling ReplaceAllUses, add the new node to the
   12145   // worklist explicitly as well.
   12146   AddToWorklist(Load.getNode());
   12147   AddUsersToWorklist(Load.getNode()); // Add users too
   12148   // Make sure to revisit this node to clean it up; it will usually be dead.
   12149   AddToWorklist(EVE);
   12150   ++OpsNarrowed;
   12151   return SDValue(EVE, 0);
   12152 }
   12153 
   12154 SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
   12155   // (vextract (scalar_to_vector val, 0) -> val
   12156   SDValue InVec = N->getOperand(0);
   12157   EVT VT = InVec.getValueType();
   12158   EVT NVT = N->getValueType(0);
   12159 
   12160   if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
   12161     // Check if the result type doesn't match the inserted element type. A
   12162     // SCALAR_TO_VECTOR may truncate the inserted element and the
   12163     // EXTRACT_VECTOR_ELT may widen the extracted vector.
   12164     SDValue InOp = InVec.getOperand(0);
   12165     if (InOp.getValueType() != NVT) {
   12166       assert(InOp.getValueType().isInteger() && NVT.isInteger());
   12167       return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
   12168     }
   12169     return InOp;
   12170   }
   12171 
   12172   SDValue EltNo = N->getOperand(1);
   12173   ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);
   12174 
   12175   // extract_vector_elt (build_vector x, y), 1 -> y
   12176   if (ConstEltNo &&
   12177       InVec.getOpcode() == ISD::BUILD_VECTOR &&
   12178       TLI.isTypeLegal(VT) &&
   12179       (InVec.hasOneUse() ||
   12180        TLI.aggressivelyPreferBuildVectorSources(VT))) {
   12181     SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
   12182     EVT InEltVT = Elt.getValueType();
   12183 
   12184     // Sometimes build_vector's scalar input types do not match result type.
   12185     if (NVT == InEltVT)
   12186       return Elt;
   12187 
   12188     // TODO: It may be useful to truncate if free if the build_vector implicitly
   12189     // converts.
   12190   }
   12191 
   12192   // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
   12193   // We only perform this optimization before the op legalization phase because
   12194   // we may introduce new vector instructions which are not backed by TD
   12195   // patterns. For example on AVX, extracting elements from a wide vector
   12196   // without using extract_subvector. However, if we can find an underlying
   12197   // scalar value, then we can always use that.
   12198   if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
   12199     int NumElem = VT.getVectorNumElements();
   12200     ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
   12201     // Find the new index to extract from.
   12202     int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());
   12203 
   12204     // Extracting an undef index is undef.
   12205     if (OrigElt == -1)
   12206       return DAG.getUNDEF(NVT);
   12207 
   12208     // Select the right vector half to extract from.
   12209     SDValue SVInVec;
   12210     if (OrigElt < NumElem) {
   12211       SVInVec = InVec->getOperand(0);
   12212     } else {
   12213       SVInVec = InVec->getOperand(1);
   12214       OrigElt -= NumElem;
   12215     }
   12216 
   12217     if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
   12218       SDValue InOp = SVInVec.getOperand(OrigElt);
   12219       if (InOp.getValueType() != NVT) {
   12220         assert(InOp.getValueType().isInteger() && NVT.isInteger());
   12221         InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
   12222       }
   12223 
   12224       return InOp;
   12225     }
   12226 
   12227     // FIXME: We should handle recursing on other vector shuffles and
   12228     // scalar_to_vector here as well.
   12229 
   12230     if (!LegalOperations) {
   12231       EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
   12232       return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
   12233                          DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
   12234     }
   12235   }
   12236 
   12237   bool BCNumEltsChanged = false;
   12238   EVT ExtVT = VT.getVectorElementType();
   12239   EVT LVT = ExtVT;
   12240 
   12241   // If the result of load has to be truncated, then it's not necessarily
   12242   // profitable.
   12243   if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
   12244     return SDValue();
   12245 
   12246   if (InVec.getOpcode() == ISD::BITCAST) {
   12247     // Don't duplicate a load with other uses.
   12248     if (!InVec.hasOneUse())
   12249       return SDValue();
   12250 
   12251     EVT BCVT = InVec.getOperand(0).getValueType();
   12252     if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
   12253       return SDValue();
   12254     if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
   12255       BCNumEltsChanged = true;
   12256     InVec = InVec.getOperand(0);
   12257     ExtVT = BCVT.getVectorElementType();
   12258   }
   12259 
   12260   // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
   12261   if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
   12262       ISD::isNormalLoad(InVec.getNode()) &&
   12263       !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
   12264     SDValue Index = N->getOperand(1);
   12265     if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec))
   12266       return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
   12267                                                            OrigLoad);
   12268   }
   12269 
   12270   // Perform only after legalization to ensure build_vector / vector_shuffle
   12271   // optimizations have already been done.
   12272   if (!LegalOperations) return SDValue();
   12273 
   12274   // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
   12275   // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
   12276   // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)
   12277 
   12278   if (ConstEltNo) {
   12279     int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
   12280 
   12281     LoadSDNode *LN0 = nullptr;
   12282     const ShuffleVectorSDNode *SVN = nullptr;
   12283     if (ISD::isNormalLoad(InVec.getNode())) {
   12284       LN0 = cast<LoadSDNode>(InVec);
   12285     } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
   12286                InVec.getOperand(0).getValueType() == ExtVT &&
   12287                ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
   12288       // Don't duplicate a load with other uses.
   12289       if (!InVec.hasOneUse())
   12290         return SDValue();
   12291 
   12292       LN0 = cast<LoadSDNode>(InVec.getOperand(0));
   12293     } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
   12294       // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
   12295       // =>
   12296       // (load $addr+1*size)
   12297 
   12298       // Don't duplicate a load with other uses.
   12299       if (!InVec.hasOneUse())
   12300         return SDValue();
   12301 
   12302       // If the bit convert changed the number of elements, it is unsafe
   12303       // to examine the mask.
   12304       if (BCNumEltsChanged)
   12305         return SDValue();
   12306 
   12307       // Select the input vector, guarding against out of range extract vector.
   12308       unsigned NumElems = VT.getVectorNumElements();
   12309       int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
   12310       InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);
   12311 
   12312       if (InVec.getOpcode() == ISD::BITCAST) {
   12313         // Don't duplicate a load with other uses.
   12314         if (!InVec.hasOneUse())
   12315           return SDValue();
   12316 
   12317         InVec = InVec.getOperand(0);
   12318       }
   12319       if (ISD::isNormalLoad(InVec.getNode())) {
   12320         LN0 = cast<LoadSDNode>(InVec);
   12321         Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
   12322         EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
   12323       }
   12324     }
   12325 
   12326     // Make sure we found a non-volatile load and the extractelement is
   12327     // the only use.
   12328     if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
   12329       return SDValue();
   12330 
   12331     // If Idx was -1 above, Elt is going to be -1, so just return undef.
   12332     if (Elt == -1)
   12333       return DAG.getUNDEF(LVT);
   12334 
   12335     return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
   12336   }
   12337 
   12338   return SDValue();
   12339 }
   12340 
   12341 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
   12342 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
   12343   // We perform this optimization post type-legalization because
   12344   // the type-legalizer often scalarizes integer-promoted vectors.
   12345   // Performing this optimization before may create bit-casts which
   12346   // will be type-legalized to complex code sequences.
   12347   // We perform this optimization only before the operation legalizer because we
   12348   // may introduce illegal operations.
   12349   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
   12350     return SDValue();
   12351 
   12352   unsigned NumInScalars = N->getNumOperands();
   12353   SDLoc dl(N);
   12354   EVT VT = N->getValueType(0);
   12355 
   12356   // Check to see if this is a BUILD_VECTOR of a bunch of values
   12357   // which come from any_extend or zero_extend nodes. If so, we can create
   12358   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
   12359   // optimizations. We do not handle sign-extend because we can't fill the sign
   12360   // using shuffles.
   12361   EVT SourceType = MVT::Other;
   12362   bool AllAnyExt = true;
   12363 
   12364   for (unsigned i = 0; i != NumInScalars; ++i) {
   12365     SDValue In = N->getOperand(i);
   12366     // Ignore undef inputs.
   12367     if (In.getOpcode() == ISD::UNDEF) continue;
   12368 
   12369     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
   12370     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
   12371 
   12372     // Abort if the element is not an extension.
   12373     if (!ZeroExt && !AnyExt) {
   12374       SourceType = MVT::Other;
   12375       break;
   12376     }
   12377 
   12378     // The input is a ZeroExt or AnyExt. Check the original type.
   12379     EVT InTy = In.getOperand(0).getValueType();
   12380 
   12381     // Check that all of the widened source types are the same.
   12382     if (SourceType == MVT::Other)
   12383       // First time.
   12384       SourceType = InTy;
   12385     else if (InTy != SourceType) {
   12386       // Multiple income types. Abort.
   12387       SourceType = MVT::Other;
   12388       break;
   12389     }
   12390 
   12391     // Check if all of the extends are ANY_EXTENDs.
   12392     AllAnyExt &= AnyExt;
   12393   }
   12394 
   12395   // In order to have valid types, all of the inputs must be extended from the
   12396   // same source type and all of the inputs must be any or zero extend.
   12397   // Scalar sizes must be a power of two.
   12398   EVT OutScalarTy = VT.getScalarType();
   12399   bool ValidTypes = SourceType != MVT::Other &&
   12400                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
   12401                  isPowerOf2_32(SourceType.getSizeInBits());
   12402 
   12403   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
   12404   // turn into a single shuffle instruction.
   12405   if (!ValidTypes)
   12406     return SDValue();
   12407 
   12408   bool isLE = DAG.getDataLayout().isLittleEndian();
   12409   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
   12410   assert(ElemRatio > 1 && "Invalid element size ratio");
   12411   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
   12412                                DAG.getConstant(0, SDLoc(N), SourceType);
   12413 
   12414   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
   12415   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
   12416 
   12417   // Populate the new build_vector
   12418   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   12419     SDValue Cast = N->getOperand(i);
   12420     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
   12421             Cast.getOpcode() == ISD::ZERO_EXTEND ||
   12422             Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
   12423     SDValue In;
   12424     if (Cast.getOpcode() == ISD::UNDEF)
   12425       In = DAG.getUNDEF(SourceType);
   12426     else
   12427       In = Cast->getOperand(0);
   12428     unsigned Index = isLE ? (i * ElemRatio) :
   12429                             (i * ElemRatio + (ElemRatio - 1));
   12430 
   12431     assert(Index < Ops.size() && "Invalid index");
   12432     Ops[Index] = In;
   12433   }
   12434 
   12435   // The type of the new BUILD_VECTOR node.
   12436   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
   12437   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
   12438          "Invalid vector size");
   12439   // Check if the new vector type is legal.
   12440   if (!isTypeLegal(VecVT)) return SDValue();
   12441 
   12442   // Make the new BUILD_VECTOR.
   12443   SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);
   12444 
   12445   // The new BUILD_VECTOR node has the potential to be further optimized.
   12446   AddToWorklist(BV.getNode());
   12447   // Bitcast to the desired type.
   12448   return DAG.getNode(ISD::BITCAST, dl, VT, BV);
   12449 }
   12450 
   12451 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
   12452   EVT VT = N->getValueType(0);
   12453 
   12454   unsigned NumInScalars = N->getNumOperands();
   12455   SDLoc dl(N);
   12456 
   12457   EVT SrcVT = MVT::Other;
   12458   unsigned Opcode = ISD::DELETED_NODE;
   12459   unsigned NumDefs = 0;
   12460 
   12461   for (unsigned i = 0; i != NumInScalars; ++i) {
   12462     SDValue In = N->getOperand(i);
   12463     unsigned Opc = In.getOpcode();
   12464 
   12465     if (Opc == ISD::UNDEF)
   12466       continue;
   12467 
   12468     // If all scalar values are floats and converted from integers.
   12469     if (Opcode == ISD::DELETED_NODE &&
   12470         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
   12471       Opcode = Opc;
   12472     }
   12473 
   12474     if (Opc != Opcode)
   12475       return SDValue();
   12476 
   12477     EVT InVT = In.getOperand(0).getValueType();
   12478 
   12479     // If all scalar values are typed differently, bail out. It's chosen to
   12480     // simplify BUILD_VECTOR of integer types.
   12481     if (SrcVT == MVT::Other)
   12482       SrcVT = InVT;
   12483     if (SrcVT != InVT)
   12484       return SDValue();
   12485     NumDefs++;
   12486   }
   12487 
   12488   // If the vector has just one element defined, it's not worth to fold it into
   12489   // a vectorized one.
   12490   if (NumDefs < 2)
   12491     return SDValue();
   12492 
   12493   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
   12494          && "Should only handle conversion from integer to float.");
   12495   assert(SrcVT != MVT::Other && "Cannot determine source type!");
   12496 
   12497   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
   12498 
   12499   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
   12500     return SDValue();
   12501 
   12502   // Just because the floating-point vector type is legal does not necessarily
   12503   // mean that the corresponding integer vector type is.
   12504   if (!isTypeLegal(NVT))
   12505     return SDValue();
   12506 
   12507   SmallVector<SDValue, 8> Opnds;
   12508   for (unsigned i = 0; i != NumInScalars; ++i) {
   12509     SDValue In = N->getOperand(i);
   12510 
   12511     if (In.getOpcode() == ISD::UNDEF)
   12512       Opnds.push_back(DAG.getUNDEF(SrcVT));
   12513     else
   12514       Opnds.push_back(In.getOperand(0));
   12515   }
   12516   SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
   12517   AddToWorklist(BV.getNode());
   12518 
   12519   return DAG.getNode(Opcode, dl, VT, BV);
   12520 }
   12521 
   12522 SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
   12523   unsigned NumInScalars = N->getNumOperands();
   12524   SDLoc dl(N);
   12525   EVT VT = N->getValueType(0);
   12526 
   12527   // A vector built entirely of undefs is undef.
   12528   if (ISD::allOperandsUndef(N))
   12529     return DAG.getUNDEF(VT);
   12530 
   12531   if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
   12532     return V;
   12533 
   12534   if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
   12535     return V;
   12536 
   12537   // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
   12538   // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
   12539   // at most two distinct vectors, turn this into a shuffle node.
   12540 
   12541   // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
   12542   if (!isTypeLegal(VT))
   12543     return SDValue();
   12544 
   12545   // May only combine to shuffle after legalize if shuffle is legal.
   12546   if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
   12547     return SDValue();
   12548 
   12549   SDValue VecIn1, VecIn2;
   12550   bool UsesZeroVector = false;
   12551   for (unsigned i = 0; i != NumInScalars; ++i) {
   12552     SDValue Op = N->getOperand(i);
   12553     // Ignore undef inputs.
   12554     if (Op.getOpcode() == ISD::UNDEF) continue;
   12555 
   12556     // See if we can combine this build_vector into a blend with a zero vector.
   12557     if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
   12558       UsesZeroVector = true;
   12559       continue;
   12560     }
   12561 
   12562     // If this input is something other than a EXTRACT_VECTOR_ELT with a
   12563     // constant index, bail out.
   12564     if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
   12565         !isa<ConstantSDNode>(Op.getOperand(1))) {
   12566       VecIn1 = VecIn2 = SDValue(nullptr, 0);
   12567       break;
   12568     }
   12569 
   12570     // We allow up to two distinct input vectors.
   12571     SDValue ExtractedFromVec = Op.getOperand(0);
   12572     if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
   12573       continue;
   12574 
   12575     if (!VecIn1.getNode()) {
   12576       VecIn1 = ExtractedFromVec;
   12577     } else if (!VecIn2.getNode() && !UsesZeroVector) {
   12578       VecIn2 = ExtractedFromVec;
   12579     } else {
   12580       // Too many inputs.
   12581       VecIn1 = VecIn2 = SDValue(nullptr, 0);
   12582       break;
   12583     }
   12584   }
   12585 
   12586   // If everything is good, we can make a shuffle operation.
   12587   if (VecIn1.getNode()) {
   12588     unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
   12589     SmallVector<int, 8> Mask;
   12590     for (unsigned i = 0; i != NumInScalars; ++i) {
   12591       unsigned Opcode = N->getOperand(i).getOpcode();
   12592       if (Opcode == ISD::UNDEF) {
   12593         Mask.push_back(-1);
   12594         continue;
   12595       }
   12596 
   12597       // Operands can also be zero.
   12598       if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
   12599         assert(UsesZeroVector &&
   12600                (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
   12601                "Unexpected node found!");
   12602         Mask.push_back(NumInScalars+i);
   12603         continue;
   12604       }
   12605 
   12606       // If extracting from the first vector, just use the index directly.
   12607       SDValue Extract = N->getOperand(i);
   12608       SDValue ExtVal = Extract.getOperand(1);
   12609       unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
   12610       if (Extract.getOperand(0) == VecIn1) {
   12611         Mask.push_back(ExtIndex);
   12612         continue;
   12613       }
   12614 
   12615       // Otherwise, use InIdx + InputVecSize
   12616       Mask.push_back(InNumElements + ExtIndex);
   12617     }
   12618 
   12619     // Avoid introducing illegal shuffles with zero.
   12620     if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
   12621       return SDValue();
   12622 
   12623     // We can't generate a shuffle node with mismatched input and output types.
   12624     // Attempt to transform a single input vector to the correct type.
   12625     if ((VT != VecIn1.getValueType())) {
   12626       // If the input vector type has a different base type to the output
   12627       // vector type, bail out.
   12628       EVT VTElemType = VT.getVectorElementType();
   12629       if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
   12630           (VecIn2.getNode() &&
   12631            (VecIn2.getValueType().getVectorElementType() != VTElemType)))
   12632         return SDValue();
   12633 
   12634       // If the input vector is too small, widen it.
   12635       // We only support widening of vectors which are half the size of the
   12636       // output registers. For example XMM->YMM widening on X86 with AVX.
   12637       EVT VecInT = VecIn1.getValueType();
   12638       if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
   12639         // If we only have one small input, widen it by adding undef values.
   12640         if (!VecIn2.getNode())
   12641           VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
   12642                                DAG.getUNDEF(VecIn1.getValueType()));
   12643         else if (VecIn1.getValueType() == VecIn2.getValueType()) {
   12644           // If we have two small inputs of the same type, try to concat them.
   12645           VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
   12646           VecIn2 = SDValue(nullptr, 0);
   12647         } else
   12648           return SDValue();
   12649       } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
   12650         // If the input vector is too large, try to split it.
   12651         // We don't support having two input vectors that are too large.
   12652         // If the zero vector was used, we can not split the vector,
   12653         // since we'd need 3 inputs.
   12654         if (UsesZeroVector || VecIn2.getNode())
   12655           return SDValue();
   12656 
   12657         if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
   12658           return SDValue();
   12659 
   12660         // Try to replace VecIn1 with two extract_subvectors
   12661         // No need to update the masks, they should still be correct.
   12662         VecIn2 = DAG.getNode(
   12663             ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
   12664             DAG.getConstant(VT.getVectorNumElements(), dl,
   12665                             TLI.getVectorIdxTy(DAG.getDataLayout())));
   12666         VecIn1 = DAG.getNode(
   12667             ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
   12668             DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
   12669       } else
   12670         return SDValue();
   12671     }
   12672 
   12673     if (UsesZeroVector)
   12674       VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
   12675                                 DAG.getConstantFP(0.0, dl, VT);
   12676     else
   12677       // If VecIn2 is unused then change it to undef.
   12678       VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);
   12679 
   12680     // Check that we were able to transform all incoming values to the same
   12681     // type.
   12682     if (VecIn2.getValueType() != VecIn1.getValueType() ||
   12683         VecIn1.getValueType() != VT)
   12684           return SDValue();
   12685 
   12686     // Return the new VECTOR_SHUFFLE node.
   12687     SDValue Ops[2];
   12688     Ops[0] = VecIn1;
   12689     Ops[1] = VecIn2;
   12690     return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
   12691   }
   12692 
   12693   return SDValue();
   12694 }
   12695 
   12696 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
   12697   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   12698   EVT OpVT = N->getOperand(0).getValueType();
   12699 
   12700   // If the operands are legal vectors, leave them alone.
   12701   if (TLI.isTypeLegal(OpVT))
   12702     return SDValue();
   12703 
   12704   SDLoc DL(N);
   12705   EVT VT = N->getValueType(0);
   12706   SmallVector<SDValue, 8> Ops;
   12707 
   12708   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
   12709   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
   12710 
   12711   // Keep track of what we encounter.
   12712   bool AnyInteger = false;
   12713   bool AnyFP = false;
   12714   for (const SDValue &Op : N->ops()) {
   12715     if (ISD::BITCAST == Op.getOpcode() &&
   12716         !Op.getOperand(0).getValueType().isVector())
   12717       Ops.push_back(Op.getOperand(0));
   12718     else if (ISD::UNDEF == Op.getOpcode())
   12719       Ops.push_back(ScalarUndef);
   12720     else
   12721       return SDValue();
   12722 
   12723     // Note whether we encounter an integer or floating point scalar.
   12724     // If it's neither, bail out, it could be something weird like x86mmx.
   12725     EVT LastOpVT = Ops.back().getValueType();
   12726     if (LastOpVT.isFloatingPoint())
   12727       AnyFP = true;
   12728     else if (LastOpVT.isInteger())
   12729       AnyInteger = true;
   12730     else
   12731       return SDValue();
   12732   }
   12733 
   12734   // If any of the operands is a floating point scalar bitcast to a vector,
   12735   // use floating point types throughout, and bitcast everything.
   12736   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
   12737   if (AnyFP) {
   12738     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
   12739     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
   12740     if (AnyInteger) {
   12741       for (SDValue &Op : Ops) {
   12742         if (Op.getValueType() == SVT)
   12743           continue;
   12744         if (Op.getOpcode() == ISD::UNDEF)
   12745           Op = ScalarUndef;
   12746         else
   12747           Op = DAG.getNode(ISD::BITCAST, DL, SVT, Op);
   12748       }
   12749     }
   12750   }
   12751 
   12752   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
   12753                                VT.getSizeInBits() / SVT.getSizeInBits());
   12754   return DAG.getNode(ISD::BITCAST, DL, VT,
   12755                      DAG.getNode(ISD::BUILD_VECTOR, DL, VecVT, Ops));
   12756 }
   12757 
   12758 // Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
   12759 // operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
   12760 // most two distinct vectors the same size as the result, attempt to turn this
   12761 // into a legal shuffle.
   12762 static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
   12763   EVT VT = N->getValueType(0);
   12764   EVT OpVT = N->getOperand(0).getValueType();
   12765   int NumElts = VT.getVectorNumElements();
   12766   int NumOpElts = OpVT.getVectorNumElements();
   12767 
   12768   SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
   12769   SmallVector<int, 8> Mask;
   12770 
   12771   for (SDValue Op : N->ops()) {
   12772     // Peek through any bitcast.
   12773     while (Op.getOpcode() == ISD::BITCAST)
   12774       Op = Op.getOperand(0);
   12775 
   12776     // UNDEF nodes convert to UNDEF shuffle mask values.
   12777     if (Op.getOpcode() == ISD::UNDEF) {
   12778       Mask.append((unsigned)NumOpElts, -1);
   12779       continue;
   12780     }
   12781 
   12782     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   12783       return SDValue();
   12784 
   12785     // What vector are we extracting the subvector from and at what index?
   12786     SDValue ExtVec = Op.getOperand(0);
   12787 
   12788     // We want the EVT of the original extraction to correctly scale the
   12789     // extraction index.
   12790     EVT ExtVT = ExtVec.getValueType();
   12791 
   12792     // Peek through any bitcast.
   12793     while (ExtVec.getOpcode() == ISD::BITCAST)
   12794       ExtVec = ExtVec.getOperand(0);
   12795 
   12796     // UNDEF nodes convert to UNDEF shuffle mask values.
   12797     if (ExtVec.getOpcode() == ISD::UNDEF) {
   12798       Mask.append((unsigned)NumOpElts, -1);
   12799       continue;
   12800     }
   12801 
   12802     if (!isa<ConstantSDNode>(Op.getOperand(1)))
   12803       return SDValue();
   12804     int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();
   12805 
   12806     // Ensure that we are extracting a subvector from a vector the same
   12807     // size as the result.
   12808     if (ExtVT.getSizeInBits() != VT.getSizeInBits())
   12809       return SDValue();
   12810 
   12811     // Scale the subvector index to account for any bitcast.
   12812     int NumExtElts = ExtVT.getVectorNumElements();
   12813     if (0 == (NumExtElts % NumElts))
   12814       ExtIdx /= (NumExtElts / NumElts);
   12815     else if (0 == (NumElts % NumExtElts))
   12816       ExtIdx *= (NumElts / NumExtElts);
   12817     else
   12818       return SDValue();
   12819 
   12820     // At most we can reference 2 inputs in the final shuffle.
   12821     if (SV0.getOpcode() == ISD::UNDEF || SV0 == ExtVec) {
   12822       SV0 = ExtVec;
   12823       for (int i = 0; i != NumOpElts; ++i)
   12824         Mask.push_back(i + ExtIdx);
   12825     } else if (SV1.getOpcode() == ISD::UNDEF || SV1 == ExtVec) {
   12826       SV1 = ExtVec;
   12827       for (int i = 0; i != NumOpElts; ++i)
   12828         Mask.push_back(i + ExtIdx + NumElts);
   12829     } else {
   12830       return SDValue();
   12831     }
   12832   }
   12833 
   12834   if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
   12835     return SDValue();
   12836 
   12837   return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
   12838                               DAG.getBitcast(VT, SV1), Mask);
   12839 }
   12840 
   12841 SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
   12842   // If we only have one input vector, we don't need to do any concatenation.
   12843   if (N->getNumOperands() == 1)
   12844     return N->getOperand(0);
   12845 
   12846   // Check if all of the operands are undefs.
   12847   EVT VT = N->getValueType(0);
   12848   if (ISD::allOperandsUndef(N))
   12849     return DAG.getUNDEF(VT);
   12850 
   12851   // Optimize concat_vectors where all but the first of the vectors are undef.
   12852   if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
   12853         return Op.getOpcode() == ISD::UNDEF;
   12854       })) {
   12855     SDValue In = N->getOperand(0);
   12856     assert(In.getValueType().isVector() && "Must concat vectors");
   12857 
   12858     // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
   12859     if (In->getOpcode() == ISD::BITCAST &&
   12860         !In->getOperand(0)->getValueType(0).isVector()) {
   12861       SDValue Scalar = In->getOperand(0);
   12862 
   12863       // If the bitcast type isn't legal, it might be a trunc of a legal type;
   12864       // look through the trunc so we can still do the transform:
   12865       //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
   12866       if (Scalar->getOpcode() == ISD::TRUNCATE &&
   12867           !TLI.isTypeLegal(Scalar.getValueType()) &&
   12868           TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
   12869         Scalar = Scalar->getOperand(0);
   12870 
   12871       EVT SclTy = Scalar->getValueType(0);
   12872 
   12873       if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
   12874         return SDValue();
   12875 
   12876       EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
   12877                                  VT.getSizeInBits() / SclTy.getSizeInBits());
   12878       if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
   12879         return SDValue();
   12880 
   12881       SDLoc dl = SDLoc(N);
   12882       SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
   12883       return DAG.getNode(ISD::BITCAST, dl, VT, Res);
   12884     }
   12885   }
   12886 
   12887   // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
   12888   // We have already tested above for an UNDEF only concatenation.
   12889   // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
   12890   // -> (BUILD_VECTOR A, B, ..., C, D, ...)
   12891   auto IsBuildVectorOrUndef = [](const SDValue &Op) {
   12892     return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
   12893   };
   12894   bool AllBuildVectorsOrUndefs =
   12895       std::all_of(N->op_begin(), N->op_end(), IsBuildVectorOrUndef);
   12896   if (AllBuildVectorsOrUndefs) {
   12897     SmallVector<SDValue, 8> Opnds;
   12898     EVT SVT = VT.getScalarType();
   12899 
   12900     EVT MinVT = SVT;
   12901     if (!SVT.isFloatingPoint()) {
   12902       // If BUILD_VECTOR are from built from integer, they may have different
   12903       // operand types. Get the smallest type and truncate all operands to it.
   12904       bool FoundMinVT = false;
   12905       for (const SDValue &Op : N->ops())
   12906         if (ISD::BUILD_VECTOR == Op.getOpcode()) {
   12907           EVT OpSVT = Op.getOperand(0)->getValueType(0);
   12908           MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
   12909           FoundMinVT = true;
   12910         }
   12911       assert(FoundMinVT && "Concat vector type mismatch");
   12912     }
   12913 
   12914     for (const SDValue &Op : N->ops()) {
   12915       EVT OpVT = Op.getValueType();
   12916       unsigned NumElts = OpVT.getVectorNumElements();
   12917 
   12918       if (ISD::UNDEF == Op.getOpcode())
   12919         Opnds.append(NumElts, DAG.getUNDEF(MinVT));
   12920 
   12921       if (ISD::BUILD_VECTOR == Op.getOpcode()) {
   12922         if (SVT.isFloatingPoint()) {
   12923           assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
   12924           Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
   12925         } else {
   12926           for (unsigned i = 0; i != NumElts; ++i)
   12927             Opnds.push_back(
   12928                 DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
   12929         }
   12930       }
   12931     }
   12932 
   12933     assert(VT.getVectorNumElements() == Opnds.size() &&
   12934            "Concat vector type mismatch");
   12935     return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
   12936   }
   12937 
   12938   // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
   12939   if (SDValue V = combineConcatVectorOfScalars(N, DAG))
   12940     return V;
   12941 
   12942   // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
   12943   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
   12944     if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
   12945       return V;
   12946 
   12947   // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
   12948   // nodes often generate nop CONCAT_VECTOR nodes.
   12949   // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
   12950   // place the incoming vectors at the exact same location.
   12951   SDValue SingleSource = SDValue();
   12952   unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();
   12953 
   12954   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   12955     SDValue Op = N->getOperand(i);
   12956 
   12957     if (Op.getOpcode() == ISD::UNDEF)
   12958       continue;
   12959 
   12960     // Check if this is the identity extract:
   12961     if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
   12962       return SDValue();
   12963 
   12964     // Find the single incoming vector for the extract_subvector.
   12965     if (SingleSource.getNode()) {
   12966       if (Op.getOperand(0) != SingleSource)
   12967         return SDValue();
   12968     } else {
   12969       SingleSource = Op.getOperand(0);
   12970 
   12971       // Check the source type is the same as the type of the result.
   12972       // If not, this concat may extend the vector, so we can not
   12973       // optimize it away.
   12974       if (SingleSource.getValueType() != N->getValueType(0))
   12975         return SDValue();
   12976     }
   12977 
   12978     unsigned IdentityIndex = i * PartNumElem;
   12979     ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
   12980     // The extract index must be constant.
   12981     if (!CS)
   12982       return SDValue();
   12983 
   12984     // Check that we are reading from the identity index.
   12985     if (CS->getZExtValue() != IdentityIndex)
   12986       return SDValue();
   12987   }
   12988 
   12989   if (SingleSource.getNode())
   12990     return SingleSource;
   12991 
   12992   return SDValue();
   12993 }
   12994 
   12995 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
   12996   EVT NVT = N->getValueType(0);
   12997   SDValue V = N->getOperand(0);
   12998 
   12999   if (V->getOpcode() == ISD::CONCAT_VECTORS) {
   13000     // Combine:
   13001     //    (extract_subvec (concat V1, V2, ...), i)
   13002     // Into:
   13003     //    Vi if possible
   13004     // Only operand 0 is checked as 'concat' assumes all inputs of the same
   13005     // type.
   13006     if (V->getOperand(0).getValueType() != NVT)
   13007       return SDValue();
   13008     unsigned Idx = N->getConstantOperandVal(1);
   13009     unsigned NumElems = NVT.getVectorNumElements();
   13010     assert((Idx % NumElems) == 0 &&
   13011            "IDX in concat is not a multiple of the result vector length.");
   13012     return V->getOperand(Idx / NumElems);
   13013   }
   13014 
   13015   // Skip bitcasting
   13016   if (V->getOpcode() == ISD::BITCAST)
   13017     V = V.getOperand(0);
   13018 
   13019   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
   13020     SDLoc dl(N);
   13021     // Handle only simple case where vector being inserted and vector
   13022     // being extracted are of same type, and are half size of larger vectors.
   13023     EVT BigVT = V->getOperand(0).getValueType();
   13024     EVT SmallVT = V->getOperand(1).getValueType();
   13025     if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
   13026       return SDValue();
   13027 
   13028     // Only handle cases where both indexes are constants with the same type.
   13029     ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
   13030     ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));
   13031 
   13032     if (InsIdx && ExtIdx &&
   13033         InsIdx->getValueType(0).getSizeInBits() <= 64 &&
   13034         ExtIdx->getValueType(0).getSizeInBits() <= 64) {
   13035       // Combine:
   13036       //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
   13037       // Into:
   13038       //    indices are equal or bit offsets are equal => V1
   13039       //    otherwise => (extract_subvec V1, ExtIdx)
   13040       if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
   13041           ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
   13042         return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1));
   13043       return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT,
   13044                          DAG.getNode(ISD::BITCAST, dl,
   13045                                      N->getOperand(0).getValueType(),
   13046                                      V->getOperand(0)), N->getOperand(1));
   13047     }
   13048   }
   13049 
   13050   return SDValue();
   13051 }
   13052 
   13053 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
   13054                                                  SDValue V, SelectionDAG &DAG) {
   13055   SDLoc DL(V);
   13056   EVT VT = V.getValueType();
   13057 
   13058   switch (V.getOpcode()) {
   13059   default:
   13060     return V;
   13061 
   13062   case ISD::CONCAT_VECTORS: {
   13063     EVT OpVT = V->getOperand(0).getValueType();
   13064     int OpSize = OpVT.getVectorNumElements();
   13065     SmallBitVector OpUsedElements(OpSize, false);
   13066     bool FoundSimplification = false;
   13067     SmallVector<SDValue, 4> NewOps;
   13068     NewOps.reserve(V->getNumOperands());
   13069     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
   13070       SDValue Op = V->getOperand(i);
   13071       bool OpUsed = false;
   13072       for (int j = 0; j < OpSize; ++j)
   13073         if (UsedElements[i * OpSize + j]) {
   13074           OpUsedElements[j] = true;
   13075           OpUsed = true;
   13076         }
   13077       NewOps.push_back(
   13078           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
   13079                  : DAG.getUNDEF(OpVT));
   13080       FoundSimplification |= Op == NewOps.back();
   13081       OpUsedElements.reset();
   13082     }
   13083     if (FoundSimplification)
   13084       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
   13085     return V;
   13086   }
   13087 
   13088   case ISD::INSERT_SUBVECTOR: {
   13089     SDValue BaseV = V->getOperand(0);
   13090     SDValue SubV = V->getOperand(1);
   13091     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
   13092     if (!IdxN)
   13093       return V;
   13094 
   13095     int SubSize = SubV.getValueType().getVectorNumElements();
   13096     int Idx = IdxN->getZExtValue();
   13097     bool SubVectorUsed = false;
   13098     SmallBitVector SubUsedElements(SubSize, false);
   13099     for (int i = 0; i < SubSize; ++i)
   13100       if (UsedElements[i + Idx]) {
   13101         SubVectorUsed = true;
   13102         SubUsedElements[i] = true;
   13103         UsedElements[i + Idx] = false;
   13104       }
   13105 
   13106     // Now recurse on both the base and sub vectors.
   13107     SDValue SimplifiedSubV =
   13108         SubVectorUsed
   13109             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
   13110             : DAG.getUNDEF(SubV.getValueType());
   13111     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
   13112     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
   13113       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
   13114                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
   13115     return V;
   13116   }
   13117   }
   13118 }
   13119 
   13120 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
   13121                                        SDValue N1, SelectionDAG &DAG) {
   13122   EVT VT = SVN->getValueType(0);
   13123   int NumElts = VT.getVectorNumElements();
   13124   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
   13125   for (int M : SVN->getMask())
   13126     if (M >= 0 && M < NumElts)
   13127       N0UsedElements[M] = true;
   13128     else if (M >= NumElts)
   13129       N1UsedElements[M - NumElts] = true;
   13130 
   13131   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
   13132   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
   13133   if (S0 == N0 && S1 == N1)
   13134     return SDValue();
   13135 
   13136   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
   13137 }
   13138 
   13139 // Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
   13140 // or turn a shuffle of a single concat into simpler shuffle then concat.
   13141 static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
   13142   EVT VT = N->getValueType(0);
   13143   unsigned NumElts = VT.getVectorNumElements();
   13144 
   13145   SDValue N0 = N->getOperand(0);
   13146   SDValue N1 = N->getOperand(1);
   13147   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   13148 
   13149   SmallVector<SDValue, 4> Ops;
   13150   EVT ConcatVT = N0.getOperand(0).getValueType();
   13151   unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
   13152   unsigned NumConcats = NumElts / NumElemsPerConcat;
   13153 
   13154   // Special case: shuffle(concat(A,B)) can be more efficiently represented
   13155   // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
   13156   // half vector elements.
   13157   if (NumElemsPerConcat * 2 == NumElts && N1.getOpcode() == ISD::UNDEF &&
   13158       std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
   13159                   SVN->getMask().end(), [](int i) { return i == -1; })) {
   13160     N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
   13161                               makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
   13162     N1 = DAG.getUNDEF(ConcatVT);
   13163     return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
   13164   }
   13165 
   13166   // Look at every vector that's inserted. We're looking for exact
   13167   // subvector-sized copies from a concatenated vector
   13168   for (unsigned I = 0; I != NumConcats; ++I) {
   13169     // Make sure we're dealing with a copy.
   13170     unsigned Begin = I * NumElemsPerConcat;
   13171     bool AllUndef = true, NoUndef = true;
   13172     for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
   13173       if (SVN->getMaskElt(J) >= 0)
   13174         AllUndef = false;
   13175       else
   13176         NoUndef = false;
   13177     }
   13178 
   13179     if (NoUndef) {
   13180       if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
   13181         return SDValue();
   13182 
   13183       for (unsigned J = 1; J != NumElemsPerConcat; ++J)
   13184         if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
   13185           return SDValue();
   13186 
   13187       unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
   13188       if (FirstElt < N0.getNumOperands())
   13189         Ops.push_back(N0.getOperand(FirstElt));
   13190       else
   13191         Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
   13192 
   13193     } else if (AllUndef) {
   13194       Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
   13195     } else { // Mixed with general masks and undefs, can't do optimization.
   13196       return SDValue();
   13197     }
   13198   }
   13199 
   13200   return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
   13201 }
   13202 
   13203 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
           // Combine a VECTOR_SHUFFLE node. The steps below run in order and each
           // returns as soon as it produces a replacement:
           //   1. canonicalize operands and mask (undef inputs, identical inputs);
           //   2. fold a splat of a BUILD_VECTOR;
           //   3. replace unused pieces of the inputs with undef;
           //   4. turn a shuffle of CONCAT_VECTORS into a concat;
           //   5. merge BUILD_VECTOR / SCALAR_TO_VECTOR inputs into one BUILD_VECTOR;
           //   6. merge with a bitcasted inner shuffle;
           //   7. commute so an inner shuffle is operand 0, then merge the two
           //      shuffles into one.
           // Returns an empty SDValue when nothing applies.
   13204   EVT VT = N->getValueType(0);
   13205   unsigned NumElts = VT.getVectorNumElements();
   13206 
   13207   SDValue N0 = N->getOperand(0);
   13208   SDValue N1 = N->getOperand(1);
   13209 
   13210   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
   13211 
   13212   // Canonicalize shuffle undef, undef -> undef
   13213   if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
   13214     return DAG.getUNDEF(VT);
   13215 
   13216   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   13217 
   13218   // Canonicalize shuffle v, v -> v, undef
   13219   if (N0 == N1) {
   13220     SmallVector<int, 8> NewMask;
   13221     for (unsigned i = 0; i != NumElts; ++i) {
   13222       int Idx = SVN->getMaskElt(i);
             // Indices into the second operand are redirected to the first.
   13223       if (Idx >= (int)NumElts) Idx -= NumElts;
   13224       NewMask.push_back(Idx);
   13225     }
   13226     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
   13227                                 &NewMask[0]);
   13228   }
   13229 
   13230   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
   13231   if (N0.getOpcode() == ISD::UNDEF) {
   13232     SmallVector<int, 8> NewMask;
   13233     for (unsigned i = 0; i != NumElts; ++i) {
   13234       int Idx = SVN->getMaskElt(i);
   13235       if (Idx >= 0) {
   13236         if (Idx >= (int)NumElts)
   13237           Idx -= NumElts;
   13238         else
   13239           Idx = -1; // remove reference to lhs
   13240       }
   13241       NewMask.push_back(Idx);
   13242     }
   13243     return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
   13244                                 &NewMask[0]);
   13245   }
   13246 
   13247   // Remove references to rhs if it is undef
   13248   if (N1.getOpcode() == ISD::UNDEF) {
   13249     bool Changed = false;
   13250     SmallVector<int, 8> NewMask;
   13251     for (unsigned i = 0; i != NumElts; ++i) {
   13252       int Idx = SVN->getMaskElt(i);
   13253       if (Idx >= (int)NumElts) {
   13254         Idx = -1;
   13255         Changed = true;
   13256       }
   13257       NewMask.push_back(Idx);
   13258     }
           // Only build a new node when some mask entry was actually rewritten.
   13259     if (Changed)
   13260       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
   13261   }
   13262 
   13263   // If it is a splat, check if the argument vector is another splat or a
   13264   // build_vector.
           // Only splats whose index selects an element of N0 are handled here.
   13265   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
   13266     SDNode *V = N0.getNode();
   13267 
   13268     // If this is a bit convert that changes the element type of the vector but
   13269     // not the number of vector elements, look through it.  Be careful not to
   13270     // look through conversions that change things like v4f32 to v2f64.
   13271     if (V->getOpcode() == ISD::BITCAST) {
   13272       SDValue ConvInput = V->getOperand(0);
   13273       if (ConvInput.getValueType().isVector() &&
   13274           ConvInput.getValueType().getVectorNumElements() == NumElts)
   13275         V = ConvInput.getNode();
   13276     }
   13277 
   13278     if (V->getOpcode() == ISD::BUILD_VECTOR) {
   13279       assert(V->getNumOperands() == NumElts &&
   13280              "BUILD_VECTOR has wrong number of operands");
   13281       SDValue Base;
   13282       bool AllSame = true;
             // Base becomes the first non-undef operand, if there is one.
   13283       for (unsigned i = 0; i != NumElts; ++i) {
   13284         if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
   13285           Base = V->getOperand(i);
   13286           break;
   13287         }
   13288       }
   13289       // Splat of <u, u, u, u>, return <u, u, u, u>
   13290       if (!Base.getNode())
   13291         return N0;
   13292       for (unsigned i = 0; i != NumElts; ++i) {
   13293         if (V->getOperand(i) != Base) {
   13294           AllSame = false;
   13295           break;
   13296         }
   13297       }
   13298       // Splat of <x, x, x, x>, return <x, x, x, x>
   13299       if (AllSame)
   13300         return N0;
   13301 
   13302       // Canonicalize any other splat as a build_vector.
   13303       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
   13304       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
   13305       SDValue NewBV = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
   13306                                   V->getValueType(0), Ops);
   13307 
   13308       // We may have jumped through bitcasts, so the type of the
   13309       // BUILD_VECTOR may not match the type of the shuffle.
   13310       if (V->getValueType(0) != VT)
   13311         NewBV = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, NewBV);
   13312       return NewBV;
   13313     }
   13314   }
   13315 
   13316   // There are various patterns used to build up a vector from smaller vectors,
   13317   // subvectors, or elements. Scan chains of these and replace unused insertions
   13318   // or components with undef.
   13319   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
   13320     return S;
   13321 
           // Shuffle of concat(s): N1 must be undef or a concat whose pieces have
           // the same type as the pieces of N0.
   13322   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
   13323       Level < AfterLegalizeVectorOps &&
   13324       (N1.getOpcode() == ISD::UNDEF ||
   13325       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
   13326        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
   13327     SDValue V = partitionShuffleOfConcats(N, DAG);
   13328 
   13329     if (V.getNode())
   13330       return V;
   13331   }
   13332 
   13333   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
   13334   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
   13335   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
   13336     SmallVector<SDValue, 8> Ops;
   13337     for (int M : SVN->getMask()) {
             // Undef mask entries keep this default undef scalar.
   13338       SDValue Op = DAG.getUNDEF(VT.getScalarType());
   13339       if (M >= 0) {
   13340         int Idx = M % NumElts;
   13341         SDValue &S = (M < (int)NumElts ? N0 : N1);
   13342         if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
   13343           Op = S.getOperand(Idx);
   13344         } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
                 // Only element 0 of a SCALAR_TO_VECTOR is taken from its
                 // operand; any other index keeps the undef default.
   13345           if (Idx == 0)
   13346             Op = S.getOperand(0);
   13347         } else {
   13348           // Operand can't be combined - bail out.
                 // Breaking leaves Ops short, which the size check below rejects.
   13349           break;
   13350         }
   13351       }
   13352       Ops.push_back(Op);
   13353     }
   13354     if (Ops.size() == VT.getVectorNumElements()) {
   13355       // BUILD_VECTOR requires all inputs to be of the same type, find the
   13356       // maximum type and extend them all.
   13357       EVT SVT = VT.getScalarType();
   13358       if (SVT.isInteger())
   13359         for (SDValue &Op : Ops)
   13360           SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
   13361       if (SVT != VT.getScalarType())
   13362         for (SDValue &Op : Ops)
   13363           Op = TLI.isZExtFree(Op.getValueType(), SVT)
   13364                    ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
   13365                    : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
   13366       return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Ops);
   13367     }
   13368   }
   13369 
   13370   // If this shuffle only has a single input that is a bitcasted shuffle,
   13371   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
   13372   // back to their original types.
   13373   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
   13374       N1.getOpcode() == ISD::UNDEF && Level < AfterLegalizeVectorOps &&
   13375       TLI.isTypeLegal(VT)) {
   13376 
   13377     // Peek through the bitcast only if there is one user.
   13378     SDValue BC0 = N0;
   13379     while (BC0.getOpcode() == ISD::BITCAST) {
   13380       if (!BC0.hasOneUse())
   13381         break;
   13382       BC0 = BC0.getOperand(0);
   13383     }
   13384 
           // Rewrite a mask for elements Scale times narrower: each entry M
           // becomes the Scale consecutive entries Scale*M .. Scale*M+Scale-1,
           // and negative (undef) entries become Scale copies of -1.
   13385     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
   13386       if (Scale == 1)
   13387         return SmallVector<int, 8>(Mask.begin(), Mask.end());
   13388 
   13389       SmallVector<int, 8> NewMask;
   13390       for (int M : Mask)
   13391         for (int s = 0; s != Scale; ++s)
   13392           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
   13393       return NewMask;
   13394     };
   13395 
   13396     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
   13397       EVT SVT = VT.getScalarType();
   13398       EVT InnerVT = BC0->getValueType(0);
   13399       EVT InnerSVT = InnerVT.getScalarType();
   13400 
   13401       // Determine which shuffle works with the smaller scalar type.
   13402       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
   13403       EVT ScaleSVT = ScaleVT.getScalarType();
   13404 
             // Both element sizes must be whole multiples of the smaller one.
   13405       if (TLI.isTypeLegal(ScaleVT) &&
   13406           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
   13407           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
   13408 
   13409         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
   13410         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
   13411 
   13412         // Scale the shuffle masks to the smaller scalar type.
   13413         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
   13414         SmallVector<int, 8> InnerMask =
   13415             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
   13416         SmallVector<int, 8> OuterMask =
   13417             ScaleShuffleMask(SVN->getMask(), OuterScale);
   13418 
   13419         // Merge the shuffle masks.
   13420         SmallVector<int, 8> NewMask;
   13421         for (int M : OuterMask)
   13422           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
   13423 
   13424         // Test for shuffle mask legality over both commutations.
   13425         SDValue SV0 = BC0->getOperand(0);
   13426         SDValue SV1 = BC0->getOperand(1);
   13427         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
   13428         if (!LegalMask) {
   13429           std::swap(SV0, SV1);
   13430           ShuffleVectorSDNode::commuteMask(NewMask);
   13431           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
   13432         }
   13433 
   13434         if (LegalMask) {
   13435           SV0 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV0);
   13436           SV1 = DAG.getNode(ISD::BITCAST, SDLoc(N), ScaleVT, SV1);
   13437           return DAG.getNode(
   13438               ISD::BITCAST, SDLoc(N), VT,
   13439               DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
   13440         }
   13441       }
   13442     }
   13443   }
   13444 
   13445   // Canonicalize shuffles according to rules:
   13446   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
   13447   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
   13448   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
   13449   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
   13450       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
   13451       TLI.isTypeLegal(VT)) {
   13452     // The incoming shuffle must be of the same type as the result of the
   13453     // current shuffle.
   13454     assert(N1->getOperand(0).getValueType() == VT &&
   13455            "Shuffle types don't match");
   13456 
   13457     SDValue SV0 = N1->getOperand(0);
   13458     SDValue SV1 = N1->getOperand(1);
   13459     bool HasSameOp0 = N0 == SV0;
   13460     bool IsSV1Undef = SV1.getOpcode() == ISD::UNDEF;
   13461     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
   13462       // Commute the operands of this shuffle so that next rule
   13463       // will trigger.
   13464       return DAG.getCommutedVectorShuffle(*SVN);
   13465   }
   13466 
   13467   // Try to fold according to rules:
   13468   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
   13469   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
   13470   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
   13471   // Don't try to fold shuffles with illegal type.
   13472   // Only fold if this shuffle is the only user of the other shuffle.
   13473   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
   13474       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
   13475     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
   13476 
   13477     // The incoming shuffle must be of the same type as the result of the
   13478     // current shuffle.
   13479     assert(OtherSV->getOperand(0).getValueType() == VT &&
   13480            "Shuffle types don't match");
   13481 
   13482     SDValue SV0, SV1;
   13483     SmallVector<int, 4> Mask;
   13484     // Compute the combined shuffle mask for a shuffle with SV0 as the first
   13485     // operand, and SV1 as the second operand.
   13486     for (unsigned i = 0; i != NumElts; ++i) {
   13487       int Idx = SVN->getMaskElt(i);
   13488       if (Idx < 0) {
   13489         // Propagate Undef.
   13490         Mask.push_back(Idx);
   13491         continue;
   13492       }
   13493 
   13494       SDValue CurrentVec;
   13495       if (Idx < (int)NumElts) {
   13496         // This shuffle index refers to the inner shuffle N0. Lookup the inner
   13497         // shuffle mask to identify which vector is actually referenced.
   13498         Idx = OtherSV->getMaskElt(Idx);
   13499         if (Idx < 0) {
   13500           // Propagate Undef.
   13501           Mask.push_back(Idx);
   13502           continue;
   13503         }
   13504 
   13505         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
   13506                                            : OtherSV->getOperand(1);
   13507       } else {
   13508         // This shuffle index references an element within N1.
   13509         CurrentVec = N1;
   13510       }
   13511 
   13512       // Simple case where 'CurrentVec' is UNDEF.
   13513       if (CurrentVec.getOpcode() == ISD::UNDEF) {
   13514         Mask.push_back(-1);
   13515         continue;
   13516       }
   13517 
   13518       // Canonicalize the shuffle index. We don't know yet if CurrentVec
   13519       // will be the first or second operand of the combined shuffle.
   13520       Idx = Idx % NumElts;
   13521       if (!SV0.getNode() || SV0 == CurrentVec) {
   13522         // Ok. CurrentVec is the left hand side.
   13523         // Update the mask accordingly.
   13524         SV0 = CurrentVec;
   13525         Mask.push_back(Idx);
   13526         continue;
   13527       }
   13528 
   13529       // Bail out if we cannot convert the shuffle pair into a single shuffle.
             // That is the case when a third distinct source vector shows up.
   13530       if (SV1.getNode() && SV1 != CurrentVec)
   13531         return SDValue();
   13532 
   13533       // Ok. CurrentVec is the right hand side.
   13534       // Update the mask accordingly.
   13535       SV1 = CurrentVec;
   13536       Mask.push_back(Idx + NumElts);
   13537     }
   13538 
   13539     // Check if all indices in Mask are Undef. If so, propagate Undef.
   13540     bool isUndefMask = true;
   13541     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
   13542       isUndefMask &= Mask[i] < 0;
   13543 
   13544     if (isUndefMask)
   13545       return DAG.getUNDEF(VT);
   13546 
           // A source that was never assigned is replaced by undef.
   13547     if (!SV0.getNode())
   13548       SV0 = DAG.getUNDEF(VT);
   13549     if (!SV1.getNode())
   13550       SV1 = DAG.getUNDEF(VT);
   13551 
   13552     // Avoid introducing shuffles with illegal mask.
   13553     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
   13554       ShuffleVectorSDNode::commuteMask(Mask);
   13555 
   13556       if (!TLI.isShuffleMaskLegal(Mask, VT))
   13557         return SDValue();
   13558 
   13559       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
   13560       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
   13561       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
   13562       std::swap(SV0, SV1);
   13563     }
   13564 
   13565     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
   13566     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
   13567     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
   13568     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, &Mask[0]);
   13569   }
   13570 
   13571   return SDValue();
   13572 }
   13573 
   13574 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
   13575   SDValue InVal = N->getOperand(0);
   13576   EVT VT = N->getValueType(0);
   13577 
   13578   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
   13579   // with a VECTOR_SHUFFLE.
   13580   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
   13581     SDValue InVec = InVal->getOperand(0);
   13582     SDValue EltNo = InVal->getOperand(1);
   13583 
   13584     // FIXME: We could support implicit truncation if the shuffle can be
   13585     // scaled to a smaller vector scalar type.
   13586     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
   13587     if (C0 && VT == InVec.getValueType() &&
   13588         VT.getScalarType() == InVal.getValueType()) {
   13589       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
   13590       int Elt = C0->getZExtValue();
   13591       NewMask[0] = Elt;
   13592 
   13593       if (TLI.isShuffleMaskLegal(NewMask, VT))
   13594         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
   13595                                     NewMask);
   13596     }
   13597   }
   13598 
   13599   return SDValue();
   13600 }
   13601 
   13602 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   13603   SDValue N0 = N->getOperand(0);
   13604   SDValue N2 = N->getOperand(2);
   13605 
   13606   // If the input vector is a concatenation, and the insert replaces
   13607   // one of the halves, we can optimize into a single concat_vectors.
   13608   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
   13609       N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) {
   13610     APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
   13611     EVT VT = N->getValueType(0);
   13612 
   13613     // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
   13614     // (concat_vectors Z, Y)
   13615     if (InsIdx == 0)
   13616       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
   13617                          N->getOperand(1), N0.getOperand(1));
   13618 
   13619     // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
   13620     // (concat_vectors X, Z)
   13621     if (InsIdx == VT.getVectorNumElements()/2)
   13622       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT,
   13623                          N0.getOperand(0), N->getOperand(1));
   13624   }
   13625 
   13626   return SDValue();
   13627 }
   13628 
   13629 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
   13630   SDValue N0 = N->getOperand(0);
   13631 
   13632   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
   13633   if (N0->getOpcode() == ISD::FP16_TO_FP)
   13634     return N0->getOperand(0);
   13635 
   13636   return SDValue();
   13637 }
   13638 
   13639 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
   13640   SDValue N0 = N->getOperand(0);
   13641 
   13642   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
   13643   if (N0->getOpcode() == ISD::AND) {
   13644     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
   13645     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
   13646       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
   13647                          N0.getOperand(0));
   13648     }
   13649   }
   13650 
   13651   return SDValue();
   13652 }
   13653 
   13654 /// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
   13655 /// with the destination vector and a zero vector.
   13656 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
   13657 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
   13658 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   13659   EVT VT = N->getValueType(0);
   13660   SDValue LHS = N->getOperand(0);
   13661   SDValue RHS = N->getOperand(1);
   13662   SDLoc dl(N);
   13663 
   13664   // Make sure we're not running after operation legalization where it
   13665   // may have custom lowered the vector shuffles.
   13666   if (LegalOperations)
   13667     return SDValue();
   13668 
   13669   if (N->getOpcode() != ISD::AND)
   13670     return SDValue();
   13671 
   13672   if (RHS.getOpcode() == ISD::BITCAST)
   13673     RHS = RHS.getOperand(0);
   13674 
   13675   if (RHS.getOpcode() != ISD::BUILD_VECTOR)
   13676     return SDValue();
   13677 
   13678   EVT RVT = RHS.getValueType();
   13679   unsigned NumElts = RHS.getNumOperands();
   13680 
   13681   // Attempt to create a valid clear mask, splitting the mask into
   13682   // sub elements and checking to see if each is
   13683   // all zeros or all ones - suitable for shuffle masking.
   13684   auto BuildClearMask = [&](int Split) {
   13685     int NumSubElts = NumElts * Split;
   13686     int NumSubBits = RVT.getScalarSizeInBits() / Split;
   13687 
   13688     SmallVector<int, 8> Indices;
   13689     for (int i = 0; i != NumSubElts; ++i) {
   13690       int EltIdx = i / Split;
   13691       int SubIdx = i % Split;
   13692       SDValue Elt = RHS.getOperand(EltIdx);
   13693       if (Elt.getOpcode() == ISD::UNDEF) {
   13694         Indices.push_back(-1);
   13695         continue;
   13696       }
   13697 
   13698       APInt Bits;
   13699       if (isa<ConstantSDNode>(Elt))
   13700         Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
   13701       else if (isa<ConstantFPSDNode>(Elt))
   13702         Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
   13703       else
   13704         return SDValue();
   13705 
   13706       // Extract the sub element from the constant bit mask.
   13707       if (DAG.getDataLayout().isBigEndian()) {
   13708         Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
   13709       } else {
   13710         Bits = Bits.lshr(SubIdx * NumSubBits);
   13711       }
   13712 
   13713       if (Split > 1)
   13714         Bits = Bits.trunc(NumSubBits);
   13715 
   13716       if (Bits.isAllOnesValue())
   13717         Indices.push_back(i);
   13718       else if (Bits == 0)
   13719         Indices.push_back(i + NumSubElts);
   13720       else
   13721         return SDValue();
   13722     }
   13723 
   13724     // Let's see if the target supports this vector_shuffle.
   13725     EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
   13726     EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
   13727     if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
   13728       return SDValue();
   13729 
   13730     SDValue Zero = DAG.getConstant(0, dl, ClearVT);
   13731     return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
   13732                                                    DAG.getBitcast(ClearVT, LHS),
   13733                                                    Zero, &Indices[0]));
   13734   };
   13735 
   13736   // Determine maximum split level (byte level masking).
   13737   int MaxSplit = 1;
   13738   if (RVT.getScalarSizeInBits() % 8 == 0)
   13739     MaxSplit = RVT.getScalarSizeInBits() / 8;
   13740 
   13741   for (int Split = 1; Split <= MaxSplit; ++Split)
   13742     if (RVT.getScalarSizeInBits() % Split == 0)
   13743       if (SDValue S = BuildClearMask(Split))
   13744         return S;
   13745 
   13746   return SDValue();
   13747 }
   13748 
   13749 /// Visit a binary vector operation, like ADD.
   13750 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   13751   assert(N->getValueType(0).isVector() &&
   13752          "SimplifyVBinOp only works on vectors!");
   13753 
   13754   SDValue LHS = N->getOperand(0);
   13755   SDValue RHS = N->getOperand(1);
   13756   SDValue Ops[] = {LHS, RHS};
   13757 
   13758   // See if we can constant fold the vector operation.
   13759   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
   13760           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
   13761     return Fold;
   13762 
   13763   // Try to convert a constant mask AND into a shuffle clear mask.
   13764   if (SDValue Shuffle = XformToShuffleWithZero(N))
   13765     return Shuffle;
   13766 
   13767   // Type legalization might introduce new shuffles in the DAG.
   13768   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
   13769   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
   13770   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
   13771       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
   13772       LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
   13773       RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
   13774     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
   13775     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
   13776 
   13777     if (SVN0->getMask().equals(SVN1->getMask())) {
   13778       EVT VT = N->getValueType(0);
   13779       SDValue UndefVector = LHS.getOperand(1);
   13780       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   13781                                      LHS.getOperand(0), RHS.getOperand(0),
   13782                                      N->getFlags());
   13783       AddUsersToWorklist(N);
   13784       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
   13785                                   &SVN0->getMask()[0]);
   13786     }
   13787   }
   13788 
   13789   return SDValue();
   13790 }
   13791 
   13792 SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
   13793                                     SDValue N1, SDValue N2){
   13794   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
   13795 
   13796   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
   13797                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
   13798 
   13799   // If we got a simplified select_cc node back from SimplifySelectCC, then
   13800   // break it down into a new SETCC node, and a new SELECT node, and then return
   13801   // the SELECT node, since we were called with a SELECT node.
   13802   if (SCC.getNode()) {
   13803     // Check to see if we got a select_cc back (to turn into setcc/select).
   13804     // Otherwise, just return whatever node we got back, like fabs.
   13805     if (SCC.getOpcode() == ISD::SELECT_CC) {
   13806       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
   13807                                   N0.getValueType(),
   13808                                   SCC.getOperand(0), SCC.getOperand(1),
   13809                                   SCC.getOperand(4));
   13810       AddToWorklist(SETCC.getNode());
   13811       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
   13812                            SCC.getOperand(2), SCC.getOperand(3));
   13813     }
   13814 
   13815     return SCC;
   13816   }
   13817   return SDValue();
   13818 }
   13819 
   13820 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
   13821 /// being selected between, see if we can simplify the select.  Callers of this
   13822 /// should assume that TheSelect is deleted if this returns true.  As such, they
   13823 /// should return the appropriate thing (e.g. the node) back to the top-level of
   13824 /// the DAG combiner loop to avoid it being looked at.
   13825 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
   13826                                     SDValue RHS) {
   13827 
   13828   // fold (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
   13829   // The select + setcc is redundant, because fsqrt returns NaN for X < -0.
   13830   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
   13831     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
   13832       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
   13833       SDValue Sqrt = RHS;
   13834       ISD::CondCode CC;
   13835       SDValue CmpLHS;
   13836       const ConstantFPSDNode *NegZero = nullptr;
   13837 
   13838       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
   13839         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
   13840         CmpLHS = TheSelect->getOperand(0);
   13841         NegZero = isConstOrConstSplatFP(TheSelect->getOperand(1));
   13842       } else {
   13843         // SELECT or VSELECT
   13844         SDValue Cmp = TheSelect->getOperand(0);
   13845         if (Cmp.getOpcode() == ISD::SETCC) {
   13846           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
   13847           CmpLHS = Cmp.getOperand(0);
   13848           NegZero = isConstOrConstSplatFP(Cmp.getOperand(1));
   13849         }
   13850       }
   13851       if (NegZero && NegZero->isNegative() && NegZero->isZero() &&
   13852           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
   13853           CC == ISD::SETULT || CC == ISD::SETLT)) {
   13854         // We have: (select (setcc x, -0.0, *lt), NaN, (fsqrt x))
   13855         CombineTo(TheSelect, Sqrt);
   13856         return true;
   13857       }
   13858     }
   13859   }
   13860   // Cannot simplify select with vector condition
   13861   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
   13862 
   13863   // If this is a select from two identical things, try to pull the operation
   13864   // through the select.
   13865   if (LHS.getOpcode() != RHS.getOpcode() ||
   13866       !LHS.hasOneUse() || !RHS.hasOneUse())
   13867     return false;
   13868 
   13869   // If this is a load and the token chain is identical, replace the select
   13870   // of two loads with a load through a select of the address to load from.
   13871   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
   13872   // constants have been dropped into the constant pool.
   13873   if (LHS.getOpcode() == ISD::LOAD) {
   13874     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
   13875     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
   13876 
   13877     // Token chains must be identical.
   13878     if (LHS.getOperand(0) != RHS.getOperand(0) ||
   13879         // Do not let this transformation reduce the number of volatile loads.
   13880         LLD->isVolatile() || RLD->isVolatile() ||
   13881         // FIXME: If either is a pre/post inc/dec load,
   13882         // we'd need to split out the address adjustment.
   13883         LLD->isIndexed() || RLD->isIndexed() ||
   13884         // If this is an EXTLOAD, the VT's must match.
   13885         LLD->getMemoryVT() != RLD->getMemoryVT() ||
   13886         // If this is an EXTLOAD, the kind of extension must match.
   13887         (LLD->getExtensionType() != RLD->getExtensionType() &&
   13888          // The only exception is if one of the extensions is anyext.
   13889          LLD->getExtensionType() != ISD::EXTLOAD &&
   13890          RLD->getExtensionType() != ISD::EXTLOAD) ||
   13891         // FIXME: this discards src value information.  This is
   13892         // over-conservative. It would be beneficial to be able to remember
   13893         // both potential memory locations.  Since we are discarding
   13894         // src value info, don't do the transformation if the memory
   13895         // locations are not in the default address space.
   13896         LLD->getPointerInfo().getAddrSpace() != 0 ||
   13897         RLD->getPointerInfo().getAddrSpace() != 0 ||
   13898         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
   13899                                       LLD->getBasePtr().getValueType()))
   13900       return false;
   13901 
   13902     // Check that the select condition doesn't reach either load.  If so,
   13903     // folding this will induce a cycle into the DAG.  If not, this is safe to
   13904     // xform, so create a select of the addresses.
   13905     SDValue Addr;
   13906     if (TheSelect->getOpcode() == ISD::SELECT) {
   13907       SDNode *CondNode = TheSelect->getOperand(0).getNode();
   13908       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
   13909           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
   13910         return false;
   13911       // The loads must not depend on one another.
   13912       if (LLD->isPredecessorOf(RLD) ||
   13913           RLD->isPredecessorOf(LLD))
   13914         return false;
   13915       Addr = DAG.getSelect(SDLoc(TheSelect),
   13916                            LLD->getBasePtr().getValueType(),
   13917                            TheSelect->getOperand(0), LLD->getBasePtr(),
   13918                            RLD->getBasePtr());
   13919     } else {  // Otherwise SELECT_CC
   13920       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
   13921       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
   13922 
   13923       if ((LLD->hasAnyUseOfValue(1) &&
   13924            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
   13925           (RLD->hasAnyUseOfValue(1) &&
   13926            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
   13927         return false;
   13928 
   13929       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
   13930                          LLD->getBasePtr().getValueType(),
   13931                          TheSelect->getOperand(0),
   13932                          TheSelect->getOperand(1),
   13933                          LLD->getBasePtr(), RLD->getBasePtr(),
   13934                          TheSelect->getOperand(4));
   13935     }
   13936 
   13937     SDValue Load;
   13938     // It is safe to replace the two loads if they have different alignments,
   13939     // but the new load must be the minimum (most restrictive) alignment of the
   13940     // inputs.
   13941     bool isInvariant = LLD->isInvariant() & RLD->isInvariant();
   13942     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
   13943     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
   13944       Load = DAG.getLoad(TheSelect->getValueType(0),
   13945                          SDLoc(TheSelect),
   13946                          // FIXME: Discards pointer and AA info.
   13947                          LLD->getChain(), Addr, MachinePointerInfo(),
   13948                          LLD->isVolatile(), LLD->isNonTemporal(),
   13949                          isInvariant, Alignment);
   13950     } else {
   13951       Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
   13952                             RLD->getExtensionType() : LLD->getExtensionType(),
   13953                             SDLoc(TheSelect),
   13954                             TheSelect->getValueType(0),
   13955                             // FIXME: Discards pointer and AA info.
   13956                             LLD->getChain(), Addr, MachinePointerInfo(),
   13957                             LLD->getMemoryVT(), LLD->isVolatile(),
   13958                             LLD->isNonTemporal(), isInvariant, Alignment);
   13959     }
   13960 
   13961     // Users of the select now use the result of the load.
   13962     CombineTo(TheSelect, Load);
   13963 
   13964     // Users of the old loads now use the new load's chain.  We know the
   13965     // old-load value is dead now.
   13966     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
   13967     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
   13968     return true;
   13969   }
   13970 
   13971   return false;
   13972 }
   13973 
   13974 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
   13975 /// where 'cond' is the comparison specified by CC.
         ///
         /// N2 is the value selected when the comparison holds and N3 the value
         /// selected otherwise.  The folds below are tried in order and the first
         /// one that matches produces the result.  When NotExtCompare is set and the
         /// "select C, 2^k, 0" fold matches with N2 equal to one, the function gives
         /// up instead of producing a zero-extended setcc.
         /// Returns the simplified value, or an empty SDValue if nothing applied.
   13976 SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
   13977                                       SDValue N2, SDValue N3,
   13978                                       ISD::CondCode CC, bool NotExtCompare) {
   13979   // (x ? y : y) -> y.
   13980   if (N2 == N3) return N2;
   13981 
   13982   EVT VT = N2.getValueType();
   13983   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   13984   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   13985 
   13986   // Determine if the condition we're dealing with is constant
   13987   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
   13988                               N0, N1, CC, DL, false);
   13989   if (SCC.getNode()) AddToWorklist(SCC.getNode());
   13990 
   13991   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
   13992     // fold select_cc true, x, y -> x
   13993     // fold select_cc false, x, y -> y
   13994     return !SCCC->isNullValue() ? N2 : N3;
   13995   }
   13996 
   13997   // Check to see if we can simplify the select into an fabs node
   13998   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
   13999     // Allow either -0.0 or 0.0
   14000     if (CFP->isZero()) {
   14001       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
   14002       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
   14003           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
   14004           N2 == N3.getOperand(0))
   14005         return DAG.getNode(ISD::FABS, DL, VT, N0);
   14006 
   14007       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
   14008       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
   14009           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
   14010           N2.getOperand(0) == N3)
   14011         return DAG.getNode(ISD::FABS, DL, VT, N3);
   14012     }
   14013   }
   14014 
   14015   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 4 : 0))"
   14016   // where "tmp" is a constant pool entry containing an array with 2.0 and 1.0
   14017   // in it.  This is a win when the constant is not otherwise available because
   14018   // it replaces two constant pool loads with one.  We only do this if the FP
   14019   // type is known to be legal, because if it isn't, then we are before legalize
   14020   // types and we want the other legalization to happen first (e.g. to avoid
   14021   // messing with soft float) and if the ConstantFP is not legal, because if
   14022   // it is legal, we may not need to store the FP constant in a constant pool.
   14023   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
   14024     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
   14025       if (TLI.isTypeLegal(N2.getValueType()) &&
   14026           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
   14027                TargetLowering::Legal &&
   14028            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
   14029            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
   14030           // If both constants have multiple uses, then we won't need to do an
   14031           // extra load, they are likely around in registers for other users.
   14032           (TV->hasOneUse() || FV->hasOneUse())) {
                 // The array is laid out as {false value, true value}, so a true
                 // condition has to select the offset of element 1.
   14033         Constant *Elts[] = {
   14034           const_cast<ConstantFP*>(FV->getConstantFPValue()),
   14035           const_cast<ConstantFP*>(TV->getConstantFPValue())
   14036         };
   14037         Type *FPTy = Elts[0]->getType();
   14038         const DataLayout &TD = DAG.getDataLayout();
   14039 
   14040         // Create a ConstantArray of the two constants.
   14041         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
   14042         SDValue CPIdx =
   14043             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
   14044                                 TD.getPrefTypeAlignment(FPTy));
   14045         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   14046 
   14047         // Get the offsets to the 0 and 1 element of the array so that we can
   14048         // select between them.
   14049         SDValue Zero = DAG.getIntPtrConstant(0, DL);
   14050         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
   14051         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
   14052 
   14053         SDValue Cond = DAG.getSetCC(DL,
   14054                                     getSetCCResultType(N0.getValueType()),
   14055                                     N0, N1, CC);
   14056         AddToWorklist(Cond.getNode());
   14057         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
   14058                                           Cond, One, Zero);
   14059         AddToWorklist(CstOffset.getNode());
   14060         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
   14061                             CstOffset);
   14062         AddToWorklist(CPIdx.getNode());
   14063         return DAG.getLoad(
   14064             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
   14065             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
   14066             false, false, false, Alignment);
   14067       }
   14068     }
   14069 
   14070   // Check to see if we can perform the "gzip trick", transforming
   14071   // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1)), A)
   14072   if (isNullConstant(N3) && CC == ISD::SETLT &&
   14073       (isNullConstant(N1) ||                 // (a < 0) ? b : 0
   14074        (isOneConstant(N1) && N0 == N2))) {   // (a < 1) ? a : 0
   14075     EVT XType = N0.getValueType();
   14076     EVT AType = N2.getValueType();
   14077     if (XType.bitsGE(AType)) {
   14078       // and (sra X, size(X)-1), A -> "and (srl X, C2), A" iff A is a
   14079       // single-bit constant.
   14080       if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
                 // Shift the sign bit of X down to the position of A's single set bit.
   14081         unsigned ShCtV = N2C->getAPIntValue().logBase2();
   14082         ShCtV = XType.getSizeInBits() - ShCtV - 1;
   14083         SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
   14084                                        getShiftAmountTy(N0.getValueType()));
   14085         SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
   14086                                     XType, N0, ShCt);
   14087         AddToWorklist(Shift.getNode());
   14088 
   14089         if (XType.bitsGT(AType)) {
   14090           Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   14091           AddToWorklist(Shift.getNode());
   14092         }
   14093 
   14094         return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   14095       }
   14096 
             // General case: smear the sign bit of X across the whole value and
             // mask A with it.
   14097       SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
   14098                                   XType, N0,
   14099                                   DAG.getConstant(XType.getSizeInBits() - 1,
   14100                                                   SDLoc(N0),
   14101                                          getShiftAmountTy(N0.getValueType())));
   14102       AddToWorklist(Shift.getNode());
   14103 
   14104       if (XType.bitsGT(AType)) {
   14105         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   14106         AddToWorklist(Shift.getNode());
   14107       }
   14108 
   14109       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   14110     }
   14111   }
   14112 
   14113   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
   14114   // where y has a single bit set.
   14115   // A plaintext description would be, we can turn the SELECT_CC into an AND
   14116   // when the condition can be materialized as an all-ones register.  Any
   14117   // single bit-test can be materialized as an all-ones register with
   14118   // shift-left and shift-right-arith.
   14119   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
   14120       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
   14121     SDValue AndLHS = N0->getOperand(0);
   14122     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
   14123     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
   14124       // Shift the tested bit over the sign bit.
   14125       APInt AndMask = ConstAndRHS->getAPIntValue();
   14126       SDValue ShlAmt =
   14127         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
   14128                         getShiftAmountTy(AndLHS.getValueType()));
   14129       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
   14130 
   14131       // Now arithmetic right shift it all the way over, so the result is either
   14132       // all-ones, or zero.
   14133       SDValue ShrAmt =
   14134         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
   14135                         getShiftAmountTy(Shl.getValueType()));
   14136       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
   14137 
   14138       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
   14139     }
   14140   }
   14141 
   14142   // fold select C, 16, 0 -> shl C, 4
   14143   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
   14144       TLI.getBooleanContents(N0.getValueType()) ==
   14145           TargetLowering::ZeroOrOneBooleanContent) {
   14146 
   14147     // If the caller doesn't want us to simplify this into a zext of a compare,
   14148     // don't do it.
   14149     if (NotExtCompare && N2C->isOne())
   14150       return SDValue();
   14151 
   14152     // Get a SetCC of the condition
   14153     // NOTE: Don't create a SETCC if it's not legal on this target.
   14154     if (!LegalOperations ||
   14155         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
   14156       SDValue Temp, SCC;
   14157       // cast from setcc result type to select result type
   14158       if (LegalTypes) {
   14159         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
   14160                             N0, N1, CC);
   14161         if (N2.getValueType().bitsLT(SCC.getValueType()))
   14162           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
   14163                                         N2.getValueType());
   14164         else
   14165           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
   14166                              N2.getValueType(), SCC);
   14167       } else {
                 // Types have not been legalized yet, so the setcc produces an i1.
   14168         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
   14169         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
   14170                            N2.getValueType(), SCC);
   14171       }
   14172 
   14173       AddToWorklist(SCC.getNode());
   14174       AddToWorklist(Temp.getNode());
   14175 
               // select C, 1, 0 is just the zero-extended condition.
   14176       if (N2C->isOne())
   14177         return Temp;
   14178 
   14179       // shl setcc result by log2 n2c
   14180       return DAG.getNode(
   14181           ISD::SHL, DL, N2.getValueType(), Temp,
   14182           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
   14183                           getShiftAmountTy(Temp.getValueType())));
   14184     }
   14185   }
   14186 
   14187   // Check to see if this is an integer abs.
   14188   // select_cc setg[te] X,  0,  X, -X ->
   14189   // select_cc setgt    X, -1,  X, -X ->
   14190   // select_cc setl[te] X,  0, -X,  X ->
   14191   // select_cc setlt    X,  1, -X,  X ->
   14192   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
   14193   if (N1C) {
             // SubC ends up as the constant left operand of the SUB forming -X,
             // if the select has one of the shapes listed above.
   14194     ConstantSDNode *SubC = nullptr;
   14195     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
   14196          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
   14197         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
   14198       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
   14199     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
   14200               (N1C->isOne() && CC == ISD::SETLT)) &&
   14201              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
   14202       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
   14203 
   14204     EVT XType = N0.getValueType();
   14205     if (SubC && SubC->isNullValue() && XType.isInteger()) {
               // NOTE: this local DL shadows the DL parameter; the nodes built
               // below take their location from N0.
   14206       SDLoc DL(N0);
   14207       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
   14208                                   N0,
   14209                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
   14210                                          getShiftAmountTy(N0.getValueType())));
   14211       SDValue Add = DAG.getNode(ISD::ADD, DL,
   14212                                 XType, N0, Shift);
   14213       AddToWorklist(Shift.getNode());
   14214       AddToWorklist(Add.getNode());
   14215       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
   14216     }
   14217   }
   14218 
   14219   return SDValue();
   14220 }
   14221 
   14222 /// This is a stub for TargetLowering::SimplifySetCC.
   14223 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
   14224                                    SDValue N1, ISD::CondCode Cond,
   14225                                    SDLoc DL, bool foldBooleans) {
   14226   TargetLowering::DAGCombinerInfo
   14227     DagCombineInfo(DAG, Level, false, this);
   14228   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
   14229 }
   14230 
   14231 /// Given an ISD::SDIV node expressing a divide by constant, return
   14232 /// a DAG expression to select that will generate the same value by multiplying
   14233 /// by a magic number.
   14234 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
   14235 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   14236   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   14237   if (!C)
   14238     return SDValue();
   14239 
   14240   // Avoid division by zero.
   14241   if (C->isNullValue())
   14242     return SDValue();
   14243 
   14244   std::vector<SDNode*> Built;
   14245   SDValue S =
   14246       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
   14247 
   14248   for (SDNode *N : Built)
   14249     AddToWorklist(N);
   14250   return S;
   14251 }
   14252 
   14253 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
   14254 /// DAG expression that will generate the same value by right shifting.
   14255 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
   14256   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   14257   if (!C)
   14258     return SDValue();
   14259 
   14260   // Avoid division by zero.
   14261   if (C->isNullValue())
   14262     return SDValue();
   14263 
   14264   std::vector<SDNode *> Built;
   14265   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
   14266 
   14267   for (SDNode *N : Built)
   14268     AddToWorklist(N);
   14269   return S;
   14270 }
   14271 
   14272 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
   14273 /// expression that will generate the same value by multiplying by a magic
   14274 /// number.
   14275 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
   14276 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
   14277   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   14278   if (!C)
   14279     return SDValue();
   14280 
   14281   // Avoid division by zero.
   14282   if (C->isNullValue())
   14283     return SDValue();
   14284 
   14285   std::vector<SDNode*> Built;
   14286   SDValue S =
   14287       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
   14288 
   14289   for (SDNode *N : Built)
   14290     AddToWorklist(N);
   14291   return S;
   14292 }
   14293 
   14294 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
   14295   if (Level >= AfterLegalizeDAG)
   14296     return SDValue();
   14297 
   14298   // Expose the DAG combiner to the target combiner implementations.
   14299   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
   14300 
   14301   unsigned Iterations = 0;
   14302   if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
   14303     if (Iterations) {
   14304       // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
   14305       // For the reciprocal, we need to find the zero of the function:
   14306       //   F(X) = A X - 1 [which has a zero at X = 1/A]
   14307       //     =>
   14308       //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
   14309       //     does not require additional intermediate precision]
   14310       EVT VT = Op.getValueType();
   14311       SDLoc DL(Op);
   14312       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
   14313 
   14314       AddToWorklist(Est.getNode());
   14315 
   14316       // Newton iterations: Est = Est + Est (1 - Arg * Est)
   14317       for (unsigned i = 0; i < Iterations; ++i) {
   14318         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
   14319         AddToWorklist(NewEst.getNode());
   14320 
   14321         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
   14322         AddToWorklist(NewEst.getNode());
   14323 
   14324         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
   14325         AddToWorklist(NewEst.getNode());
   14326 
   14327         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
   14328         AddToWorklist(Est.getNode());
   14329       }
   14330     }
   14331     return Est;
   14332   }
   14333 
   14334   return SDValue();
   14335 }
   14336 
   14337 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
   14338 /// For the reciprocal sqrt, we need to find the zero of the function:
   14339 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
   14340 ///     =>
   14341 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
   14342 /// As a result, we precompute A/2 prior to the iteration loop.
   14343 SDValue DAGCombiner::BuildRsqrtNROneConst(SDValue Arg, SDValue Est,
   14344                                           unsigned Iterations,
   14345                                           SDNodeFlags *Flags) {
   14346   EVT VT = Arg.getValueType();
   14347   SDLoc DL(Arg);
   14348   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
   14349 
   14350   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
   14351   // this entire sequence requires only one FP constant.
   14352   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
   14353   AddToWorklist(HalfArg.getNode());
   14354 
   14355   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
   14356   AddToWorklist(HalfArg.getNode());
   14357 
   14358   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
   14359   for (unsigned i = 0; i < Iterations; ++i) {
   14360     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
   14361     AddToWorklist(NewEst.getNode());
   14362 
   14363     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
   14364     AddToWorklist(NewEst.getNode());
   14365 
   14366     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
   14367     AddToWorklist(NewEst.getNode());
   14368 
   14369     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
   14370     AddToWorklist(Est.getNode());
   14371   }
   14372   return Est;
   14373 }
   14374 
   14375 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
   14376 /// For the reciprocal sqrt, we need to find the zero of the function:
   14377 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
   14378 ///     =>
   14379 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
   14380 SDValue DAGCombiner::BuildRsqrtNRTwoConst(SDValue Arg, SDValue Est,
   14381                                           unsigned Iterations,
   14382                                           SDNodeFlags *Flags) {
   14383   EVT VT = Arg.getValueType();
   14384   SDLoc DL(Arg);
   14385   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
   14386   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
   14387 
   14388   // Newton iterations: Est = -0.5 * Est * (-3.0 + Arg * Est * Est)
   14389   for (unsigned i = 0; i < Iterations; ++i) {
   14390     SDValue HalfEst = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
   14391     AddToWorklist(HalfEst.getNode());
   14392 
   14393     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
   14394     AddToWorklist(Est.getNode());
   14395 
   14396     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
   14397     AddToWorklist(Est.getNode());
   14398 
   14399     Est = DAG.getNode(ISD::FADD, DL, VT, Est, MinusThree, Flags);
   14400     AddToWorklist(Est.getNode());
   14401 
   14402     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, HalfEst, Flags);
   14403     AddToWorklist(Est.getNode());
   14404   }
   14405   return Est;
   14406 }
   14407 
   14408 SDValue DAGCombiner::BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
   14409   if (Level >= AfterLegalizeDAG)
   14410     return SDValue();
   14411 
   14412   // Expose the DAG combiner to the target combiner implementations.
   14413   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
   14414   unsigned Iterations = 0;
   14415   bool UseOneConstNR = false;
   14416   if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
   14417     AddToWorklist(Est.getNode());
   14418     if (Iterations) {
   14419       Est = UseOneConstNR ?
   14420         BuildRsqrtNROneConst(Op, Est, Iterations, Flags) :
   14421         BuildRsqrtNRTwoConst(Op, Est, Iterations, Flags);
   14422     }
   14423     return Est;
   14424   }
   14425 
   14426   return SDValue();
   14427 }
   14428 
   14429 /// Return true if base is a frame index, which is known not to alias with
   14430 /// anything but itself.  Provides base object and offset as results.
   14431 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
   14432                            const GlobalValue *&GV, const void *&CV) {
   14433   // Assume it is a primitive operation.
   14434   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
   14435 
   14436   // If it's an adding a simple constant then integrate the offset.
   14437   if (Base.getOpcode() == ISD::ADD) {
   14438     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
   14439       Base = Base.getOperand(0);
   14440       Offset += C->getZExtValue();
   14441     }
   14442   }
   14443 
   14444   // Return the underlying GlobalValue, and update the Offset.  Return false
   14445   // for GlobalAddressSDNode since the same GlobalAddress may be represented
   14446   // by multiple nodes with different offsets.
   14447   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
   14448     GV = G->getGlobal();
   14449     Offset += G->getOffset();
   14450     return false;
   14451   }
   14452 
   14453   // Return the underlying Constant value, and update the Offset.  Return false
   14454   // for ConstantSDNodes since the same constant pool entry may be represented
   14455   // by multiple nodes with different offsets.
   14456   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
   14457     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
   14458                                          : (const void *)C->getConstVal();
   14459     Offset += C->getOffset();
   14460     return false;
   14461   }
   14462   // If it's any of the following then it can't alias with anything but itself.
   14463   return isa<FrameIndexSDNode>(Base);
   14464 }
   14465 
   14466 /// Return true if there is any possibility that the two addresses overlap.
   14467 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
   14468   // If they are the same then they must be aliases.
   14469   if (Op0->getBasePtr() == Op1->getBasePtr()) return true;
   14470 
   14471   // If they are both volatile then they cannot be reordered.
   14472   if (Op0->isVolatile() && Op1->isVolatile()) return true;
   14473 
   14474   // If one operation reads from invariant memory, and the other may store, they
   14475   // cannot alias. These should really be checking the equivalent of mayWrite,
   14476   // but it only matters for memory nodes other than load /store.
   14477   if (Op0->isInvariant() && Op1->writeMem())
   14478     return false;
   14479 
   14480   if (Op1->isInvariant() && Op0->writeMem())
   14481     return false;
   14482 
   14483   // Gather base node and offset information.
   14484   SDValue Base1, Base2;
   14485   int64_t Offset1, Offset2;
   14486   const GlobalValue *GV1, *GV2;
   14487   const void *CV1, *CV2;
   14488   bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
   14489                                       Base1, Offset1, GV1, CV1);
   14490   bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
   14491                                       Base2, Offset2, GV2, CV2);
   14492 
   14493   // If they have a same base address then check to see if they overlap.
   14494   if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
   14495     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
   14496              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
   14497 
   14498   // It is possible for different frame indices to alias each other, mostly
   14499   // when tail call optimization reuses return address slots for arguments.
   14500   // To catch this case, look up the actual index of frame indices to compute
   14501   // the real alias relationship.
   14502   if (isFrameIndex1 && isFrameIndex2) {
   14503     MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
   14504     Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
   14505     Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
   14506     return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
   14507              (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
   14508   }
   14509 
   14510   // Otherwise, if we know what the bases are, and they aren't identical, then
   14511   // we know they cannot alias.
   14512   if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
   14513     return false;
   14514 
   14515   // If we know required SrcValue1 and SrcValue2 have relatively large alignment
   14516   // compared to the size and offset of the access, we may be able to prove they
   14517   // do not alias.  This check is conservative for now to catch cases created by
   14518   // splitting vector types.
   14519   if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
   14520       (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
   14521       (Op0->getMemoryVT().getSizeInBits() >> 3 ==
   14522        Op1->getMemoryVT().getSizeInBits() >> 3) &&
   14523       (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) {
   14524     int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
   14525     int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();
   14526 
   14527     // There is no overlap between these relatively aligned accesses of similar
   14528     // size, return no alias.
   14529     if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
   14530         (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
   14531       return false;
   14532   }
   14533 
   14534   bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
   14535                    ? CombinerGlobalAA
   14536                    : DAG.getSubtarget().useAA();
   14537 #ifndef NDEBUG
   14538   if (CombinerAAOnlyFunc.getNumOccurrences() &&
   14539       CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
   14540     UseAA = false;
   14541 #endif
   14542   if (UseAA &&
   14543       Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
   14544     // Use alias analysis information.
   14545     int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
   14546                                  Op1->getSrcValueOffset());
   14547     int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
   14548         Op0->getSrcValueOffset() - MinOffset;
   14549     int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
   14550         Op1->getSrcValueOffset() - MinOffset;
   14551     AliasResult AAResult =
   14552         AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
   14553                                 UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
   14554                  MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
   14555                                 UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
   14556     if (AAResult == NoAlias)
   14557       return false;
   14558   }
   14559 
   14560   // Otherwise we have to assume they alias.
   14561   return true;
   14562 }
   14563 
   14564 /// Walk up chain skipping non-aliasing memory nodes,
   14565 /// looking for aliasing nodes and adding them to the Aliases vector.
   14566 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
   14567                                    SmallVectorImpl<SDValue> &Aliases) {
   14568   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
   14569   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
   14570 
   14571   // Get alias information for node.
   14572   bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();
   14573 
   14574   // Starting off.
   14575   Chains.push_back(OriginalChain);
   14576   unsigned Depth = 0;
   14577 
   14578   // Look at each chain and determine if it is an alias.  If so, add it to the
   14579   // aliases list.  If not, then continue up the chain looking for the next
   14580   // candidate.
   14581   while (!Chains.empty()) {
   14582     SDValue Chain = Chains.pop_back_val();
   14583 
   14584     // For TokenFactor nodes, look at each operand and only continue up the
   14585     // chain until we reach the depth limit.
   14586     //
   14587     // FIXME: The depth check could be made to return the last non-aliasing
   14588     // chain we found before we hit a tokenfactor rather than the original
   14589     // chain.
   14590     if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
   14591       Aliases.clear();
   14592       Aliases.push_back(OriginalChain);
   14593       return;
   14594     }
   14595 
   14596     // Don't bother if we've been before.
   14597     if (!Visited.insert(Chain.getNode()).second)
   14598       continue;
   14599 
   14600     switch (Chain.getOpcode()) {
   14601     case ISD::EntryToken:
   14602       // Entry token is ideal chain operand, but handled in FindBetterChain.
   14603       break;
   14604 
   14605     case ISD::LOAD:
   14606     case ISD::STORE: {
   14607       // Get alias information for Chain.
   14608       bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
   14609           !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();
   14610 
   14611       // If chain is alias then stop here.
   14612       if (!(IsLoad && IsOpLoad) &&
   14613           isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
   14614         Aliases.push_back(Chain);
   14615       } else {
   14616         // Look further up the chain.
   14617         Chains.push_back(Chain.getOperand(0));
   14618         ++Depth;
   14619       }
   14620       break;
   14621     }
   14622 
   14623     case ISD::TokenFactor:
   14624       // We have to check each of the operands of the token factor for "small"
   14625       // token factors, so we queue them up.  Adding the operands to the queue
   14626       // (stack) in reverse order maintains the original order and increases the
   14627       // likelihood that getNode will find a matching token factor (CSE.)
   14628       if (Chain.getNumOperands() > 16) {
   14629         Aliases.push_back(Chain);
   14630         break;
   14631       }
   14632       for (unsigned n = Chain.getNumOperands(); n;)
   14633         Chains.push_back(Chain.getOperand(--n));
   14634       ++Depth;
   14635       break;
   14636 
   14637     default:
   14638       // For all other instructions we will just have to take what we can get.
   14639       Aliases.push_back(Chain);
   14640       break;
   14641     }
   14642   }
   14643 
   14644   // We need to be careful here to also search for aliases through the
   14645   // value operand of a store, etc. Consider the following situation:
   14646   //   Token1 = ...
   14647   //   L1 = load Token1, %52
   14648   //   S1 = store Token1, L1, %51
   14649   //   L2 = load Token1, %52+8
   14650   //   S2 = store Token1, L2, %51+8
   14651   //   Token2 = Token(S1, S2)
   14652   //   L3 = load Token2, %53
   14653   //   S3 = store Token2, L3, %52
   14654   //   L4 = load Token2, %53+8
   14655   //   S4 = store Token2, L4, %52+8
   14656   // If we search for aliases of S3 (which loads address %52), and we look
   14657   // only through the chain, then we'll miss the trivial dependence on L1
   14658   // (which also loads from %52). We then might change all loads and
   14659   // stores to use Token1 as their chain operand, which could result in
   14660   // copying %53 into %52 before copying %52 into %51 (which should
   14661   // happen first).
   14662   //
   14663   // The problem is, however, that searching for such data dependencies
   14664   // can become expensive, and the cost is not directly related to the
   14665   // chain depth. Instead, we'll rule out such configurations here by
   14666   // insisting that we've visited all chain users (except for users
   14667   // of the original chain, which is not necessary). When doing this,
   14668   // we need to look through nodes we don't care about (otherwise, things
   14669   // like register copies will interfere with trivial cases).
   14670 
   14671   SmallVector<const SDNode *, 16> Worklist;
   14672   for (const SDNode *N : Visited)
   14673     if (N != OriginalChain.getNode())
   14674       Worklist.push_back(N);
   14675 
   14676   while (!Worklist.empty()) {
   14677     const SDNode *M = Worklist.pop_back_val();
   14678 
   14679     // We have already visited M, and want to make sure we've visited any uses
   14680     // of M that we care about. For uses that we've not visisted, and don't
   14681     // care about, queue them to the worklist.
   14682 
   14683     for (SDNode::use_iterator UI = M->use_begin(),
   14684          UIE = M->use_end(); UI != UIE; ++UI)
   14685       if (UI.getUse().getValueType() == MVT::Other &&
   14686           Visited.insert(*UI).second) {
   14687         if (isa<MemSDNode>(*UI)) {
   14688           // We've not visited this use, and we care about it (it could have an
   14689           // ordering dependency with the original node).
   14690           Aliases.clear();
   14691           Aliases.push_back(OriginalChain);
   14692           return;
   14693         }
   14694 
   14695         // We've not visited this use, but we don't care about it. Mark it as
   14696         // visited and enqueue it to the worklist.
   14697         Worklist.push_back(*UI);
   14698       }
   14699   }
   14700 }
   14701 
   14702 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
   14703 /// (aliasing node.)
   14704 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
   14705   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
   14706 
   14707   // Accumulate all the aliases to this node.
   14708   GatherAllAliases(N, OldChain, Aliases);
   14709 
   14710   // If no operands then chain to entry token.
   14711   if (Aliases.size() == 0)
   14712     return DAG.getEntryNode();
   14713 
   14714   // If a single operand then chain to it.  We don't need to revisit it.
   14715   if (Aliases.size() == 1)
   14716     return Aliases[0];
   14717 
   14718   // Construct a custom tailored token factor.
   14719   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
   14720 }
   14721 
   14722 bool DAGCombiner::findBetterNeighborChains(StoreSDNode* St) {
   14723   // This holds the base pointer, index, and the offset in bytes from the base
   14724   // pointer.
   14725   BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());
   14726 
   14727   // We must have a base and an offset.
   14728   if (!BasePtr.Base.getNode())
   14729     return false;
   14730 
   14731   // Do not handle stores to undef base pointers.
   14732   if (BasePtr.Base.getOpcode() == ISD::UNDEF)
   14733     return false;
   14734 
   14735   SmallVector<StoreSDNode *, 8> ChainedStores;
   14736   ChainedStores.push_back(St);
   14737 
   14738   // Walk up the chain and look for nodes with offsets from the same
   14739   // base pointer. Stop when reaching an instruction with a different kind
   14740   // or instruction which has a different base pointer.
   14741   StoreSDNode *Index = St;
   14742   while (Index) {
   14743     // If the chain has more than one use, then we can't reorder the mem ops.
   14744     if (Index != St && !SDValue(Index, 0)->hasOneUse())
   14745       break;
   14746 
   14747     if (Index->isVolatile() || Index->isIndexed())
   14748       break;
   14749 
   14750     // Find the base pointer and offset for this memory node.
   14751     BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());
   14752 
   14753     // Check that the base pointer is the same as the original one.
   14754     if (!Ptr.equalBaseIndex(BasePtr))
   14755       break;
   14756 
   14757     // Find the next memory operand in the chain. If the next operand in the
   14758     // chain is a store then move up and continue the scan with the next
   14759     // memory operand. If the next operand is a load save it and use alias
   14760     // information to check if it interferes with anything.
   14761     SDNode *NextInChain = Index->getChain().getNode();
   14762     while (true) {
   14763       if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
   14764         // We found a store node. Use it for the next iteration.
   14765         if (STn->isVolatile() || STn->isIndexed()) {
   14766           Index = nullptr;
   14767           break;
   14768         }
   14769         ChainedStores.push_back(STn);
   14770         Index = STn;
   14771         break;
   14772       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
   14773         NextInChain = Ldn->getChain().getNode();
   14774         continue;
   14775       } else {
   14776         Index = nullptr;
   14777         break;
   14778       }
   14779     }
   14780   }
   14781 
   14782   bool MadeChange = false;
   14783   SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;
   14784 
   14785   for (StoreSDNode *ChainedStore : ChainedStores) {
   14786     SDValue Chain = ChainedStore->getChain();
   14787     SDValue BetterChain = FindBetterChain(ChainedStore, Chain);
   14788 
   14789     if (Chain != BetterChain) {
   14790       MadeChange = true;
   14791       BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
   14792     }
   14793   }
   14794 
   14795   // Do all replacements after finding the replacements to make to avoid making
   14796   // the chains more complicated by introducing new TokenFactors.
   14797   for (auto Replacement : BetterChains)
   14798     replaceStoreChain(Replacement.first, Replacement.second);
   14799 
   14800   return MadeChange;
   14801 }
   14802 
   14803 /// This is the entry point for the file.
   14804 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
   14805                            CodeGenOpt::Level OptLevel) {
   14806   /// This is the main entry point to this class.
   14807   DAGCombiner(*this, AA, OptLevel).Run(Level);
   14808 }
   14809