// (code-viewer navigation residue, not part of the original source)
// Home | History | Annotate | Download | only in SelectionDAG
      1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
     11 // both before and after the DAG is legalized.
     12 //
     13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is
     14 // primarily intended to handle simplification opportunities that are implicit
     15 // in the LLVM IR and exposed by the various codegen lowering phases.
     16 //
     17 //===----------------------------------------------------------------------===//
     18 
     19 #include "llvm/CodeGen/SelectionDAG.h"
     20 #include "llvm/ADT/SetVector.h"
     21 #include "llvm/ADT/SmallBitVector.h"
     22 #include "llvm/ADT/SmallPtrSet.h"
     23 #include "llvm/ADT/Statistic.h"
     24 #include "llvm/Analysis/AliasAnalysis.h"
     25 #include "llvm/CodeGen/MachineFrameInfo.h"
     26 #include "llvm/CodeGen/MachineFunction.h"
     27 #include "llvm/CodeGen/SelectionDAGTargetInfo.h"
     28 #include "llvm/IR/DataLayout.h"
     29 #include "llvm/IR/DerivedTypes.h"
     30 #include "llvm/IR/Function.h"
     31 #include "llvm/IR/LLVMContext.h"
     32 #include "llvm/Support/CommandLine.h"
     33 #include "llvm/Support/Debug.h"
     34 #include "llvm/Support/ErrorHandling.h"
     35 #include "llvm/Support/MathExtras.h"
     36 #include "llvm/Support/raw_ostream.h"
     37 #include "llvm/Target/TargetLowering.h"
     38 #include "llvm/Target/TargetOptions.h"
     39 #include "llvm/Target/TargetRegisterInfo.h"
     40 #include "llvm/Target/TargetSubtargetInfo.h"
     41 #include <algorithm>
     42 using namespace llvm;
     43 
     44 #define DEBUG_TYPE "dagcombine"
     45 
// Pass statistics (printed with -stats); each counter is bumped as the
// corresponding transformation fires.
STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");
     52 
     53 namespace {
  // Developer-only (cl::Hidden) command-line knobs controlling DAG-combiner
  // behavior.

  // Enable the combiner's own load/store alias-analysis heuristics.
  static cl::opt<bool>
    CombinerAA("combiner-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner alias-analysis heuristics"));

  // Allow the combiner to consult IR-level alias analysis (the AA member).
  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  // Use type-based alias analysis metadata when disambiguating memory ops
  // (on by default).
  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
  // Debug builds only: restrict combiner alias analysis to one function.
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  // Allow splitting the index arithmetic off of indexed loads.
  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));
     84 
     85 //------------------------------ DAGCombiner ---------------------------------//
     86 
  /// \brief Worklist-driven DAG combiner: repeatedly pops nodes off the
  /// worklist and tries to fold each one into a simpler equivalent node,
  /// updating the DAG in place.
  ///
  /// One instance is constructed per combine run; the worklist, worklist map,
  /// and combined-node set track the state of that single run.
  class DAGCombiner {
    /// The DAG being combined (also supplies TLI and the MachineFunction).
    SelectionDAG &DAG;
    /// Target lowering hooks, taken from DAG in the constructor.
    const TargetLowering &TLI;
    /// Which legalization phase this run operates in. Initialized to
    /// BeforeLegalizeTypes by the constructor; presumably updated by Run()
    /// from its AtLevel argument -- confirm at Run()'s definition.
    CombineLevel Level;
    /// Codegen optimization level supplied by the caller.
    CodeGenOpt::Level OptLevel;
    /// True when newly created operations must already be legal.
    /// Starts false; presumably flipped by Run() for late phases -- confirm.
    bool LegalOperations;
    /// True when newly created types must already be legal.
    /// Starts false; presumably flipped by Run() for late phases -- confirm.
    bool LegalTypes;
    /// True when the current function is optimized for size (set from
    /// Function::optForSize() in the constructor).
    bool ForCodeSize;

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// \brief Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// \brief Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 32> CombinedNodes;

    /// Used for DAG load/store alias analysis.
    AliasAnalysis &AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // The map insert fails if N is already queued, which keeps the
      // worklist duplicate-free.
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    /// Remove N from the worklist and delete it from the DAG, queueing its
    /// now-possibly-dead operands for another combine attempt.
    void deleteAndRecombine(SDNode *N);
    // NOTE(review): definition not in this chunk; presumably deletes dead
    // nodes rooted at N -- confirm at the definition.
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      // Demand every bit of the scalar element type.
      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc,
                         SDValue ExtLoad, const SDLoc &DL,
                         ISD::NodeType ExtType);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitBITREVERSE(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCE(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMACombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS,
                           SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
                             SDValue N2, SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          const SDLoc &DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                         unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip);
    SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags *Flags, bool Reciprocal);
    SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                                SDNodeFlags *Flags, bool Reciprocal);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              const SDLoc &DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Try to replace a store and any possibly adjacent stores on
    /// consecutive chains with better chains. Return true only if St is
    /// replaced.
    ///
    /// Notice that other chains may still be replaced even if the function
    /// returns false.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Match "(X shl/srl V1) & V2" where V2 may not be present.
    bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
      MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;
      // Offset from the base ptr.
      int64_t OffsetFromBase;
      // What is the sequence number of this mem node.
      // Lowest mem operand in the DAG starts at zero.
      unsigned SequenceNum;
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                     SDValue &AddNode,
                                     SDValue &ConstNode);

    /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
    /// constant build_vector of the stored constant values in Stores.
    SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL,
                                         ArrayRef<MemOpLink> Stores,
                                         SmallVectorImpl<SDValue> &Chains,
                                         EVT Ty) const;

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.  LoadedVT returns the type
    /// of the original loaded value.  NarrowLoad returns whether the load would
    /// need to be narrowed in order to match.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                          bool &NarrowLoad);

    /// This is a helper function for MergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store.
    /// \return True if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector);

    /// This is a helper function for MergeConsecutiveStores.
    /// Stores that may be merged are placed in StoreNodes.
    /// Loads that may alias with those stores are placed in AliasLoadNodes.
    void getStoreMergeAndAliasCandidates(
        StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
        SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);

    /// Helper function for MergeConsecutiveStores. Checks if
    /// Candidate stores have indirect dependency through their
    /// operands. \return True if safe to merge
    bool checkMergeStoreCandidatesForDependencies(
        SmallVectorImpl<MemOpLink> &StoreNodes);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if some memory operations were changed.
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///     (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    /// Construct a combiner over \p D at optimization level \p OL, using
    /// alias-analysis results \p A. Starts in the BeforeLegalizeTypes phase
    /// with no legality restrictions.
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
    }

    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    /// Return the DAG this combiner operates on.
    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      auto &DL = DAG.getDataLayout();
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                        : TLI.getPointerTy(DL);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
  };
    513 }
    514 
    515 
    516 namespace {
    517 /// This class is a DAGUpdateListener that removes any deleted
    518 /// nodes from the worklist.
    519 class WorklistRemover : public SelectionDAG::DAGUpdateListener {
    520   DAGCombiner &DC;
    521 public:
    522   explicit WorklistRemover(DAGCombiner &dc)
    523     : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}
    524 
    525   void NodeDeleted(SDNode *N, SDNode *E) override {
    526     DC.removeFromWorklist(N);
    527   }
    528 };
    529 }
    530 
    531 //===----------------------------------------------------------------------===//
    532 //  TargetLowering::DAGCombinerInfo implementation
    533 //===----------------------------------------------------------------------===//
    534 
    535 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
    536   ((DAGCombiner*)DC)->AddToWorklist(N);
    537 }
    538 
    539 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
    540   ((DAGCombiner*)DC)->removeFromWorklist(N);
    541 }
    542 
    543 SDValue TargetLowering::DAGCombinerInfo::
    544 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
    545   return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
    546 }
    547 
    548 SDValue TargetLowering::DAGCombinerInfo::
    549 CombineTo(SDNode *N, SDValue Res, bool AddTo) {
    550   return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
    551 }
    552 
    553 
    554 SDValue TargetLowering::DAGCombinerInfo::
    555 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
    556   return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
    557 }
    558 
    559 void TargetLowering::DAGCombinerInfo::
    560 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
    561   return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
    562 }
    563 
    564 //===----------------------------------------------------------------------===//
    565 // Helper Functions
    566 //===----------------------------------------------------------------------===//
    567 
    568 void DAGCombiner::deleteAndRecombine(SDNode *N) {
    569   removeFromWorklist(N);
    570 
    571   // If the operands of this node are only used by the node, they will now be
    572   // dead. Make sure to re-visit them and recursively delete dead nodes.
    573   for (const SDValue &Op : N->ops())
    574     // For an operand generating multiple values, one of the values may
    575     // become dead allowing further simplification (e.g. split index
    576     // arithmetic from an indexed load).
    577     if (Op->hasOneUse() || Op->getNumValues() > 1)
    578       AddToWorklist(Op.getNode());
    579 
    580   DAG.DeleteNode(N);
    581 }
    582 
    583 /// Return 1 if we can compute the negated form of the specified expression for
    584 /// the same cost as the expression itself, or 2 if we can compute the negated
    585 /// form more cheaply than the expression itself.
    586 static char isNegatibleForFree(SDValue Op, bool LegalOperations,
    587                                const TargetLowering &TLI,
    588                                const TargetOptions *Options,
    589                                unsigned Depth = 0) {
    590   // fneg is removable even if it has multiple uses.
    591   if (Op.getOpcode() == ISD::FNEG) return 2;
    592 
    593   // Don't allow anything with multiple uses.
    594   if (!Op.hasOneUse()) return 0;
    595 
    596   // Don't recurse exponentially.
    597   if (Depth > 6) return 0;
    598 
    599   switch (Op.getOpcode()) {
    600   default: return false;
    601   case ISD::ConstantFP:
    602     // Don't invert constant FP values after legalize.  The negated constant
    603     // isn't necessarily legal.
    604     return LegalOperations ? 0 : 1;
    605   case ISD::FADD:
    606     // FIXME: determine better conditions for this xform.
    607     if (!Options->UnsafeFPMath) return 0;
    608 
    609     // After operation legalization, it might not be legal to create new FSUBs.
    610     if (LegalOperations &&
    611         !TLI.isOperationLegalOrCustom(ISD::FSUB,  Op.getValueType()))
    612       return 0;
    613 
    614     // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    615     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
    616                                     Options, Depth + 1))
    617       return V;
    618     // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    619     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
    620                               Depth + 1);
    621   case ISD::FSUB:
    622     // We can't turn -(A-B) into B-A when we honor signed zeros.
    623     if (!Options->UnsafeFPMath) return 0;
    624 
    625     // fold (fneg (fsub A, B)) -> (fsub B, A)
    626     return 1;
    627 
    628   case ISD::FMUL:
    629   case ISD::FDIV:
    630     if (Options->HonorSignDependentRoundingFPMath()) return 0;
    631 
    632     // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    633     if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
    634                                     Options, Depth + 1))
    635       return V;
    636 
    637     return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
    638                               Depth + 1);
    639 
    640   case ISD::FP_EXTEND:
    641   case ISD::FP_ROUND:
    642   case ISD::FSIN:
    643     return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
    644                               Depth + 1);
    645   }
    646 }
    647 
/// If isNegatibleForFree returns true, return the newly negated expression.
/// Materializes the negation that isNegatibleForFree proved to be free. The
/// switch below must stay in lockstep with that function's case list, which is
/// why an unexpected opcode hits llvm_unreachable rather than failing softly.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  // Depth cap mirrors the recursion limit in isNegatibleForFree.
  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  // Carry the original node's fast-math flags onto any replacement node.
  const SDNodeFlags *Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    // Flip the sign on the constant directly.
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    // Negate whichever operand isNegatibleForFree reports as free.
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Push the negation through the unary operation.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
      // FP_ROUND carries a second operand (the trunc flag), so it is rebuilt
      // explicitly instead of sharing the unary path above.
      return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1));
  }
}
    728 
    729 // Return true if this node is a setcc, or is a select_cc
    730 // that selects between the target values used for true and false, making it
    731 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to
    732 // the appropriate nodes based on the type of node we are checking. This
    733 // simplifies life a bit for the callers.
    734 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
    735                                     SDValue &CC) const {
    736   if (N.getOpcode() == ISD::SETCC) {
    737     LHS = N.getOperand(0);
    738     RHS = N.getOperand(1);
    739     CC  = N.getOperand(2);
    740     return true;
    741   }
    742 
    743   if (N.getOpcode() != ISD::SELECT_CC ||
    744       !TLI.isConstTrueVal(N.getOperand(2).getNode()) ||
    745       !TLI.isConstFalseVal(N.getOperand(3).getNode()))
    746     return false;
    747 
    748   if (TLI.getBooleanContents(N.getValueType()) ==
    749       TargetLowering::UndefinedBooleanContent)
    750     return false;
    751 
    752   LHS = N.getOperand(0);
    753   RHS = N.getOperand(1);
    754   CC  = N.getOperand(4);
    755   return true;
    756 }
    757 
    758 /// Return true if this is a SetCC-equivalent operation with only one use.
    759 /// If this is true, it allows the users to invert the operation for free when
    760 /// it is profitable to do so.
    761 bool DAGCombiner::isOneUseSetCC(SDValue N) const {
    762   SDValue N0, N1, N2;
    763   if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse())
    764     return true;
    765   return false;
    766 }
    767 
    768 // \brief Returns the SDNode if it is a constant float BuildVector
    769 // or constant float.
    770 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) {
    771   if (isa<ConstantFPSDNode>(N))
    772     return N.getNode();
    773   if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode()))
    774     return N.getNode();
    775   return nullptr;
    776 }
    777 
    778 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
    779 // int.
    780 static ConstantSDNode *isConstOrConstSplat(SDValue N) {
    781   if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N))
    782     return CN;
    783 
    784   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    785     BitVector UndefElements;
    786     ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements);
    787 
    788     // BuildVectors can truncate their operands. Ignore that case here.
    789     // FIXME: We blindly ignore splats which include undef which is overly
    790     // pessimistic.
    791     if (CN && UndefElements.none() &&
    792         CN->getValueType(0) == N.getValueType().getScalarType())
    793       return CN;
    794   }
    795 
    796   return nullptr;
    797 }
    798 
    799 // \brief Returns the SDNode if it is a constant splat BuildVector or constant
    800 // float.
    801 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
    802   if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    803     return CN;
    804 
    805   if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    806     BitVector UndefElements;
    807     ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);
    808 
    809     if (CN && UndefElements.none())
    810       return CN;
    811   }
    812 
    813   return nullptr;
    814 }
    815 
/// Reassociate a commutative binary operation to expose constant folding:
///   (op (op x, c1), c2) -> (op x, (op c1, c2))
///   (op (op x, c1), y)  -> (op (op x, y), c1)  [inner op must have one use]
/// plus the mirrored forms with the nested op on the right. Returns a null
/// SDValue when no reassociation applies.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0,
                                    SDValue N1) {
  EVT VT = N0.getValueType();
  // Case 1: nested op on the left, i.e. N0 = (op x, c1).
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        // Constants didn't fold; don't fall through to the variable case.
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  // Case 2: mirror image with the nested op on the right, i.e. N1 = (op x, c1).
  // Note FoldConstantArithmetic still receives (c1, c2) in that order.
  if (N1.getOpcode() == Opc) {
    if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0));
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}
    861 
/// Replace all of N's values with the NumTo values in To, keep the worklist
/// consistent, and delete N if it becomes dead. Returns SDValue(N, 0): the
/// Run loop treats getting the original node back as "handled via CombineTo".
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  // Each replacement value (when present) must match the type of the value it
  // replaces.
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  // DeadNodes keeps the worklist free of nodes deleted during RAUW.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}
    895 
/// Apply the replacement recorded in \p TLO (TLO.Old -> TLO.New) to the DAG
/// and keep the combiner worklist consistent with the change.
void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}
    913 
/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownZero, KnownOne;
  // Let the target-independent logic search for a simplification based on
  // which bits of Op are actually demanded; TLO records any Old->New
  // replacement it finds.
  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  // Commit the replacement and keep the worklist in sync.
  CommitTargetLoweringOpt(TLO);
  return true;
}
    936 
/// Replace all uses of \p Load with \p ExtLoad truncated back to Load's
/// original value type, rewire the chain output, and delete the old load.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc dl(Load);
  EVT VT = Load->getValueType(0);
  // Value users of the old load see the promoted result truncated back to VT.
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  // Chain users move over to the new load's chain output.
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}
    953 
/// Produce a copy of \p Op whose result type is the wider type \p PVT, for use
/// when promoting an operation to a more desirable integer type. Sets
/// \p Replace to true when the original node was a load that the caller must
/// also rewire via ReplaceLoadWithPromotedLoad. Returns a null SDValue on
/// failure.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc dl(Op);
  if (ISD::isUNINDEXEDLoad(Op.getNode())) {
    LoadSDNode *LD = cast<LoadSDNode>(Op);
    EVT MemVT = LD->getMemoryVT();
    // A non-extending load is widened into an extending one (zext if legal,
    // otherwise any-extend); an existing extload keeps its extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, dl, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    // Keep the assert node, sign-extending the asserted operand to PVT.
    return DAG.getNode(ISD::AssertSext, dl, PVT,
                       SExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::AssertZext:
    // Likewise for zero-extension asserts.
    return DAG.getNode(ISD::AssertZext, dl, PVT,
                       ZExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, PVT, Op);
  }
  }

  // Fallback: any-extend an arbitrary value when the target supports it.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}
    992 
/// Promote \p Op to type \p PVT and sign-extend its value in-register, for
/// consumers (e.g. SRA) that need the high bits to be copies of the sign bit.
/// Returns a null SDValue if promotion is not possible.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  // Re-establish the original value's sign in the wider type.
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}
   1009 
/// Promote \p Op to type \p PVT and zero-extend its value in-register, for
/// consumers (e.g. SRL) that need the high bits cleared. Returns a null
/// SDValue if promotion is not possible.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  // Mask off the bits above OldVT's width in the promoted value.
  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}
   1023 
/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  // Promotion only makes sense once operation legality is known.
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer operations are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    // Promote both operands to PVT; bail out entirely if either fails.
    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    if (!NN0.getNode())
      return SDValue();

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1;
    if (N0 == N1)
      // Identical operands share a single promoted value.
      NN1 = NN0;
    else {
      NN1 = PromoteOperand(N1, PVT, Replace1);
      if (!NN1.getNode())
        return SDValue();
    }

    AddToWorklist(NN0.getNode());
    if (NN1.getNode())
      AddToWorklist(NN1.getNode());

    // If an operand was a promoted load, redirect the old load's users too.
    if (Replace0)
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    if (Replace1)
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    // Perform the operation in the wider type and truncate the result back.
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
  }
  return SDValue();
}
   1081 
/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  // Only scalar integer shifts are candidates.
  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    // Only the shifted value is promoted; the shift amount is reused as-is.
    // SRA gets a sign-extended operand and SRL a zero-extended one so the
    // right-shift brings in the correct high bits; other shifts (SHL) can take
    // any extension.
    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);
    if (!N0.getNode())
      return SDValue();

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    // Shift in the wider type, then truncate back to the original type.
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
  }
  return SDValue();
}
   1128 
   1129 SDValue DAGCombiner::PromoteExtend(SDValue Op) {
   1130   if (!LegalOperations)
   1131     return SDValue();
   1132 
   1133   EVT VT = Op.getValueType();
   1134   if (VT.isVector() || !VT.isInteger())
   1135     return SDValue();
   1136 
   1137   // If operation type is 'undesirable', e.g. i16 on x86, consider
   1138   // promoting it.
   1139   unsigned Opc = Op.getOpcode();
   1140   if (TLI.isTypeDesirableForOp(Opc, VT))
   1141     return SDValue();
   1142 
   1143   EVT PVT = VT;
   1144   // Consult target whether it is a good idea to promote this operation and
   1145   // what's the right type to promote it to.
   1146   if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
   1147     assert(PVT != VT && "Don't know what type to promote to!");
   1148     // fold (aext (aext x)) -> (aext x)
   1149     // fold (aext (zext x)) -> (zext x)
   1150     // fold (aext (sext x)) -> (sext x)
   1151     DEBUG(dbgs() << "\nPromoting ";
   1152           Op.getNode()->dump(&DAG));
   1153     return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
   1154   }
   1155   return SDValue();
   1156 }
   1157 
/// Promote an unindexed scalar integer load to a more desirable type if the
/// target requests it, replacing the load in place. Returns true if the DAG
/// was changed.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  // Only unindexed loads can be rewritten this way.
  if (!ISD::isUNINDEXEDLoad(Op.getNode()))
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc dl(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Same extension-kind selection as PromoteOperand's load handling: widen
    // a plain load as zext if legal (any-extend otherwise), and preserve an
    // existing extload's extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    // Value users of the old load see the wide load truncated back to VT.
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}
   1208 
/// \brief Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  // Candidate queue; the SetVector both dedupes and gives a deterministic
  // processing order.
  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Dead: queue its operands (they just lost a user), then delete it.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still live, but one of its users was deleted above; revisit it since
      // new combines may now apply.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}
   1238 
   1239 //===----------------------------------------------------------------------===//
   1240 //  Main DAG Combiner implementation
   1241 //===----------------------------------------------------------------------===//
   1242 
/// Top-level driver: seed the worklist with every node in the DAG and iterate
/// to a fixed point, combining, replacing, and deleting nodes as it goes.
void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // While the worklist isn't empty, find a node and try to combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null entries.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    // Worklist and WorklistMap must stay in sync; erase returns whether the
    // entry existed.
    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      // N is no longer valid after legalization; move to the next entry.
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    // Dispatch to the per-opcode visit routine.
    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // Transfer all uses of N to the replacement value(s).
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // Single-value replacement: RV must match N's (only) result type.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node. This will also take care of adding any
    // operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load, update the root).
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}
   1348 
   1349 SDValue DAGCombiner::visit(SDNode *N) {
   1350   switch (N->getOpcode()) {
   1351   default: break;
   1352   case ISD::TokenFactor:        return visitTokenFactor(N);
   1353   case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
   1354   case ISD::ADD:                return visitADD(N);
   1355   case ISD::SUB:                return visitSUB(N);
   1356   case ISD::ADDC:               return visitADDC(N);
   1357   case ISD::SUBC:               return visitSUBC(N);
   1358   case ISD::ADDE:               return visitADDE(N);
   1359   case ISD::SUBE:               return visitSUBE(N);
   1360   case ISD::MUL:                return visitMUL(N);
   1361   case ISD::SDIV:               return visitSDIV(N);
   1362   case ISD::UDIV:               return visitUDIV(N);
   1363   case ISD::SREM:
   1364   case ISD::UREM:               return visitREM(N);
   1365   case ISD::MULHU:              return visitMULHU(N);
   1366   case ISD::MULHS:              return visitMULHS(N);
   1367   case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
   1368   case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
   1369   case ISD::SMULO:              return visitSMULO(N);
   1370   case ISD::UMULO:              return visitUMULO(N);
   1371   case ISD::SMIN:
   1372   case ISD::SMAX:
   1373   case ISD::UMIN:
   1374   case ISD::UMAX:               return visitIMINMAX(N);
   1375   case ISD::AND:                return visitAND(N);
   1376   case ISD::OR:                 return visitOR(N);
   1377   case ISD::XOR:                return visitXOR(N);
   1378   case ISD::SHL:                return visitSHL(N);
   1379   case ISD::SRA:                return visitSRA(N);
   1380   case ISD::SRL:                return visitSRL(N);
   1381   case ISD::ROTR:
   1382   case ISD::ROTL:               return visitRotate(N);
   1383   case ISD::BSWAP:              return visitBSWAP(N);
   1384   case ISD::BITREVERSE:         return visitBITREVERSE(N);
   1385   case ISD::CTLZ:               return visitCTLZ(N);
   1386   case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
   1387   case ISD::CTTZ:               return visitCTTZ(N);
   1388   case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
   1389   case ISD::CTPOP:              return visitCTPOP(N);
   1390   case ISD::SELECT:             return visitSELECT(N);
   1391   case ISD::VSELECT:            return visitVSELECT(N);
   1392   case ISD::SELECT_CC:          return visitSELECT_CC(N);
   1393   case ISD::SETCC:              return visitSETCC(N);
   1394   case ISD::SETCCE:             return visitSETCCE(N);
   1395   case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
   1396   case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
   1397   case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
   1398   case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
   1399   case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
   1400   case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N);
   1401   case ISD::TRUNCATE:           return visitTRUNCATE(N);
   1402   case ISD::BITCAST:            return visitBITCAST(N);
   1403   case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
   1404   case ISD::FADD:               return visitFADD(N);
   1405   case ISD::FSUB:               return visitFSUB(N);
   1406   case ISD::FMUL:               return visitFMUL(N);
   1407   case ISD::FMA:                return visitFMA(N);
   1408   case ISD::FDIV:               return visitFDIV(N);
   1409   case ISD::FREM:               return visitFREM(N);
   1410   case ISD::FSQRT:              return visitFSQRT(N);
   1411   case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
   1412   case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
   1413   case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
   1414   case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
   1415   case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
   1416   case ISD::FP_ROUND:           return visitFP_ROUND(N);
   1417   case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
   1418   case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
   1419   case ISD::FNEG:               return visitFNEG(N);
   1420   case ISD::FABS:               return visitFABS(N);
   1421   case ISD::FFLOOR:             return visitFFLOOR(N);
   1422   case ISD::FMINNUM:            return visitFMINNUM(N);
   1423   case ISD::FMAXNUM:            return visitFMAXNUM(N);
   1424   case ISD::FCEIL:              return visitFCEIL(N);
   1425   case ISD::FTRUNC:             return visitFTRUNC(N);
   1426   case ISD::BRCOND:             return visitBRCOND(N);
   1427   case ISD::BR_CC:              return visitBR_CC(N);
   1428   case ISD::LOAD:               return visitLOAD(N);
   1429   case ISD::STORE:              return visitSTORE(N);
   1430   case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
   1431   case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
   1432   case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
   1433   case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
   1434   case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
   1435   case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
   1436   case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
   1437   case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
   1438   case ISD::MGATHER:            return visitMGATHER(N);
   1439   case ISD::MLOAD:              return visitMLOAD(N);
   1440   case ISD::MSCATTER:           return visitMSCATTER(N);
   1441   case ISD::MSTORE:             return visitMSTORE(N);
   1442   case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
   1443   case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
   1444   }
   1445   return SDValue();
   1446 }
   1447 
   1448 SDValue DAGCombiner::combine(SDNode *N) {
   1449   SDValue RV = visit(N);
   1450 
   1451   // If nothing happened, try a target-specific DAG combine.
   1452   if (!RV.getNode()) {
   1453     assert(N->getOpcode() != ISD::DELETED_NODE &&
   1454            "Node was deleted but visit returned NULL!");
   1455 
   1456     if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
   1457         TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {
   1458 
   1459       // Expose the DAG combiner to the target combiner impls.
   1460       TargetLowering::DAGCombinerInfo
   1461         DagCombineInfo(DAG, Level, false, this);
   1462 
   1463       RV = TLI.PerformDAGCombine(N, DagCombineInfo);
   1464     }
   1465   }
   1466 
   1467   // If nothing happened still, try promoting the operation.
   1468   if (!RV.getNode()) {
   1469     switch (N->getOpcode()) {
   1470     default: break;
   1471     case ISD::ADD:
   1472     case ISD::SUB:
   1473     case ISD::MUL:
   1474     case ISD::AND:
   1475     case ISD::OR:
   1476     case ISD::XOR:
   1477       RV = PromoteIntBinOp(SDValue(N, 0));
   1478       break;
   1479     case ISD::SHL:
   1480     case ISD::SRA:
   1481     case ISD::SRL:
   1482       RV = PromoteIntShiftOp(SDValue(N, 0));
   1483       break;
   1484     case ISD::SIGN_EXTEND:
   1485     case ISD::ZERO_EXTEND:
   1486     case ISD::ANY_EXTEND:
   1487       RV = PromoteExtend(SDValue(N, 0));
   1488       break;
   1489     case ISD::LOAD:
   1490       if (PromoteLoad(SDValue(N, 0)))
   1491         RV = SDValue(N, 0);
   1492       break;
   1493     }
   1494   }
   1495 
   1496   // If N is a commutative binary node, try commuting it to enable more
   1497   // sdisel CSE.
   1498   if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
   1499       N->getNumValues() == 1) {
   1500     SDValue N0 = N->getOperand(0);
   1501     SDValue N1 = N->getOperand(1);
   1502 
   1503     // Constant operands are canonicalized to RHS.
   1504     if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
   1505       SDValue Ops[] = {N1, N0};
   1506       SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
   1507                                             N->getFlags());
   1508       if (CSENode)
   1509         return SDValue(CSENode, 0);
   1510     }
   1511   }
   1512 
   1513   return RV;
   1514 }
   1515 
   1516 /// Given a node, return its input chain if it has one, otherwise return a null
   1517 /// sd operand.
   1518 static SDValue getInputChainForNode(SDNode *N) {
   1519   if (unsigned NumOps = N->getNumOperands()) {
   1520     if (N->getOperand(0).getValueType() == MVT::Other)
   1521       return N->getOperand(0);
   1522     if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
   1523       return N->getOperand(NumOps-1);
   1524     for (unsigned i = 1; i < NumOps-1; ++i)
   1525       if (N->getOperand(i).getValueType() == MVT::Other)
   1526         return N->getOperand(i);
   1527   }
   1528   return SDValue();
   1529 }
   1530 
   1531 SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
   1532   // If N has two operands, where one has an input chain equal to the other,
   1533   // the 'other' chain is redundant.
   1534   if (N->getNumOperands() == 2) {
   1535     if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
   1536       return N->getOperand(0);
   1537     if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
   1538       return N->getOperand(1);
   1539   }
   1540 
   1541   SmallVector<SDNode *, 8> TFs;     // List of token factors to visit.
   1542   SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
   1543   SmallPtrSet<SDNode*, 16> SeenOps;
   1544   bool Changed = false;             // If we should replace this token factor.
   1545 
   1546   // Start out with this token factor.
   1547   TFs.push_back(N);
   1548 
   1549   // Iterate through token factors.  The TFs grows when new token factors are
   1550   // encountered.
   1551   for (unsigned i = 0; i < TFs.size(); ++i) {
   1552     SDNode *TF = TFs[i];
   1553 
   1554     // Check each of the operands.
   1555     for (const SDValue &Op : TF->op_values()) {
   1556 
   1557       switch (Op.getOpcode()) {
   1558       case ISD::EntryToken:
   1559         // Entry tokens don't need to be added to the list. They are
   1560         // redundant.
   1561         Changed = true;
   1562         break;
   1563 
   1564       case ISD::TokenFactor:
   1565         if (Op.hasOneUse() &&
   1566             std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
   1567           // Queue up for processing.
   1568           TFs.push_back(Op.getNode());
   1569           // Clean up in case the token factor is removed.
   1570           AddToWorklist(Op.getNode());
   1571           Changed = true;
   1572           break;
   1573         }
   1574         // Fall thru
   1575 
   1576       default:
   1577         // Only add if it isn't already in the list.
   1578         if (SeenOps.insert(Op.getNode()).second)
   1579           Ops.push_back(Op);
   1580         else
   1581           Changed = true;
   1582         break;
   1583       }
   1584     }
   1585   }
   1586 
   1587   SDValue Result;
   1588 
   1589   // If we've changed things around then replace token factor.
   1590   if (Changed) {
   1591     if (Ops.empty()) {
   1592       // The entry token is the only possible outcome.
   1593       Result = DAG.getEntryNode();
   1594     } else {
   1595       // New and improved token factor.
   1596       Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
   1597     }
   1598 
   1599     // Add users to worklist if AA is enabled, since it may introduce
   1600     // a lot of new chained token factors while removing memory deps.
   1601     bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
   1602       : DAG.getSubtarget().useAA();
   1603     return CombineTo(N, Result, UseAA /*add to worklist*/);
   1604   }
   1605 
   1606   return Result;
   1607 }
   1608 
   1609 /// MERGE_VALUES can always be eliminated.
   1610 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
   1611   WorklistRemover DeadNodes(*this);
   1612   // Replacing results may cause a different MERGE_VALUES to suddenly
   1613   // be CSE'd with N, and carry its uses with it. Iterate until no
   1614   // uses remain, to ensure that the node can be safely deleted.
   1615   // First add the users of this node to the work list so that they
   1616   // can be tried again once they have new operands.
   1617   AddUsersToWorklist(N);
   1618   do {
   1619     for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
   1620       DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
   1621   } while (!N->use_empty());
   1622   deleteAndRecombine(N);
   1623   return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   1624 }
   1625 
   1626 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a
   1627 /// ConstantSDNode pointer else nullptr.
   1628 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) {
   1629   ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N);
   1630   return Const != nullptr && !Const->isOpaque() ? Const : nullptr;
   1631 }
   1632 
/// Combine an integer ADD node. The folds below are attempted strictly in
/// order; each one early-returns its replacement, so later folds may rely on
/// canonicalizations performed by earlier ones (e.g. constants on the RHS).
SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) {
    // canonicalize constant to RHS
    if (!DAG.isConstantIntBuildVectorOrConstantInt(N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
    // fold (add c1, c2) -> c1+c2
    return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());
  }
  // fold (add x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold ((c1-A)+c2) -> (c1+c2)-A
  // Opaque constants are skipped (getAsNonOpaqueConstant) so values the
  // target wants materialized verbatim are left alone.
  if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) {
    if (N0.getOpcode() == ISD::SUB)
      if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) {
        SDLoc DL(N);
        return DAG.getNode(ISD::SUB, DL, VT,
                           DAG.getConstant(N1C->getAPIntValue()+
                                           N0C->getAPIntValue(), DL, VT),
                           N0.getOperand(1));
      }
  }
  // reassociate add
  if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1))
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // Let demanded-bits analysis simplify the operands (scalar types only).
  // Returning N itself signals the caller that N was updated in place.
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) &&
      VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N1.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N1.getOperand(0).getOperand(1),
                                   N1.getOperand(1)));
  if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB &&
      isNullConstant(N0.getOperand(0).getOperand(0)))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                       DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                   N0.getOperand(0).getOperand(1),
                                   N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    // NumSignBits == DestBits means AndOp0 is all-ones or all-zeros, so
    // (and AndOp0, 1) is 0 or 1, and z + (0 or 1) == z - (0 or -1).
    if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  // sext i1 yields 0 or -1; zext i1 yields 0 or 1, so add becomes sub.
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  // No fold applied.
  return SDValue();
}
   1779 
   1780 SDValue DAGCombiner::visitADDC(SDNode *N) {
   1781   SDValue N0 = N->getOperand(0);
   1782   SDValue N1 = N->getOperand(1);
   1783   EVT VT = N0.getValueType();
   1784 
   1785   // If the flag result is dead, turn this into an ADD.
   1786   if (!N->hasAnyUseOfValue(1))
   1787     return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
   1788                      DAG.getNode(ISD::CARRY_FALSE,
   1789                                  SDLoc(N), MVT::Glue));
   1790 
   1791   // canonicalize constant to RHS.
   1792   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1793   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1794   if (N0C && !N1C)
   1795     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);
   1796 
   1797   // fold (addc x, 0) -> x + no carry out
   1798   if (isNullConstant(N1))
   1799     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
   1800                                         SDLoc(N), MVT::Glue));
   1801 
   1802   // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
   1803   APInt LHSZero, LHSOne;
   1804   APInt RHSZero, RHSOne;
   1805   DAG.computeKnownBits(N0, LHSZero, LHSOne);
   1806 
   1807   if (LHSZero.getBoolValue()) {
   1808     DAG.computeKnownBits(N1, RHSZero, RHSOne);
   1809 
   1810     // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
   1811     // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
   1812     if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
   1813       return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
   1814                        DAG.getNode(ISD::CARRY_FALSE,
   1815                                    SDLoc(N), MVT::Glue));
   1816   }
   1817 
   1818   return SDValue();
   1819 }
   1820 
   1821 SDValue DAGCombiner::visitADDE(SDNode *N) {
   1822   SDValue N0 = N->getOperand(0);
   1823   SDValue N1 = N->getOperand(1);
   1824   SDValue CarryIn = N->getOperand(2);
   1825 
   1826   // canonicalize constant to RHS
   1827   ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
   1828   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   1829   if (N0C && !N1C)
   1830     return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
   1831                        N1, N0, CarryIn);
   1832 
   1833   // fold (adde x, y, false) -> (addc x, y)
   1834   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   1835     return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);
   1836 
   1837   return SDValue();
   1838 }
   1839 
   1840 // Since it may not be valid to emit a fold to zero for vector initializers
   1841 // check if we can before folding.
   1842 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT,
   1843                              SelectionDAG &DAG, bool LegalOperations,
   1844                              bool LegalTypes) {
   1845   if (!VT.isVector())
   1846     return DAG.getConstant(0, DL, VT);
   1847   if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   1848     return DAG.getConstant(0, DL, VT);
   1849   return SDValue();
   1850 }
   1851 
/// Combine an integer SUB node. Folds are attempted strictly in order; each
/// early-returns its replacement, so later folds may assume the earlier
/// canonicalizations (e.g. (sub x, c) already rewritten as (add x, -c)).
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      DAG.isConstantIntBuildVectorOrConstantInt(N1)) {
    // fold (sub c1, c2) -> c1-c2
    return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());
  }
  // Non-opaque scalar constants only; N0C/N1C are also reused by the
  // symbol-offset fold further below.
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstant(N0))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDLoc DL(N);
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                      (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 SDLoc(N), VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  // sextinreg to i1 produces 0 or -1; (and Y 1) produces 0 or 1.
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  // No fold applied.
  return SDValue();
}
   1961 
   1962 SDValue DAGCombiner::visitSUBC(SDNode *N) {
   1963   SDValue N0 = N->getOperand(0);
   1964   SDValue N1 = N->getOperand(1);
   1965   EVT VT = N0.getValueType();
   1966   SDLoc DL(N);
   1967 
   1968   // If the flag result is dead, turn this into an SUB.
   1969   if (!N->hasAnyUseOfValue(1))
   1970     return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
   1971                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1972 
   1973   // fold (subc x, x) -> 0 + no borrow
   1974   if (N0 == N1)
   1975     return CombineTo(N, DAG.getConstant(0, DL, VT),
   1976                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1977 
   1978   // fold (subc x, 0) -> x + no borrow
   1979   if (isNullConstant(N1))
   1980     return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1981 
   1982   // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
   1983   if (isAllOnesConstant(N0))
   1984     return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
   1985                      DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));
   1986 
   1987   return SDValue();
   1988 }
   1989 
   1990 SDValue DAGCombiner::visitSUBE(SDNode *N) {
   1991   SDValue N0 = N->getOperand(0);
   1992   SDValue N1 = N->getOperand(1);
   1993   SDValue CarryIn = N->getOperand(2);
   1994 
   1995   // fold (sube x, y, false) -> (subc x, y)
   1996   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
   1997     return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
   1998 
   1999   return SDValue();
   2000 }
   2001 
/// Combine an integer MUL node. Handles scalar constants and (for vectors)
/// constant splats uniformly via ConstValue0/ConstValue1; folds are attempted
/// strictly in order and each early-returns its replacement.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.isUndef() || N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // For vectors, a "constant" operand means a splat of one value.
    N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  // Opaque constants are excluded: the target wants them kept materialized.
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
      IsFullSplat) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SHL, DL, VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
      IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr, 0), Y(nullptr, 0);
    // Check for both (mul (shl X, C), Y)  and  (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
         isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  // Guarded by a target profitability hook since it duplicates the multiply.
  if (DAG.isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
      return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                         DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                     N0.getOperand(0), N1),
                         DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                     N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

  // No fold applied.
  return SDValue();
}
   2132 
   2133 /// Return true if divmod libcall is available.
   2134 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
   2135                                      const TargetLowering &TLI) {
   2136   RTLIB::Libcall LC;
   2137   EVT NodeType = Node->getValueType(0);
   2138   if (!NodeType.isSimple())
   2139     return false;
   2140   switch (NodeType.getSimpleVT().SimpleTy) {
   2141   default: return false; // No libcall for vector types.
   2142   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
   2143   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
   2144   case MVT::i32:  LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break;
   2145   case MVT::i64:  LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break;
   2146   case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break;
   2147   }
   2148 
   2149   return TLI.getLibcallName(LC) != nullptr;
   2150 }
   2151 
/// Issue divrem if both quotient and remainder are needed.
///
/// Given a DIV/REM/DIVREM node, look for sibling nodes that compute the
/// matching quotient/remainder of the same operands and fuse them all into a
/// single {S,U}DIVREM. Returns the DIVREM value on success (callers take
/// value 0 or 1 as appropriate), or an empty SDValue if no combine applied.
SDValue DAGCombiner::useDivRem(SDNode *Node) {
  if (Node->use_empty())
    return SDValue(); // This is a dead node, leave it alone.

  unsigned Opcode = Node->getOpcode();
  bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM);
  unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM;

  // DivMod lib calls can still work on non-legal types if using lib-calls.
  EVT VT = Node->getValueType(0);
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT))
    return SDValue();

  // If DIVREM is going to get expanded into a libcall,
  // but there is no libcall available, then don't combine.
  if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) &&
      !isDivRemLibcallAvailable(Node, isSigned, TLI))
    return SDValue();

  // If div is legal, it's better to do the normal expansion
  unsigned OtherOpcode = 0;
  if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) {
    // We are a DIV; the "other" opcode is the matching REM.
    OtherOpcode = isSigned ? ISD::SREM : ISD::UREM;
    if (TLI.isOperationLegalOrCustom(Opcode, VT))
      return SDValue();
  } else {
    // We are a REM; the "other" opcode is the matching DIV.
    OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    if (TLI.isOperationLegalOrCustom(OtherOpcode, VT))
      return SDValue();
  }

  // Walk all users of our first operand looking for nodes that compute the
  // same division with the same operands; each match is folded into one
  // shared DIVREM node.
  SDValue Op0 = Node->getOperand(0);
  SDValue Op1 = Node->getOperand(1);
  SDValue combined;
  for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
         UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
    SDNode *User = *UI;
    if (User == Node || User->use_empty())
      continue;
    // Convert the other matching node(s), too;
    // otherwise, the DIVREM may get target-legalized into something
    // target-specific that we won't be able to recognize.
    unsigned UserOpc = User->getOpcode();
    if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) &&
        User->getOperand(0) == Op0 &&
        User->getOperand(1) == Op1) {
      if (!combined) {
        // Lazily create (or reuse) the DIVREM node on the first match.
        if (UserOpc == OtherOpcode) {
          SDVTList VTs = DAG.getVTList(VT, VT);
          combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1);
        } else if (UserOpc == DivRemOpc) {
          // An existing DIVREM of the same operands: reuse it.
          combined = SDValue(User, 0);
        } else {
          // A duplicate of Node itself; handled when combined is set later.
          assert(UserOpc == Opcode);
          continue;
        }
      }
      // Replace the user with the matching DIVREM result: quotient is
      // value 0, remainder is value 1.
      if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV)
        CombineTo(User, combined);
      else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM)
        CombineTo(User, combined.getValue(1));
    }
  }
  return combined;
}
   2221 
/// Combine/simplify an ISD::SDIV node: constant folding, identity folds,
/// strength reduction to udiv, power-of-two expansion, target-specific
/// alternate sequences, and a final divrem merge. Folds are tried in order;
/// the first match wins.
SDValue DAGCombiner::visitSDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (sdiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C);
  // fold (sdiv X, 1) -> X
  if (N1C && N1C->isOne())
    return N0;
  // fold (sdiv X, -1) -> 0-X
  if (N1C && N1C->isAllOnesValue())
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);

  // If we know the sign bits of both operands are zero, strength reduce to a
  // udiv instead.  Handles (X&15) /s 4 -> X&15 >> 2
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1);
  }

  // fold (sdiv X, pow2) -> simple ops after legalize
  // FIXME: We check for the exact bit here because the generic lowering gives
  // better results in that case. The target-specific lowering should learn how
  // to handle exact sdivs efficiently.
  if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
      !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() &&
      (N1C->getAPIntValue().isPowerOf2() ||
       (-N1C->getAPIntValue()).isPowerOf2())) {
    // Target-specific implementation of sdiv x, pow2.
    if (SDValue Res = BuildSDIVPow2(N))
      return Res;

    // Generic expansion: for |divisor| == 2^lg2, compute
    //   (N0 + (N0 < 0 ? 2^lg2 - 1 : 0)) >>s lg2
    // which rounds the quotient toward zero as sdiv requires.
    unsigned lg2 = N1C->getAPIntValue().countTrailingZeros();

    // Splat the sign bit into the register
    SDValue SGN =
        DAG.getNode(ISD::SRA, DL, VT, N0,
                    DAG.getConstant(VT.getScalarSizeInBits() - 1, DL,
                                    getShiftAmountTy(N0.getValueType())));
    AddToWorklist(SGN.getNode());

    // Add (N0 < 0) ? abs2 - 1 : 0;
    SDValue SRL =
        DAG.getNode(ISD::SRL, DL, VT, SGN,
                    DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL,
                                    getShiftAmountTy(SGN.getValueType())));
    SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL);
    AddToWorklist(SRL.getNode());
    AddToWorklist(ADD.getNode());    // Divide by pow2
    SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD,
                  DAG.getConstant(lg2, DL,
                                  getShiftAmountTy(ADD.getValueType())));

    // If we're dividing by a positive value, we're done.  Otherwise, we must
    // negate the result.
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
  // Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  // undef / X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.isUndef())
    return N1;

  return SDValue();
}
   2320 
/// Combine/simplify an ISD::UDIV node: constant folding, power-of-two
/// shifts, target-specific alternate sequences, and a final divrem merge.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, DL, VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        // Build log2(c)+y in the shift-amount type of the SHL operand.
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                  .logBase2(),
                                                  DL, ADDVT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
      }
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true.
  // Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
        return DivRem;

  // undef / X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.isUndef())
    return N1;

  return SDValue();
}
   2384 
// handles ISD::SREM and ISD::UREM
/// Combine/simplify a remainder node: constant folding, strength reduction
/// (srem -> urem, urem-by-pow2 -> and), rewriting X%C as X-(X/C)*C when the
/// division-by-constant logic applies, and a final divrem merge.
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (!VT.isVector()) {
      if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
        return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
    }
  } else {
    // fold (urem x, pow2) -> (and x, pow2-1)
    if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
        N1C->getAPIntValue().isPowerOf2()) {
      return DAG.getNode(ISD::AND, DL, VT, N0,
                         DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
    }
    // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
    if (N1.getOpcode() == ISD::SHL) {
      ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0));
      if (SHC && SHC->getAPIntValue().isPowerOf2()) {
        // NOTE(review): uses VT.getSizeInBits(), which for a vector VT is the
        // full vector width rather than the element width — confirm this path
        // is only reached for scalar VT or that the width is intended.
        APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits());
        SDValue Add =
            DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::AND, DL, VT, N0, Add);
      }
    }
  }

  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    // Build a speculative DIV and let the combiner try to simplify it.
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      // Per the guard above, combine() must not have produced a DIVREM here.
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv, srem -> sdivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  // undef % X -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, DL, VT);
  // X % undef -> undef
  if (N1.isUndef())
    return N1;

  return SDValue();
}
   2466 
   2467 SDValue DAGCombiner::visitMULHS(SDNode *N) {
   2468   SDValue N0 = N->getOperand(0);
   2469   SDValue N1 = N->getOperand(1);
   2470   EVT VT = N->getValueType(0);
   2471   SDLoc DL(N);
   2472 
   2473   // fold (mulhs x, 0) -> 0
   2474   if (isNullConstant(N1))
   2475     return N1;
   2476   // fold (mulhs x, 1) -> (sra x, size(x)-1)
   2477   if (isOneConstant(N1)) {
   2478     SDLoc DL(N);
   2479     return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
   2480                        DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
   2481                                        DL,
   2482                                        getShiftAmountTy(N0.getValueType())));
   2483   }
   2484   // fold (mulhs x, undef) -> 0
   2485   if (N0.isUndef() || N1.isUndef())
   2486     return DAG.getConstant(0, SDLoc(N), VT);
   2487 
   2488   // If the type twice as wide is legal, transform the mulhs to a wider multiply
   2489   // plus a shift.
   2490   if (VT.isSimple() && !VT.isVector()) {
   2491     MVT Simple = VT.getSimpleVT();
   2492     unsigned SimpleSize = Simple.getSizeInBits();
   2493     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2494     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2495       N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
   2496       N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
   2497       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   2498       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   2499             DAG.getConstant(SimpleSize, DL,
   2500                             getShiftAmountTy(N1.getValueType())));
   2501       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   2502     }
   2503   }
   2504 
   2505   return SDValue();
   2506 }
   2507 
   2508 SDValue DAGCombiner::visitMULHU(SDNode *N) {
   2509   SDValue N0 = N->getOperand(0);
   2510   SDValue N1 = N->getOperand(1);
   2511   EVT VT = N->getValueType(0);
   2512   SDLoc DL(N);
   2513 
   2514   // fold (mulhu x, 0) -> 0
   2515   if (isNullConstant(N1))
   2516     return N1;
   2517   // fold (mulhu x, 1) -> 0
   2518   if (isOneConstant(N1))
   2519     return DAG.getConstant(0, DL, N0.getValueType());
   2520   // fold (mulhu x, undef) -> 0
   2521   if (N0.isUndef() || N1.isUndef())
   2522     return DAG.getConstant(0, DL, VT);
   2523 
   2524   // If the type twice as wide is legal, transform the mulhu to a wider multiply
   2525   // plus a shift.
   2526   if (VT.isSimple() && !VT.isVector()) {
   2527     MVT Simple = VT.getSimpleVT();
   2528     unsigned SimpleSize = Simple.getSizeInBits();
   2529     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2530     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2531       N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
   2532       N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
   2533       N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
   2534       N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
   2535             DAG.getConstant(SimpleSize, DL,
   2536                             getShiftAmountTy(N1.getValueType())));
   2537       return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
   2538     }
   2539   }
   2540 
   2541   return SDValue();
   2542 }
   2543 
/// Perform optimizations common to nodes that compute two values. LoOp and HiOp
/// give the opcodes for the two computations that are being performed. Return
/// true if a simplification was made.
///
/// Strategy: if only one result is used, replace the node with the
/// single-result opcode for that half; if both are used, speculatively build
/// each half separately and keep a simplification if the combiner finds one.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    // Both results map to Res; the dead high half has no uses anyway.
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  // NOTE(review): this path requires the op to be fully Legal while the low
  // path above also accepts Custom — confirm the asymmetry is intentional.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    // Only keep the speculative node if the combiner actually changed it
    // (and the result is legal when legality matters).
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}
   2594 
/// Combine/simplify an ISD::SMUL_LOHI node. First try the shared two-result
/// simplifications; failing that, lower through a single multiply at twice
/// the width when that multiply is legal.
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the smul_lohi to a wider
  // signed multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Sign-extend both operands and multiply at the wide type.
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // The high result (value 1) is the upper half of the wide product.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, DL,
                            getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // The low result (value 0) is the truncated wide product.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}
   2625 
   2626 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
   2627   if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
   2628     return Res;
   2629 
   2630   EVT VT = N->getValueType(0);
   2631   SDLoc DL(N);
   2632 
   2633   // If the type is twice as wide is legal, transform the mulhu to a wider
   2634   // multiply plus a shift.
   2635   if (VT.isSimple() && !VT.isVector()) {
   2636     MVT Simple = VT.getSimpleVT();
   2637     unsigned SimpleSize = Simple.getSizeInBits();
   2638     EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
   2639     if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
   2640       SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
   2641       SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
   2642       Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
   2643       // Compute the high part as N1.
   2644       Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
   2645             DAG.getConstant(SimpleSize, DL,
   2646                             getShiftAmountTy(Lo.getValueType())));
   2647       Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
   2648       // Compute the low part as N0.
   2649       Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
   2650       return CombineTo(N, Lo, Hi);
   2651     }
   2652   }
   2653 
   2654   return SDValue();
   2655 }
   2656 
   2657 SDValue DAGCombiner::visitSMULO(SDNode *N) {
   2658   // (smulo x, 2) -> (saddo x, x)
   2659   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   2660     if (C2->getAPIntValue() == 2)
   2661       return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
   2662                          N->getOperand(0), N->getOperand(0));
   2663 
   2664   return SDValue();
   2665 }
   2666 
   2667 SDValue DAGCombiner::visitUMULO(SDNode *N) {
   2668   // (umulo x, 2) -> (uaddo x, x)
   2669   if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
   2670     if (C2->getAPIntValue() == 2)
   2671       return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
   2672                          N->getOperand(0), N->getOperand(0));
   2673 
   2674   return SDValue();
   2675 }
   2676 
/// Combine/simplify the integer min/max nodes (SMIN/SMAX/UMIN/UMAX):
/// vector binop simplification, constant folding, and canonicalizing the
/// constant operand to the RHS.
SDValue DAGCombiner::visitIMINMAX(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (op c1, c2) -> the constant min/max of c1 and c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C);

  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0);

  return SDValue();
}
   2700 
   2701 /// If this is a binary operator with two operands of the same opcode, try to
   2702 /// simplify it.
   2703 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
   2704   SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
   2705   EVT VT = N0.getValueType();
   2706   assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");
   2707 
   2708   // Bail early if none of these transforms apply.
   2709   if (N0.getNode()->getNumOperands() == 0) return SDValue();
   2710 
   2711   // For each of OP in AND/OR/XOR:
   2712   // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
   2713   // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
   2714   // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
   2715   // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y))
   2716   // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
   2717   //
   2718   // do not sink logical op inside of a vector extend, since it may combine
   2719   // into a vsetcc.
   2720   EVT Op0VT = N0.getOperand(0).getValueType();
   2721   if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
   2722        N0.getOpcode() == ISD::SIGN_EXTEND ||
   2723        N0.getOpcode() == ISD::BSWAP ||
   2724        // Avoid infinite looping with PromoteIntBinOp.
   2725        (N0.getOpcode() == ISD::ANY_EXTEND &&
   2726         (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
   2727        (N0.getOpcode() == ISD::TRUNCATE &&
   2728         (!TLI.isZExtFree(VT, Op0VT) ||
   2729          !TLI.isTruncateFree(Op0VT, VT)) &&
   2730         TLI.isTypeLegal(Op0VT))) &&
   2731       !VT.isVector() &&
   2732       Op0VT == N1.getOperand(0).getValueType() &&
   2733       (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
   2734     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   2735                                  N0.getOperand(0).getValueType(),
   2736                                  N0.getOperand(0), N1.getOperand(0));
   2737     AddToWorklist(ORNode.getNode());
   2738     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
   2739   }
   2740 
   2741   // For each of OP in SHL/SRL/SRA/AND...
   2742   //   fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
   2743   //   fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
   2744   //   fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
   2745   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
   2746        N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
   2747       N0.getOperand(1) == N1.getOperand(1)) {
   2748     SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
   2749                                  N0.getOperand(0).getValueType(),
   2750                                  N0.getOperand(0), N1.getOperand(0));
   2751     AddToWorklist(ORNode.getNode());
   2752     return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
   2753                        ORNode, N0.getOperand(1));
   2754   }
   2755 
   2756   // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
   2757   // Only perform this optimization up until type legalization, before
   2758   // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
   2759   // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
   2760   // we don't want to undo this promotion.
   2761   // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
   2762   // on scalars.
   2763   if ((N0.getOpcode() == ISD::BITCAST ||
   2764        N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
   2765        Level <= AfterLegalizeTypes) {
   2766     SDValue In0 = N0.getOperand(0);
   2767     SDValue In1 = N1.getOperand(0);
   2768     EVT In0Ty = In0.getValueType();
   2769     EVT In1Ty = In1.getValueType();
   2770     SDLoc DL(N);
   2771     // If both incoming values are integers, and the original types are the
   2772     // same.
   2773     if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
   2774       SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
   2775       SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
   2776       AddToWorklist(Op.getNode());
   2777       return BC;
   2778     }
   2779   }
   2780 
   2781   // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
   2782   // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
   2783   // If both shuffles use the same mask, and both shuffle within a single
   2784   // vector, then it is worthwhile to move the swizzle after the operation.
   2785   // The type-legalizer generates this pattern when loading illegal
   2786   // vector types from memory. In many cases this allows additional shuffle
   2787   // optimizations.
   2788   // There are other cases where moving the shuffle after the xor/and/or
   2789   // is profitable even if shuffles don't perform a swizzle.
   2790   // If both shuffles use the same mask, and both shuffles have the same first
   2791   // or second operand, then it might still be profitable to move the shuffle
   2792   // after the xor/and/or operation.
   2793   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
   2794     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
   2795     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);
   2796 
   2797     assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
   2798            "Inputs to shuffles are not the same type");
   2799 
   2800     // Check that both shuffles use the same mask. The masks are known to be of
   2801     // the same length because the result vector type is the same.
   2802     // Check also that shuffles have only one use to avoid introducing extra
   2803     // instructions.
   2804     if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
   2805         SVN0->getMask().equals(SVN1->getMask())) {
   2806       SDValue ShOp = N0->getOperand(1);
   2807 
   2808       // Don't try to fold this node if it requires introducing a
   2809       // build vector of all zeros that might be illegal at this stage.
   2810       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
   2811         if (!LegalTypes)
   2812           ShOp = DAG.getConstant(0, SDLoc(N), VT);
   2813         else
   2814           ShOp = SDValue();
   2815       }
   2816 
   2817       // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
   2818       // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
   2819       // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
   2820       if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
   2821         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   2822                                       N0->getOperand(0), N1->getOperand(0));
   2823         AddToWorklist(NewNode.getNode());
   2824         return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
   2825                                     SVN0->getMask());
   2826       }
   2827 
   2828       // Don't try to fold this node if it requires introducing a
   2829       // build vector of all zeros that might be illegal at this stage.
   2830       ShOp = N0->getOperand(0);
   2831       if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) {
   2832         if (!LegalTypes)
   2833           ShOp = DAG.getConstant(0, SDLoc(N), VT);
   2834         else
   2835           ShOp = SDValue();
   2836       }
   2837 
   2838       // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
   2839       // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
   2840       // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
   2841       if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
   2842         SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   2843                                       N0->getOperand(1), N1->getOperand(1));
   2844         AddToWorklist(NewNode.getNode());
   2845         return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
   2846                                     SVN0->getMask());
   2847       }
   2848     }
   2849   }
   2850 
   2851   return SDValue();
   2852 }
   2853 
   2854 /// This contains all DAGCombine rules which reduce two values combined by
   2855 /// an And operation to a single value. This makes them reusable in the context
   2856 /// of visitSELECT(). Rules involving constants are not included as
   2857 /// visitSELECT() already handles those cases.
   2858 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
   2859                                   SDNode *LocReference) {
   2860   EVT VT = N1.getValueType();
   2861 
   2862   // fold (and x, undef) -> 0
   2863   if (N0.isUndef() || N1.isUndef())
   2864     return DAG.getConstant(0, SDLoc(LocReference), VT);
   2865   // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
   2866   SDValue LL, LR, RL, RR, CC0, CC1;
   2867   if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
   2868     ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
   2869     ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();
   2870 
   2871     if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
   2872         LL.getValueType().isInteger()) {
   2873       // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
   2874       if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
   2875         SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
   2876                                      LR.getValueType(), LL, RL);
   2877         AddToWorklist(ORNode.getNode());
   2878         return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
   2879       }
   2880       if (isAllOnesConstant(LR)) {
   2881         // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
   2882         if (Op1 == ISD::SETEQ) {
   2883           SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
   2884                                         LR.getValueType(), LL, RL);
   2885           AddToWorklist(ANDNode.getNode());
   2886           return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
   2887         }
   2888         // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
   2889         if (Op1 == ISD::SETGT) {
   2890           SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
   2891                                        LR.getValueType(), LL, RL);
   2892           AddToWorklist(ORNode.getNode());
   2893           return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
   2894         }
   2895       }
   2896     }
   2897     // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
   2898     if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
   2899         Op0 == Op1 && LL.getValueType().isInteger() &&
   2900       Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
   2901                             (isAllOnesConstant(LR) && isNullConstant(RR)))) {
   2902       SDLoc DL(N0);
   2903       SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
   2904                                     LL, DAG.getConstant(1, DL,
   2905                                                         LL.getValueType()));
   2906       AddToWorklist(ADDNode.getNode());
   2907       return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
   2908                           DAG.getConstant(2, DL, LL.getValueType()),
   2909                           ISD::SETUGE);
   2910     }
   2911     // canonicalize equivalent to ll == rl
   2912     if (LL == RR && LR == RL) {
   2913       Op1 = ISD::getSetCCSwappedOperands(Op1);
   2914       std::swap(RL, RR);
   2915     }
   2916     if (LL == RL && LR == RR) {
   2917       bool isInteger = LL.getValueType().isInteger();
   2918       ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
   2919       if (Result != ISD::SETCC_INVALID &&
   2920           (!LegalOperations ||
   2921            (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
   2922             TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
   2923         EVT CCVT = getSetCCResultType(LL.getValueType());
   2924         if (N0.getValueType() == CCVT ||
   2925             (!LegalOperations && N0.getValueType() == MVT::i1))
   2926           return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
   2927                               LL, LR, Result);
   2928       }
   2929     }
   2930   }
   2931 
   2932   if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
   2933       VT.getSizeInBits() <= 64) {
   2934     if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   2935       APInt ADDC = ADDI->getAPIntValue();
   2936       if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   2937         // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
   2938         // immediate for an add, but it is legal if its top c2 bits are set,
   2939         // transform the ADD so the immediate doesn't need to be materialized
   2940         // in a register.
   2941         if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
   2942           APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
   2943                                              SRLI->getZExtValue());
   2944           if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
   2945             ADDC |= Mask;
   2946             if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
   2947               SDLoc DL(N0);
   2948               SDValue NewAdd =
   2949                 DAG.getNode(ISD::ADD, DL, VT,
   2950                             N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
   2951               CombineTo(N0.getNode(), NewAdd);
   2952               // Return N so it doesn't get rechecked!
   2953               return SDValue(LocReference, 0);
   2954             }
   2955           }
   2956         }
   2957       }
   2958     }
   2959   }
   2960 
   2961   // Reduce bit extract of low half of an integer to the narrower type.
   2962   // (and (srl i64:x, K), KMask) ->
   2963   //   (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask)
   2964   if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
   2965     if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) {
   2966       if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
   2967         unsigned Size = VT.getSizeInBits();
   2968         const APInt &AndMask = CAnd->getAPIntValue();
   2969         unsigned ShiftBits = CShift->getZExtValue();
   2970         unsigned MaskBits = AndMask.countTrailingOnes();
   2971         EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2);
   2972 
   2973         if (APIntOps::isMask(AndMask) &&
   2974             // Required bits must not span the two halves of the integer and
   2975             // must fit in the half size type.
   2976             (ShiftBits + MaskBits <= Size / 2) &&
   2977             TLI.isNarrowingProfitable(VT, HalfVT) &&
   2978             TLI.isTypeDesirableForOp(ISD::AND, HalfVT) &&
   2979             TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) &&
   2980             TLI.isTruncateFree(VT, HalfVT) &&
   2981             TLI.isZExtFree(HalfVT, VT)) {
   2982           // The isNarrowingProfitable is to avoid regressions on PPC and
   2983           // AArch64 which match a few 64-bit bit insert / bit extract patterns
   2984           // on downstream users of this. Those patterns could probably be
   2985           // extended to handle extensions mixed in.
   2986 
   2987           SDValue SL(N0);
   2988           assert(ShiftBits != 0 && MaskBits <= Size);
   2989 
   2990           // Extracting the highest bit of the low half.
   2991           EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout());
   2992           SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT,
   2993                                       N0.getOperand(0));
   2994 
   2995           SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT);
   2996           SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT);
   2997           SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK);
   2998           SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask);
   2999           return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And);
   3000         }
   3001       }
   3002     }
   3003   }
   3004 
   3005   return SDValue();
   3006 }
   3007 
   3008 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
   3009                                    EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
   3010                                    bool &NarrowLoad) {
   3011   uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();
   3012 
   3013   if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
   3014     return false;
   3015 
   3016   ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
   3017   LoadedVT = LoadN->getMemoryVT();
   3018 
   3019   if (ExtVT == LoadedVT &&
   3020       (!LegalOperations ||
   3021        TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
   3022     // ZEXTLOAD will match without needing to change the size of the value being
   3023     // loaded.
   3024     NarrowLoad = false;
   3025     return true;
   3026   }
   3027 
   3028   // Do not change the width of a volatile load.
   3029   if (LoadN->isVolatile())
   3030     return false;
   3031 
   3032   // Do not generate loads of non-round integer types since these can
   3033   // be expensive (and would be wrong if the type is not byte sized).
   3034   if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
   3035     return false;
   3036 
   3037   if (LegalOperations &&
   3038       !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
   3039     return false;
   3040 
   3041   if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
   3042     return false;
   3043 
   3044   NarrowLoad = true;
   3045   return true;
   3046 }
   3047 
/// Main DAG-combine entry point for ISD::AND nodes. Performs vector and
/// scalar constant folds, canonicalization, known-bits simplification, and
/// several AND-of-load patterns that convert the AND into a zero-extending
/// load. Returns the replacement value, or an empty SDValue if no combine
/// applied.
SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getNullValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getNullValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarType().getSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
        return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things.  This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD &&
       N0.getOperand(0).getResNo() == 0) ||
      (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    // Start with a 1-bit zero: it is never all-ones, so the fold below only
    // fires if one of the branches overwrites Constant.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        // NOTE: these locals shadow the function-level VT and BitWidth; here
        // they refer to the build_vector's type and its element width.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getVectorElementType().getSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant =
      Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());

    // B records whether the AND can be removed for this extension kind.
    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);
      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      // Fold the AND away, taking care not to fold to the old load node if we
      // replaced it.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (N1C && (N0.getOpcode() == ISD::LOAD ||
              (N0.getOpcode() == ISD::ANY_EXTEND &&
               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
    LoadSDNode *LN0 = HasAnyExt
      ? cast<LoadSDNode>(N0.getOperand(0))
      : cast<LoadSDNode>(N0);
    // Only fire for unindexed, non-sign-extending loads whose value is used
    // solely by this AND (plus the any_extend, if present).
    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
      auto NarrowLoad = false;
      EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
      EVT ExtVT, LoadedVT;
      if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
                           NarrowLoad)) {
        if (!NarrowLoad) {
          // Same width: just flip the extension kind to ZEXTLOAD.
          SDValue NewLoad =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
                           LN0->getMemOperand());
          AddToWorklist(N);
          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        } else {
          EVT PtrType = LN0->getOperand(1).getValueType();

          unsigned Alignment = LN0->getAlignment();
          SDValue NewPtr = LN0->getBasePtr();

          // For big endian targets, we need to add an offset to the pointer
          // to load the correct bytes.  For little endian systems, we merely
          // need to read fewer bytes from the same pointer.
          if (DAG.getDataLayout().isBigEndian()) {
            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
            unsigned EVTStoreBytes = ExtVT.getStoreSize();
            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
            SDLoc DL(LN0);
            NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
                                 NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
            // The offset pointer may be less aligned than the base pointer.
            Alignment = MinAlign(Alignment, PtrOff);
          }

          AddToWorklist(NewPtr.getNode());

          SDValue Load =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), NewPtr,
                           LN0->getPointerInfo(),
                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                           LN0->isInvariant(), Alignment, LN0->getAAInfo());
          AddToWorklist(N);
          CombineTo(LN0, Load, Load.getValue(1));
          return SDValue(N, 0);   // Return N so it doesn't get rechecked!
        }
      }
    }
  }

  // Delegate the shared AND combines (also reachable from visitSELECT).
  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...))  -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }
  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return BSwap;
  }

  // No combine applied.
  return SDValue();
}
   3352 
   3353 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16.
   3354 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
   3355                                         bool DemandHighBits) {
   3356   if (!LegalOperations)
   3357     return SDValue();
   3358 
   3359   EVT VT = N->getValueType(0);
   3360   if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
   3361     return SDValue();
   3362   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
   3363     return SDValue();
   3364 
   3365   // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
   3366   bool LookPassAnd0 = false;
   3367   bool LookPassAnd1 = false;
   3368   if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
   3369       std::swap(N0, N1);
   3370   if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
   3371       std::swap(N0, N1);
   3372   if (N0.getOpcode() == ISD::AND) {
   3373     if (!N0.getNode()->hasOneUse())
   3374       return SDValue();
   3375     ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3376     if (!N01C || N01C->getZExtValue() != 0xFF00)
   3377       return SDValue();
   3378     N0 = N0.getOperand(0);
   3379     LookPassAnd0 = true;
   3380   }
   3381 
   3382   if (N1.getOpcode() == ISD::AND) {
   3383     if (!N1.getNode()->hasOneUse())
   3384       return SDValue();
   3385     ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   3386     if (!N11C || N11C->getZExtValue() != 0xFF)
   3387       return SDValue();
   3388     N1 = N1.getOperand(0);
   3389     LookPassAnd1 = true;
   3390   }
   3391 
   3392   if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
   3393     std::swap(N0, N1);
   3394   if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
   3395     return SDValue();
   3396   if (!N0.getNode()->hasOneUse() ||
   3397       !N1.getNode()->hasOneUse())
   3398     return SDValue();
   3399 
   3400   ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3401   ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
   3402   if (!N01C || !N11C)
   3403     return SDValue();
   3404   if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
   3405     return SDValue();
   3406 
   3407   // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
   3408   SDValue N00 = N0->getOperand(0);
   3409   if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
   3410     if (!N00.getNode()->hasOneUse())
   3411       return SDValue();
   3412     ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
   3413     if (!N001C || N001C->getZExtValue() != 0xFF)
   3414       return SDValue();
   3415     N00 = N00.getOperand(0);
   3416     LookPassAnd0 = true;
   3417   }
   3418 
   3419   SDValue N10 = N1->getOperand(0);
   3420   if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
   3421     if (!N10.getNode()->hasOneUse())
   3422       return SDValue();
   3423     ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
   3424     if (!N101C || N101C->getZExtValue() != 0xFF00)
   3425       return SDValue();
   3426     N10 = N10.getOperand(0);
   3427     LookPassAnd1 = true;
   3428   }
   3429 
   3430   if (N00 != N10)
   3431     return SDValue();
   3432 
   3433   // Make sure everything beyond the low halfword gets set to zero since the SRL
   3434   // 16 will clear the top bits.
   3435   unsigned OpSizeInBits = VT.getSizeInBits();
   3436   if (DemandHighBits && OpSizeInBits > 16) {
   3437     // If the left-shift isn't masked out then the only way this is a bswap is
   3438     // if all bits beyond the low 8 are 0. In that case the entire pattern
   3439     // reduces to a left shift anyway: leave it for other parts of the combiner.
   3440     if (!LookPassAnd0)
   3441       return SDValue();
   3442 
   3443     // However, if the right shift isn't masked out then it might be because
   3444     // it's not needed. See if we can spot that too.
   3445     if (!LookPassAnd1 &&
   3446         !DAG.MaskedValueIsZero(
   3447             N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
   3448       return SDValue();
   3449   }
   3450 
   3451   SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
   3452   if (OpSizeInBits > 16) {
   3453     SDLoc DL(N);
   3454     Res = DAG.getNode(ISD::SRL, DL, VT, Res,
   3455                       DAG.getConstant(OpSizeInBits - 16, DL,
   3456                                       getShiftAmountTy(VT)));
   3457   }
   3458   return Res;
   3459 }
   3460 
   3461 /// Return true if the specified node is an element that makes up a 32-bit
   3462 /// packed halfword byteswap.
   3463 /// ((x & 0x000000ff) << 8) |
   3464 /// ((x & 0x0000ff00) >> 8) |
   3465 /// ((x & 0x00ff0000) << 8) |
   3466 /// ((x & 0xff000000) >> 8)
   3467 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) {
   3468   if (!N.getNode()->hasOneUse())
   3469     return false;
   3470 
   3471   unsigned Opc = N.getOpcode();
   3472   if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
   3473     return false;
   3474 
   3475   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3476   if (!N1C)
   3477     return false;
   3478 
   3479   unsigned Num;
   3480   switch (N1C->getZExtValue()) {
   3481   default:
   3482     return false;
   3483   case 0xFF:       Num = 0; break;
   3484   case 0xFF00:     Num = 1; break;
   3485   case 0xFF0000:   Num = 2; break;
   3486   case 0xFF000000: Num = 3; break;
   3487   }
   3488 
   3489   // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
   3490   SDValue N0 = N.getOperand(0);
   3491   if (Opc == ISD::AND) {
   3492     if (Num == 0 || Num == 2) {
   3493       // (x >> 8) & 0xff
   3494       // (x >> 8) & 0xff0000
   3495       if (N0.getOpcode() != ISD::SRL)
   3496         return false;
   3497       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3498       if (!C || C->getZExtValue() != 8)
   3499         return false;
   3500     } else {
   3501       // (x << 8) & 0xff00
   3502       // (x << 8) & 0xff000000
   3503       if (N0.getOpcode() != ISD::SHL)
   3504         return false;
   3505       ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   3506       if (!C || C->getZExtValue() != 8)
   3507         return false;
   3508     }
   3509   } else if (Opc == ISD::SHL) {
   3510     // (x & 0xff) << 8
   3511     // (x & 0xff0000) << 8
   3512     if (Num != 0 && Num != 2)
   3513       return false;
   3514     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3515     if (!C || C->getZExtValue() != 8)
   3516       return false;
   3517   } else { // Opc == ISD::SRL
   3518     // (x & 0xff00) >> 8
   3519     // (x & 0xff000000) >> 8
   3520     if (Num != 1 && Num != 3)
   3521       return false;
   3522     ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
   3523     if (!C || C->getZExtValue() != 8)
   3524       return false;
   3525   }
   3526 
   3527   if (Parts[Num])
   3528     return false;
   3529 
   3530   Parts[Num] = N0.getOperand(0).getNode();
   3531   return true;
   3532 }
   3533 
   3534 /// Match a 32-bit packed halfword bswap. That is
   3535 /// ((x & 0x000000ff) << 8) |
   3536 /// ((x & 0x0000ff00) >> 8) |
   3537 /// ((x & 0x00ff0000) << 8) |
   3538 /// ((x & 0xff000000) >> 8)
   3539 /// => (rotl (bswap x), 16)
   3540 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   3541   if (!LegalOperations)
   3542     return SDValue();
   3543 
   3544   EVT VT = N->getValueType(0);
   3545   if (VT != MVT::i32)
   3546     return SDValue();
   3547   if (!TLI.isOperationLegal(ISD::BSWAP, VT))
   3548     return SDValue();
   3549 
   3550   // Look for either
   3551   // (or (or (and), (and)), (or (and), (and)))
   3552   // (or (or (or (and), (and)), (and)), (and))
   3553   if (N0.getOpcode() != ISD::OR)
   3554     return SDValue();
   3555   SDValue N00 = N0.getOperand(0);
   3556   SDValue N01 = N0.getOperand(1);
   3557   SDNode *Parts[4] = {};
   3558 
   3559   if (N1.getOpcode() == ISD::OR &&
   3560       N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
   3561     // (or (or (and), (and)), (or (and), (and)))
   3562     SDValue N000 = N00.getOperand(0);
   3563     if (!isBSwapHWordElement(N000, Parts))
   3564       return SDValue();
   3565 
   3566     SDValue N001 = N00.getOperand(1);
   3567     if (!isBSwapHWordElement(N001, Parts))
   3568       return SDValue();
   3569     SDValue N010 = N01.getOperand(0);
   3570     if (!isBSwapHWordElement(N010, Parts))
   3571       return SDValue();
   3572     SDValue N011 = N01.getOperand(1);
   3573     if (!isBSwapHWordElement(N011, Parts))
   3574       return SDValue();
   3575   } else {
   3576     // (or (or (or (and), (and)), (and)), (and))
   3577     if (!isBSwapHWordElement(N1, Parts))
   3578       return SDValue();
   3579     if (!isBSwapHWordElement(N01, Parts))
   3580       return SDValue();
   3581     if (N00.getOpcode() != ISD::OR)
   3582       return SDValue();
   3583     SDValue N000 = N00.getOperand(0);
   3584     if (!isBSwapHWordElement(N000, Parts))
   3585       return SDValue();
   3586     SDValue N001 = N00.getOperand(1);
   3587     if (!isBSwapHWordElement(N001, Parts))
   3588       return SDValue();
   3589   }
   3590 
   3591   // Make sure the parts are all coming from the same node.
   3592   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
   3593     return SDValue();
   3594 
   3595   SDLoc DL(N);
   3596   SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
   3597                               SDValue(Parts[0], 0));
   3598 
   3599   // Result of the bswap should be rotated by 16. If it's not legal, then
   3600   // do  (x << 16) | (x >> 16).
   3601   SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
   3602   if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
   3603     return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
   3604   if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
   3605     return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
   3606   return DAG.getNode(ISD::OR, DL, VT,
   3607                      DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
   3608                      DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
   3609 }
   3610 
/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
///
/// \param N0, N1 the two operands being OR'd.
/// \param LocReference node whose debug location is used for any new nodes.
/// \returns the simplified value, or an empty SDValue if no rule applied.
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N1.getValueType();
  // fold (or x, undef) -> -1
  if (!LegalOperations &&
      (N0.isUndef() || N1.isUndef())) {
    // Build the all-ones value from the element width so vector types get a
    // proper splat rather than returning the undef operand itself.
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
                           SDLoc(LocReference), VT);
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  SDValue LL, LR, RL, RR, CC0, CC1;
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    // Both compares share the same RHS and condition: try merging the LHS
    // values into a single compare.
    if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
      if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
                                     LR.getValueType(), LL, RL);
        AddToWorklist(ORNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
      if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
                                      LR.getValueType(), LL, RL);
        AddToWorklist(ANDNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    // Both compares now test the same operand pair: try combining the two
    // condition codes into one setcc.
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
        // Only rebuild the setcc if its result type matches what callers of
        // this OR already expect.
        EVT CCVT = getSetCCResultType(LL.getValueType());
        if (N0.getValueType() == CCVT ||
            (!LegalOperations && N0.getValueType() == MVT::i1))
          return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
                              LL, LR, Result);
      }
    }
  }

  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero.  Likewise for Y.
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        // We can only do this xform if we know that bits from X that are set in
        // C2 but not in C1 are already zero.  Likewise for Y.
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          SDLoc DL(LocReference);
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
  }

  return SDValue();
}
   3707 
/// Combine an ISD::OR node.  Tries constant folding, algebraic identities,
/// shared OR/AND rules (visitORLike), bswap and rotate pattern matching, and
/// demanded-bits simplification.  Returns the replacement value, SDValue(N, 0)
/// if N was updated in place, or an empty SDValue if nothing applied.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT)) {
      bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode());
      bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode());
      bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());
      bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode());
      // Ensure both shuffles have a zero input.
      if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) {
        assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!");
        assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!");
        const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
        const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
        bool CanFold = true;
        int NumElts = VT.getVectorNumElements();
        // Build the combined shuffle mask element by element.
        SmallVector<int, 4> Mask(NumElts);

        for (int i = 0; i != NumElts; ++i) {
          int M0 = SV0->getMaskElt(i);
          int M1 = SV1->getMaskElt(i);

          // Determine if either index is pointing to a zero vector.
          bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts));
          bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts));

          // If one element is zero and the otherside is undef, keep undef.
          // This also handles the case that both are undef.
          if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) {
            Mask[i] = -1;
            continue;
          }

          // Make sure only one of the elements is zero.
          if (M0Zero == M1Zero) {
            CanFold = false;
            break;
          }

          assert((M0 >= 0 || M1 >= 0) && "Undef index!");

          // We have a zero and non-zero element. If the non-zero came from
          // SV0 make the index a LHS index. If it came from SV1, make it
          // a RHS index. We need to mod by NumElts because we don't care
          // which operand it came from in the original shuffles.
          Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts;
        }

        if (CanFold) {
          // Use whichever input of each shuffle is not the zero vector.
          SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0);
          SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0);

          // If the mask isn't legal as-is, try the commuted form before
          // giving up.
          bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          if (!LegalMask) {
            std::swap(NewLHS, NewRHS);
            ShuffleVectorSDNode::commuteMask(Mask);
            LegalMask = TLI.isShuffleMaskLegal(Mask, VT);
          }

          if (LegalMask)
            return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask);
        }
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  // Try rules shared with the AND combiner (setcc merging, mask folds).
  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) == 0.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
             isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
      if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                   N1C, C1))
        return DAG.getNode(
            ISD::AND, SDLoc(N), VT,
            DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
      return SDValue();
    }
  }
  // Simplify: (or (op x...), (op y...))  -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
   3867 
   3868 /// Match "(X shl/srl V1) & V2" where V2 may not be present.
   3869 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
   3870   if (Op.getOpcode() == ISD::AND) {
   3871     if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
   3872       Mask = Op.getOperand(1);
   3873       Op = Op.getOperand(0);
   3874     } else {
   3875       return false;
   3876     }
   3877   }
   3878 
   3879   if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
   3880     Shift = Op;
   3881     return true;
   3882   }
   3883 
   3884   return false;
   3885 }
   3886 
// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos).  This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
//     (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.  The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos.  Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                    [B]
  //
  // for all Neg and Pos.  Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time.  E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  // MaskLoBits is nonzero (== log2(EltSize)) exactly when we matched the
  // [A] form above.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      if (NegC->getAPIntValue() == EltSize - 1) {
        Neg = Neg.getOperand(0);
        MaskLoBits = Log2_64(EltSize);
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      if (PosC->getAPIntValue() == EltSize - 1)
        Pos = Pos.getOperand(0);

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //              EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // In both matched cases below, Width ends up holding the value that must
  // equal EltSize (modulo Mask) for the proof to go through.
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //                NegC & Mask == (EltSize - PosC) & Mask
  //             EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  // Under [B] the equality must be exact.
  return Width == EltSize;
}
   3989 
   3990 // A subroutine of MatchRotate used once we have found an OR of two opposite
   3991 // shifts of Shifted.  If Neg == <operand size> - Pos then the OR reduces
   3992 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
   3993 // former being preferred if supported.  InnerPos and InnerNeg are Pos and
   3994 // Neg with outer conversions stripped away.
   3995 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
   3996                                        SDValue Neg, SDValue InnerPos,
   3997                                        SDValue InnerNeg, unsigned PosOpcode,
   3998                                        unsigned NegOpcode, const SDLoc &DL) {
   3999   // fold (or (shl x, (*ext y)),
   4000   //          (srl x, (*ext (sub 32, y)))) ->
   4001   //   (rotl x, y) or (rotr x, (sub 32, y))
   4002   //
   4003   // fold (or (shl x, (*ext (sub 32, y))),
   4004   //          (srl x, (*ext y))) ->
   4005   //   (rotr x, y) or (rotl x, (sub 32, y))
   4006   EVT VT = Shifted.getValueType();
   4007   if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
   4008     bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
   4009     return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
   4010                        HasPos ? Pos : Neg).getNode();
   4011   }
   4012 
   4013   return nullptr;
   4014 }
   4015 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].  Returns the new node, or null if no rotate was matched.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
    uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
    // A rotate only exists when the constant shift amounts sum to the
    // element width.
    if ((LShVal + RShVal) != EltSizeInBits)
      return nullptr;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
      SDValue Mask = DAG.getConstant(AllBits, DL, VT);

      // Each original mask only constrained one shifted half; widen it with
      // the bits the other half contributes before AND'ing into the result.
      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask,
                                       DAG.getConstant(RHSBits, DL, VT)));
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask,
                                       DAG.getConstant(LHSBits, DL, VT)));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Variable shift amounts: try each half as the "positive" rotate direction.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
   4126 
/// Combine an ISD::XOR node into a simpler DAG node.
///
/// The folds below are attempted in order; the first one that applies returns
/// the replacement value. Returning an empty SDValue means no combine fired.
/// Note the ordering is significant: e.g. constants are canonicalized to the
/// RHS early so later folds only need to check N1.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.isUndef() && N1.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.isUndef())
    return N0;
  if (N1.isUndef())
    return N1;
  // fold (xor c1, c2) -> c1^c2
  // Opaque constants are deliberately excluded (getAsNonOpaqueConstant).
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
     !DAG.isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  // Applies when N1 is the target's "true" value and N0 is a setcc or
  // setcc-equivalent (select_cc); only done if the inverted condition code is
  // legal (or we are pre-legalization).
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    // Invert the setcc result in the narrow type, then re-extend.
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // De Morgan for i1: requires one side to be a single-use setcc so the extra
  // xor nodes are likely to fold away again.
  if (isOneConstant(N1) && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // De Morgan for full-width not (xor with -1) when a side is a constant, so
  // the new xor of that side constant-folds.
  if (isAllOnesConstant(N1) &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  // The inner constant may be either operand of the inner xor.
  if (N1C && N0.getOpcode() == ISD::XOR) {
    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), DL, VT));
    }
    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), DL, VT));
    }
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x ==  14
  // i16 shl ==   1 << 14  == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  // A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
   4289 
   4290 /// Handle transforms common to the three shifts, when the shift amount is a
   4291 /// constant.
   4292 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
   4293   SDNode *LHS = N->getOperand(0).getNode();
   4294   if (!LHS->hasOneUse()) return SDValue();
   4295 
   4296   // We want to pull some binops through shifts, so that we have (and (shift))
   4297   // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
   4298   // thing happens with address calculations, so it's important to canonicalize
   4299   // it.
   4300   bool HighBitSet = false;  // Can we transform this if the high bit is set?
   4301 
   4302   switch (LHS->getOpcode()) {
   4303   default: return SDValue();
   4304   case ISD::OR:
   4305   case ISD::XOR:
   4306     HighBitSet = false; // We can only transform sra if the high bit is clear.
   4307     break;
   4308   case ISD::AND:
   4309     HighBitSet = true;  // We can only transform sra if the high bit is set.
   4310     break;
   4311   case ISD::ADD:
   4312     if (N->getOpcode() != ISD::SHL)
   4313       return SDValue(); // only shl(add) not sr[al](add).
   4314     HighBitSet = false; // We can only transform sra if the high bit is clear.
   4315     break;
   4316   }
   4317 
   4318   // We require the RHS of the binop to be a constant and not opaque as well.
   4319   ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1));
   4320   if (!BinOpCst) return SDValue();
   4321 
   4322   // FIXME: disable this unless the input to the binop is a shift by a constant.
   4323   // If it is not a shift, it pessimizes some common cases like:
   4324   //
   4325   //    void foo(int *X, int i) { X[i & 1235] = 1; }
   4326   //    int bar(int *X, int i) { return X[i & 255]; }
   4327   SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
   4328   if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
   4329        BinOpLHSVal->getOpcode() != ISD::SRA &&
   4330        BinOpLHSVal->getOpcode() != ISD::SRL) ||
   4331       !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
   4332     return SDValue();
   4333 
   4334   EVT VT = N->getValueType(0);
   4335 
   4336   // If this is a signed shift right, and the high bit is modified by the
   4337   // logical operation, do not perform the transformation. The highBitSet
   4338   // boolean indicates the value of the high bit of the constant which would
   4339   // cause it to be modified for this operation.
   4340   if (N->getOpcode() == ISD::SRA) {
   4341     bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
   4342     if (BinOpRHSSignSet != HighBitSet)
   4343       return SDValue();
   4344   }
   4345 
   4346   if (!TLI.isDesirableToCommuteWithShift(LHS))
   4347     return SDValue();
   4348 
   4349   // Fold the constants, shifting the binop RHS by the shift amount.
   4350   SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
   4351                                N->getValueType(0),
   4352                                LHS->getOperand(1), N->getOperand(1));
   4353   assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");
   4354 
   4355   // Create the new shift.
   4356   SDValue NewShift = DAG.getNode(N->getOpcode(),
   4357                                  SDLoc(LHS->getOperand(0)),
   4358                                  VT, LHS->getOperand(0), N->getOperand(1));
   4359 
   4360   // Create the new binop.
   4361   return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
   4362 }
   4363 
   4364 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
   4365   assert(N->getOpcode() == ISD::TRUNCATE);
   4366   assert(N->getOperand(0).getOpcode() == ISD::AND);
   4367 
   4368   // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
   4369   if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
   4370     SDValue N01 = N->getOperand(0).getOperand(1);
   4371 
   4372     if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
   4373       if (!N01C->isOpaque()) {
   4374         EVT TruncVT = N->getValueType(0);
   4375         SDValue N00 = N->getOperand(0).getOperand(0);
   4376         APInt TruncC = N01C->getAPIntValue();
   4377         TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
   4378         SDLoc DL(N);
   4379 
   4380         return DAG.getNode(ISD::AND, DL, TruncVT,
   4381                            DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
   4382                            DAG.getConstant(TruncC, DL, TruncVT));
   4383       }
   4384     }
   4385   }
   4386 
   4387   return SDValue();
   4388 }
   4389 
   4390 SDValue DAGCombiner::visitRotate(SDNode *N) {
   4391   // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
   4392   if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
   4393       N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
   4394     if (SDValue NewOp1 =
   4395             distributeTruncateThroughAnd(N->getOperand(1).getNode()))
   4396       return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
   4397                          N->getOperand(0), NewOp1);
   4398   }
   4399   return SDValue();
   4400 }
   4401 
/// Combine an ISD::SHL node. Folds are tried in order; the first match
/// returns the replacement, and an empty SDValue means no combine applied.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      } else {
        // NOTE(review): N1C is refreshed from a constant splat only on this
        // (non-AND) path; when N0 is an AND and the setcc fold above does not
        // fire, N1C stays null for splat amounts and the scalar folds below
        // are skipped — confirm whether that is intended.
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.isUndef())
    return DAG.getConstant(0, SDLoc(N), VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  // With a constant amount, try simplifying based on which bits are demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      SDLoc DL(N);
      // Combined amount at or beyond the width shifts everything out.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, DL, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      if (c2 >= OpSizeInBits - InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= OpSizeInBits)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::SHL, DL, VT,
                           DAG.getNode(N0.getOpcode(), DL, VT,
                                       N0Op0->getOperand(0)),
                           DAG.getConstant(c1 + c2, DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      if (c1 < VT.getScalarSizeInBits()) {
        uint64_t c2 = N1C->getZExtValue();
        // Only equal shift amounts are handled: shl exactly undoes the srl's
        // positioning, leaving a mask-like operation in the narrow type.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C2-C1)) if C1  > C2
  // Requires the inner shift to carry the 'exact' flag (no bits lost).
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // Mask of the bits the srl would have left; adjusted below to track
        // the replacement shift.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, DL, N1.getValueType()));
        }
        SDLoc DL(N0);
        return DAG.getNode(ISD::AND, DL, VT, Shift,
                           DAG.getConstant(Mask, DL, VT));
      }
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDLoc DL(N);
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(BitSize,
                                            BitSize - N1C->getZExtValue()),
                      DL, VT);
    return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
                       HiBitsMask);
  }

  // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2)
  // Variant of version done on multiply, except mul by a power of 2 is turned
  // into a shift.
  APInt Val;
  if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() &&
      (isa<ConstantSDNode>(N0.getOperand(1)) ||
       ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) {
    SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1);
    SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1);
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1);
  }

  // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2)
  if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      if (SDValue Folded =
              DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C))
        return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded);
    }
  }

  // Finally, try the transforms common to all three shift kinds.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSHL = visitShiftByConstant(N, N1C))
      return NewSHL;

  return SDValue();
}
   4613 
/// Combine an ISD::SRA node. Folds are tried in order; the first match
/// returns the replacement, and an empty SDValue means no combine applied.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // Pick up splat shift amounts so the scalar folds below also apply to
    // vectors.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (sra c1, c2) -> c1>>c2 (arithmetic)
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (sra -1, x) -> -1
  if (isAllOnesConstant(N0))
    return N0;
  // fold (sra x, (setge c, size(x))) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      // An over-wide arithmetic shift saturates to a shift by width-1 (all
      // result bits become copies of the sign bit).
      if (Sum >= OpSizeInBits)
        Sum = OpSizeInBits - 1;
      SDLoc DL(N);
      return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0),
                         DAG.getConstant(Sum, DL, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDLoc DL(N);
        SDValue Amt = DAG.getConstant(ShiftAmt, DL,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        // The truncate discards the high bits, so a logical shift suffices
        // here; the final sign_extend restores the sign semantics.
        SDValue Shift = DAG.getNode(ISD::SRL, DL, VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, DL,
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDLoc DL(N);
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), DL,
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, DL, LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, DL, VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  // Finally, try the transforms common to all three shift kinds.
  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRA = visitShiftByConstant(N, N1C))
      return NewSRA;

  return SDValue();
}
   4759 
/// Try to simplify a logical shift right (SRL) node, returning the
/// replacement value or a null SDValue if no combine applies. Folds are
/// attempted in order; each early-returns on success.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // Treat a splat-of-constant shift amount like a scalar constant so the
    // constant-based folds below also fire for vectors.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (srl c1, c2) -> c1 >>u c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SRL, SDLoc(N), VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N01C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      SDLoc DL(N);
      // A combined shift of the full width (or more) clears every bit.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, DL, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      SDLoc DL(N0);
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::TRUNCATE, DL, VT,
                         DAG.getNode(ISD::SRL, DL, InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, DL,
                                                     ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
    unsigned BitSize = N0.getScalarValueSizeInBits();
    // The uint64_t mask computation below only covers types up to 64 bits.
    if (BitSize <= 64) {
      // ~0ULL >> ShAmt keeps exactly the low (BitSize - c) bits, i.e. the
      // bits that survive the shl/srl round trip.
      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0),
                         DAG.getConstant(~0ULL >> ShAmt, DL, VT));
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDLoc DL0(N0);
      SDValue SmallShift = DAG.getNode(ISD::SRL, DL0, SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, DL0,
                                          getShiftAmountTy(SmallVT)));
      AddToWorklist(SmallShift.getNode());
      // Mask off the high bits that would have been shifted in from the
      // (undefined) extended portion of the any_extend.
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
      SDLoc DL(N);
      return DAG.getNode(ISD::AND, DL, VT,
                         DAG.getNode(ISD::ANY_EXTEND, DL, VT, SmallShift),
                         DAG.getConstant(Mask, DL, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, SDLoc(N0), VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, SDLoc(N0), VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      // Move the possibly-set bit down to bit zero before the xor.
      if (ShAmt) {
        SDLoc DL(N0);
        Op = DAG.getNode(ISD::SRL, DL, VT, Op,
                  DAG.getConstant(ShAmt, DL,
                                  getShiftAmountTy(Op.getValueType())));
        AddToWorklist(Op.getNode());
      }

      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT,
                         Op, DAG.getConstant(1, DL, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()))
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C && !N1C->isOpaque())
    if (SDValue NewSRL = visitShiftByConstant(N, N1C))
      return NewSRL;

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further. Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorklist(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorklist(Use);
    }
  }

  return SDValue();
}
   4960 
   4961 SDValue DAGCombiner::visitBSWAP(SDNode *N) {
   4962   SDValue N0 = N->getOperand(0);
   4963   EVT VT = N->getValueType(0);
   4964 
   4965   // fold (bswap c1) -> c2
   4966   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   4967     return DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N0);
   4968   // fold (bswap (bswap x)) -> x
   4969   if (N0.getOpcode() == ISD::BSWAP)
   4970     return N0->getOperand(0);
   4971   return SDValue();
   4972 }
   4973 
   4974 SDValue DAGCombiner::visitBITREVERSE(SDNode *N) {
   4975   SDValue N0 = N->getOperand(0);
   4976 
   4977   // fold (bitreverse (bitreverse x)) -> x
   4978   if (N0.getOpcode() == ISD::BITREVERSE)
   4979     return N0.getOperand(0);
   4980   return SDValue();
   4981 }
   4982 
   4983 SDValue DAGCombiner::visitCTLZ(SDNode *N) {
   4984   SDValue N0 = N->getOperand(0);
   4985   EVT VT = N->getValueType(0);
   4986 
   4987   // fold (ctlz c1) -> c2
   4988   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   4989     return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
   4990   return SDValue();
   4991 }
   4992 
   4993 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
   4994   SDValue N0 = N->getOperand(0);
   4995   EVT VT = N->getValueType(0);
   4996 
   4997   // fold (ctlz_zero_undef c1) -> c2
   4998   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   4999     return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   5000   return SDValue();
   5001 }
   5002 
   5003 SDValue DAGCombiner::visitCTTZ(SDNode *N) {
   5004   SDValue N0 = N->getOperand(0);
   5005   EVT VT = N->getValueType(0);
   5006 
   5007   // fold (cttz c1) -> c2
   5008   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   5009     return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
   5010   return SDValue();
   5011 }
   5012 
   5013 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
   5014   SDValue N0 = N->getOperand(0);
   5015   EVT VT = N->getValueType(0);
   5016 
   5017   // fold (cttz_zero_undef c1) -> c2
   5018   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   5019     return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
   5020   return SDValue();
   5021 }
   5022 
   5023 SDValue DAGCombiner::visitCTPOP(SDNode *N) {
   5024   SDValue N0 = N->getOperand(0);
   5025   EVT VT = N->getValueType(0);
   5026 
   5027   // fold (ctpop c1) -> c2
   5028   if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
   5029     return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
   5030   return SDValue();
   5031 }
   5032 
   5033 
   5034 /// \brief Generate Min/Max node
   5035 static SDValue combineMinNumMaxNum(const SDLoc &DL, EVT VT, SDValue LHS,
   5036                                    SDValue RHS, SDValue True, SDValue False,
   5037                                    ISD::CondCode CC, const TargetLowering &TLI,
   5038                                    SelectionDAG &DAG) {
   5039   if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True))
   5040     return SDValue();
   5041 
   5042   switch (CC) {
   5043   case ISD::SETOLT:
   5044   case ISD::SETOLE:
   5045   case ISD::SETLT:
   5046   case ISD::SETLE:
   5047   case ISD::SETULT:
   5048   case ISD::SETULE: {
   5049     unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM;
   5050     if (TLI.isOperationLegal(Opcode, VT))
   5051       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
   5052     return SDValue();
   5053   }
   5054   case ISD::SETOGT:
   5055   case ISD::SETOGE:
   5056   case ISD::SETGT:
   5057   case ISD::SETGE:
   5058   case ISD::SETUGT:
   5059   case ISD::SETUGE: {
   5060     unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM;
   5061     if (TLI.isOperationLegal(Opcode, VT))
   5062       return DAG.getNode(Opcode, DL, VT, LHS, RHS);
   5063     return SDValue();
   5064   }
   5065   default:
   5066     return SDValue();
   5067   }
   5068 }
   5069 
/// Try to simplify a SELECT node (condition N0 chooses N1 when true, N2 when
/// false). Returns the replacement value, or a null SDValue if no fold fires.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  if (const ConstantSDNode *N0C = dyn_cast<const ConstantSDNode>(N0)) {
    // fold (select true, X, Y) -> X
    // fold (select false, X, Y) -> Y
    return !N0C->isNullValue() ? N1 : N2;
  }
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && isOneConstant(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 || (VT0.isInteger() &&
                          TLI.getBooleanContents(false, false) ==
                              TLI.getBooleanContents(false, true) &&
                          TLI.getBooleanContents(false, false) ==
                              TargetLowering::ZeroOrOneBooleanContent)) &&
      isNullConstant(N1) && isOneConstant(N2)) {
    SDValue XORNode;
    if (VT == VT0) {
      // Same type: the xor alone is the final result.
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT0,
                         N0, DAG.getConstant(1, DL, VT0));
    }
    // Different types: xor in the condition's type, then widen or narrow the
    // result to the select's type.
    SDLoc DL0(N0);
    XORNode = DAG.getNode(ISD::XOR, DL0, VT0,
                          N0, DAG.getConstant(1, DL0, VT0));
    AddToWorklist(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isNullConstant(N1)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && isOneConstant(N2)) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorklist(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && isNullConstant(N2))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || isOneConstant(N1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || isNullConstant(N2)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  if (VT0 == MVT::i1) {
    // The code in this block deals with the following 2 equivalences:
    //    select(C0|C1, x, y) <=> select(C0, x, select(C1, x, y))
    //    select(C0&C1, x, y) <=> select(C0, select(C1, x, y), y)
    // The target can specify its prefered form with the
    // shouldNormalizeToSelectSequence() callback. However we always transform
    // to the right anyway if we find the inner select exists in the DAG anyway
    // and we always transform to the left side if we know that we can further
    // optimize the combination of the conditions.
    bool normalizeToSequence
      = TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT);
    // select (and Cond0, Cond1), X, Y
    //   -> select Cond0, (select Cond1, X, Y), Y
    if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
                                        N1.getValueType(), Cond1, N1, N2);
      // A non-empty use list means the inner select already existed in the
      // DAG, so reusing it is free even if the target prefers the other form.
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0,
                           InnerSelect, N2);
    }
    // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y)
    if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) {
      SDValue Cond0 = N0->getOperand(0);
      SDValue Cond1 = N0->getOperand(1);
      SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N),
                                        N1.getValueType(), Cond1, N1, N2);
      if (normalizeToSequence || !InnerSelect.use_empty())
        return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1,
                           InnerSelect);
    }

    // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y
    if (N1->getOpcode() == ISD::SELECT && N1->hasOneUse()) {
      SDValue N1_0 = N1->getOperand(0);
      SDValue N1_1 = N1->getOperand(1);
      SDValue N1_2 = N1->getOperand(2);
      if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) {
        // Create the actual and node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(),
                                    N0, N1_0);
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And,
                             N1_1, N2);
        }
        // Otherwise see if we can optimize the "and" to a better pattern.
        if (SDValue Combined = visitANDLike(N0, N1_0, N))
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
                             N1_1, N2);
      }
    }
    // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y
    if (N2->getOpcode() == ISD::SELECT && N2->hasOneUse()) {
      SDValue N2_0 = N2->getOperand(0);
      SDValue N2_1 = N2->getOperand(1);
      SDValue N2_2 = N2->getOperand(2);
      if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) {
        // Create the actual or node if we can generate good code for it.
        if (!normalizeToSequence) {
          SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(),
                                   N0, N2_0);
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or,
                             N1, N2_2);
        }
        // Otherwise see if we can optimize to a better pattern.
        if (SDValue Combined = visitORLike(N0, N2_0, N))
          return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined,
                             N1, N2_2);
      }
    }
  }

  // fold selects based on a setcc into other things, such as min/max/abs
  if (N0.getOpcode() == ISD::SETCC) {
    // select x, y (fcmp lt x, y) -> fminnum x, y
    // select x, y (fcmp gt x, y) -> fmaxnum x, y
    //
    // This is OK if we don't care about what happens if either operand is a
    // NaN.
    //

    // FIXME: Instead of testing for UnsafeFPMath, this should be checking for
    // no signed zeros as well as no nans.
    const TargetOptions &Options = DAG.getTarget().Options;
    if (Options.UnsafeFPMath &&
        VT.isFloatingPoint() && N0.hasOneUse() &&
        DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) {
      ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();

      if (SDValue FMinMax = combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0),
                                                N0.getOperand(1), N1, N2, CC,
                                                TLI, DAG))
        return FMinMax;
    }

    // Before operation legalization, legal-or-custom SELECT_CC is enough to
    // form it; afterwards it must be fully legal.
    if ((!LegalOperations &&
         TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) ||
        TLI.isOperationLegal(ISD::SELECT_CC, VT))
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1),
                         N1, N2, N0.getOperand(2));
    return SimplifySelect(SDLoc(N), N0, N1, N2);
  }

  return SDValue();
}
   5253 
   5254 static
   5255 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) {
   5256   SDLoc DL(N);
   5257   EVT LoVT, HiVT;
   5258   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0));
   5259 
   5260   // Split the inputs.
   5261   SDValue Lo, Hi, LL, LH, RL, RH;
   5262   std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
   5263   std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
   5264 
   5265   Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
   5266   Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
   5267 
   5268   return std::make_pair(Lo, Hi);
   5269 }
   5270 
   5271 // This function assumes all the vselect's arguments are CONCAT_VECTOR
   5272 // nodes and that the condition is a BV of ConstantSDNodes (or undefs).
   5273 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) {
   5274   SDLoc dl(N);
   5275   SDValue Cond = N->getOperand(0);
   5276   SDValue LHS = N->getOperand(1);
   5277   SDValue RHS = N->getOperand(2);
   5278   EVT VT = N->getValueType(0);
   5279   int NumElems = VT.getVectorNumElements();
   5280   assert(LHS.getOpcode() == ISD::CONCAT_VECTORS &&
   5281          RHS.getOpcode() == ISD::CONCAT_VECTORS &&
   5282          Cond.getOpcode() == ISD::BUILD_VECTOR);
   5283 
   5284   // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about
   5285   // binary ones here.
   5286   if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2)
   5287     return SDValue();
   5288 
   5289   // We're sure we have an even number of elements due to the
   5290   // concat_vectors we have as arguments to vselect.
   5291   // Skip BV elements until we find one that's not an UNDEF
   5292   // After we find an UNDEF element, keep looping until we get to half the
   5293   // length of the BV and see if all the non-undef nodes are the same.
   5294   ConstantSDNode *BottomHalf = nullptr;
   5295   for (int i = 0; i < NumElems / 2; ++i) {
   5296     if (Cond->getOperand(i)->isUndef())
   5297       continue;
   5298 
   5299     if (BottomHalf == nullptr)
   5300       BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
   5301     else if (Cond->getOperand(i).getNode() != BottomHalf)
   5302       return SDValue();
   5303   }
   5304 
   5305   // Do the same for the second half of the BuildVector
   5306   ConstantSDNode *TopHalf = nullptr;
   5307   for (int i = NumElems / 2; i < NumElems; ++i) {
   5308     if (Cond->getOperand(i)->isUndef())
   5309       continue;
   5310 
   5311     if (TopHalf == nullptr)
   5312       TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
   5313     else if (Cond->getOperand(i).getNode() != TopHalf)
   5314       return SDValue();
   5315   }
   5316 
   5317   assert(TopHalf && BottomHalf &&
   5318          "One half of the selector was all UNDEFs and the other was all the "
   5319          "same value. This should have been addressed before this function.");
   5320   return DAG.getNode(
   5321       ISD::CONCAT_VECTORS, dl, VT,
   5322       BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
   5323       TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
   5324 }
   5325 
   5326 SDValue DAGCombiner::visitMSCATTER(SDNode *N) {
   5327 
   5328   if (Level >= AfterLegalizeTypes)
   5329     return SDValue();
   5330 
   5331   MaskedScatterSDNode *MSC = cast<MaskedScatterSDNode>(N);
   5332   SDValue Mask = MSC->getMask();
   5333   SDValue Data  = MSC->getValue();
   5334   SDLoc DL(N);
   5335 
   5336   // If the MSCATTER data type requires splitting and the mask is provided by a
   5337   // SETCC, then split both nodes and its operands before legalization. This
   5338   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   5339   // and enables future optimizations (e.g. min/max pattern matching on X86).
   5340   if (Mask.getOpcode() != ISD::SETCC)
   5341     return SDValue();
   5342 
   5343   // Check if any splitting is required.
   5344   if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
   5345       TargetLowering::TypeSplitVector)
   5346     return SDValue();
   5347   SDValue MaskLo, MaskHi, Lo, Hi;
   5348   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
   5349 
   5350   EVT LoVT, HiVT;
   5351   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MSC->getValueType(0));
   5352 
   5353   SDValue Chain = MSC->getChain();
   5354 
   5355   EVT MemoryVT = MSC->getMemoryVT();
   5356   unsigned Alignment = MSC->getOriginalAlignment();
   5357 
   5358   EVT LoMemVT, HiMemVT;
   5359   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
   5360 
   5361   SDValue DataLo, DataHi;
   5362   std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
   5363 
   5364   SDValue BasePtr = MSC->getBasePtr();
   5365   SDValue IndexLo, IndexHi;
   5366   std::tie(IndexLo, IndexHi) = DAG.SplitVector(MSC->getIndex(), DL);
   5367 
   5368   MachineMemOperand *MMO = DAG.getMachineFunction().
   5369     getMachineMemOperand(MSC->getPointerInfo(),
   5370                           MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
   5371                           Alignment, MSC->getAAInfo(), MSC->getRanges());
   5372 
   5373   SDValue OpsLo[] = { Chain, DataLo, MaskLo, BasePtr, IndexLo };
   5374   Lo = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataLo.getValueType(),
   5375                             DL, OpsLo, MMO);
   5376 
   5377   SDValue OpsHi[] = {Chain, DataHi, MaskHi, BasePtr, IndexHi};
   5378   Hi = DAG.getMaskedScatter(DAG.getVTList(MVT::Other), DataHi.getValueType(),
   5379                             DL, OpsHi, MMO);
   5380 
   5381   AddToWorklist(Lo.getNode());
   5382   AddToWorklist(Hi.getNode());
   5383 
   5384   return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
   5385 }
   5386 
   5387 SDValue DAGCombiner::visitMSTORE(SDNode *N) {
   5388 
   5389   if (Level >= AfterLegalizeTypes)
   5390     return SDValue();
   5391 
   5392   MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N);
   5393   SDValue Mask = MST->getMask();
   5394   SDValue Data  = MST->getValue();
   5395   SDLoc DL(N);
   5396 
   5397   // If the MSTORE data type requires splitting and the mask is provided by a
   5398   // SETCC, then split both nodes and its operands before legalization. This
   5399   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   5400   // and enables future optimizations (e.g. min/max pattern matching on X86).
   5401   if (Mask.getOpcode() == ISD::SETCC) {
   5402 
   5403     // Check if any splitting is required.
   5404     if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) !=
   5405         TargetLowering::TypeSplitVector)
   5406       return SDValue();
   5407 
   5408     SDValue MaskLo, MaskHi, Lo, Hi;
   5409     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
   5410 
   5411     EVT LoVT, HiVT;
   5412     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0));
   5413 
   5414     SDValue Chain = MST->getChain();
   5415     SDValue Ptr   = MST->getBasePtr();
   5416 
   5417     EVT MemoryVT = MST->getMemoryVT();
   5418     unsigned Alignment = MST->getOriginalAlignment();
   5419 
   5420     // if Alignment is equal to the vector size,
   5421     // take the half of it for the second part
   5422     unsigned SecondHalfAlignment =
   5423       (Alignment == Data->getValueType(0).getSizeInBits()/8) ?
   5424          Alignment/2 : Alignment;
   5425 
   5426     EVT LoMemVT, HiMemVT;
   5427     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
   5428 
   5429     SDValue DataLo, DataHi;
   5430     std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL);
   5431 
   5432     MachineMemOperand *MMO = DAG.getMachineFunction().
   5433       getMachineMemOperand(MST->getPointerInfo(),
   5434                            MachineMemOperand::MOStore,  LoMemVT.getStoreSize(),
   5435                            Alignment, MST->getAAInfo(), MST->getRanges());
   5436 
   5437     Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO,
   5438                             MST->isTruncatingStore());
   5439 
   5440     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
   5441     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   5442                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
   5443 
   5444     MMO = DAG.getMachineFunction().
   5445       getMachineMemOperand(MST->getPointerInfo(),
   5446                            MachineMemOperand::MOStore,  HiMemVT.getStoreSize(),
   5447                            SecondHalfAlignment, MST->getAAInfo(),
   5448                            MST->getRanges());
   5449 
   5450     Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO,
   5451                             MST->isTruncatingStore());
   5452 
   5453     AddToWorklist(Lo.getNode());
   5454     AddToWorklist(Hi.getNode());
   5455 
   5456     return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi);
   5457   }
   5458   return SDValue();
   5459 }
   5460 
   5461 SDValue DAGCombiner::visitMGATHER(SDNode *N) {
   5462 
   5463   if (Level >= AfterLegalizeTypes)
   5464     return SDValue();
   5465 
   5466   MaskedGatherSDNode *MGT = dyn_cast<MaskedGatherSDNode>(N);
   5467   SDValue Mask = MGT->getMask();
   5468   SDLoc DL(N);
   5469 
   5470   // If the MGATHER result requires splitting and the mask is provided by a
   5471   // SETCC, then split both nodes and its operands before legalization. This
   5472   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   5473   // and enables future optimizations (e.g. min/max pattern matching on X86).
   5474 
   5475   if (Mask.getOpcode() != ISD::SETCC)
   5476     return SDValue();
   5477 
   5478   EVT VT = N->getValueType(0);
   5479 
   5480   // Check if any splitting is required.
   5481   if (TLI.getTypeAction(*DAG.getContext(), VT) !=
   5482       TargetLowering::TypeSplitVector)
   5483     return SDValue();
   5484 
   5485   SDValue MaskLo, MaskHi, Lo, Hi;
   5486   std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
   5487 
   5488   SDValue Src0 = MGT->getValue();
   5489   SDValue Src0Lo, Src0Hi;
   5490   std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
   5491 
   5492   EVT LoVT, HiVT;
   5493   std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(VT);
   5494 
   5495   SDValue Chain = MGT->getChain();
   5496   EVT MemoryVT = MGT->getMemoryVT();
   5497   unsigned Alignment = MGT->getOriginalAlignment();
   5498 
   5499   EVT LoMemVT, HiMemVT;
   5500   std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
   5501 
   5502   SDValue BasePtr = MGT->getBasePtr();
   5503   SDValue Index = MGT->getIndex();
   5504   SDValue IndexLo, IndexHi;
   5505   std::tie(IndexLo, IndexHi) = DAG.SplitVector(Index, DL);
   5506 
   5507   MachineMemOperand *MMO = DAG.getMachineFunction().
   5508     getMachineMemOperand(MGT->getPointerInfo(),
   5509                           MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
   5510                           Alignment, MGT->getAAInfo(), MGT->getRanges());
   5511 
   5512   SDValue OpsLo[] = { Chain, Src0Lo, MaskLo, BasePtr, IndexLo };
   5513   Lo = DAG.getMaskedGather(DAG.getVTList(LoVT, MVT::Other), LoVT, DL, OpsLo,
   5514                             MMO);
   5515 
   5516   SDValue OpsHi[] = {Chain, Src0Hi, MaskHi, BasePtr, IndexHi};
   5517   Hi = DAG.getMaskedGather(DAG.getVTList(HiVT, MVT::Other), HiVT, DL, OpsHi,
   5518                             MMO);
   5519 
   5520   AddToWorklist(Lo.getNode());
   5521   AddToWorklist(Hi.getNode());
   5522 
   5523   // Build a factor node to remember that this load is independent of the
   5524   // other one.
   5525   Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
   5526                       Hi.getValue(1));
   5527 
   5528   // Legalized the chain result - switch anything that used the old chain to
   5529   // use the new one.
   5530   DAG.ReplaceAllUsesOfValueWith(SDValue(MGT, 1), Chain);
   5531 
   5532   SDValue GatherRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
   5533 
   5534   SDValue RetOps[] = { GatherRes, Chain };
   5535   return DAG.getMergeValues(RetOps, DL);
   5536 }
   5537 
   5538 SDValue DAGCombiner::visitMLOAD(SDNode *N) {
   5539 
   5540   if (Level >= AfterLegalizeTypes)
   5541     return SDValue();
   5542 
   5543   MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N);
   5544   SDValue Mask = MLD->getMask();
   5545   SDLoc DL(N);
   5546 
   5547   // If the MLOAD result requires splitting and the mask is provided by a
   5548   // SETCC, then split both nodes and its operands before legalization. This
   5549   // prevents the type legalizer from unrolling SETCC into scalar comparisons
   5550   // and enables future optimizations (e.g. min/max pattern matching on X86).
   5551 
   5552   if (Mask.getOpcode() == ISD::SETCC) {
   5553     EVT VT = N->getValueType(0);
   5554 
   5555     // Check if any splitting is required.
   5556     if (TLI.getTypeAction(*DAG.getContext(), VT) !=
   5557         TargetLowering::TypeSplitVector)
   5558       return SDValue();
   5559 
   5560     SDValue MaskLo, MaskHi, Lo, Hi;
   5561     std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG);
   5562 
   5563     SDValue Src0 = MLD->getSrc0();
   5564     SDValue Src0Lo, Src0Hi;
   5565     std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL);
   5566 
   5567     EVT LoVT, HiVT;
   5568     std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0));
   5569 
   5570     SDValue Chain = MLD->getChain();
   5571     SDValue Ptr   = MLD->getBasePtr();
   5572     EVT MemoryVT = MLD->getMemoryVT();
   5573     unsigned Alignment = MLD->getOriginalAlignment();
   5574 
   5575     // if Alignment is equal to the vector size,
   5576     // take the half of it for the second part
   5577     unsigned SecondHalfAlignment =
   5578       (Alignment == MLD->getValueType(0).getSizeInBits()/8) ?
   5579          Alignment/2 : Alignment;
   5580 
   5581     EVT LoMemVT, HiMemVT;
   5582     std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT);
   5583 
   5584     MachineMemOperand *MMO = DAG.getMachineFunction().
   5585     getMachineMemOperand(MLD->getPointerInfo(),
   5586                          MachineMemOperand::MOLoad,  LoMemVT.getStoreSize(),
   5587                          Alignment, MLD->getAAInfo(), MLD->getRanges());
   5588 
   5589     Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO,
   5590                            ISD::NON_EXTLOAD);
   5591 
   5592     unsigned IncrementSize = LoMemVT.getSizeInBits()/8;
   5593     Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   5594                       DAG.getConstant(IncrementSize, DL, Ptr.getValueType()));
   5595 
   5596     MMO = DAG.getMachineFunction().
   5597     getMachineMemOperand(MLD->getPointerInfo(),
   5598                          MachineMemOperand::MOLoad,  HiMemVT.getStoreSize(),
   5599                          SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges());
   5600 
   5601     Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO,
   5602                            ISD::NON_EXTLOAD);
   5603 
   5604     AddToWorklist(Lo.getNode());
   5605     AddToWorklist(Hi.getNode());
   5606 
   5607     // Build a factor node to remember that this load is independent of the
   5608     // other one.
   5609     Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1),
   5610                         Hi.getValue(1));
   5611 
   5612     // Legalized the chain result - switch anything that used the old chain to
   5613     // use the new one.
   5614     DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain);
   5615 
   5616     SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
   5617 
   5618     SDValue RetOps[] = { LoadRes, Chain };
   5619     return DAG.getMergeValues(RetOps, DL);
   5620   }
   5621   return SDValue();
   5622 }
   5623 
/// Combine a VSELECT node: canonicalize the integer-abs idiom, try the
/// generic select simplifications, pre-split illegal SETCC-masked vselects,
/// and fold constant all-ones / all-zeros masks.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);  // condition mask
  SDValue N1 = N->getOperand(1);  // value if mask element is true
  SDValue N2 = N->getOperand(2);  // value if mask element is false
  SDLoc DL(N);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    // "X >= 0 (or > -1) ? X : 0 - X" is abs(X); the -X side must literally
    // be a SUB from an all-zeros vector.
    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    // Mirror image: "X <= 0 (or < 0) ? 0 - X : X" is also abs(X).
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      // Emit the branch-free form: Y = X >> (bits-1); (X + Y) ^ Y.
      EVT VT = LHS.getValueType();
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, DL, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorklist(Shift.getNode());
      AddToWorklist(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  if (SimplifySelectOps(N, N1, N2))
    return SDValue(N, 0);  // Don't revisit N.

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    // Split the SETCC mask and both value operands, then build two
    // half-width vselects.
    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorklist(Lo.getNode());
    AddToWorklist(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
  if (N1.getOpcode() == ISD::CONCAT_VECTORS &&
      N2.getOpcode() == ISD::CONCAT_VECTORS &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    if (SDValue CV = ConvertSelectToConcatVector(N, DAG))
      return CV;
  }

  return SDValue();
}
   5711 
/// Combine a SELECT_CC node: fold identical true/false values, simplify a
/// constant or already-simplified condition, then try the generic select
/// simplifications and the min/max/abs patterns.
SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
  SDValue N0 = N->getOperand(0);  // compare LHS
  SDValue N1 = N->getOperand(1);  // compare RHS
  SDValue N2 = N->getOperand(2);  // value if condition is true
  SDValue N3 = N->getOperand(3);  // value if condition is false
  SDValue N4 = N->getOperand(4);  // condition code operand
  ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get();

  // fold select_cc lhs, rhs, x, x, cc -> x
  if (N2 == N3)
    return N2;

  // Determine if the condition we're dealing with is constant
  if (SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), N0, N1,
                                  CC, SDLoc(N), false)) {
    AddToWorklist(SCC.getNode());

    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
      if (!SCCC->isNullValue())
        return N2;    // cond always true -> true val
      else
        return N3;    // cond always false -> false val
    } else if (SCC->isUndef()) {
      // When the condition is UNDEF, just return the first operand. This is
      // coherent the DAG creation, no setcc node is created in this case
      return N2;
    } else if (SCC.getOpcode() == ISD::SETCC) {
      // Fold to a simpler select_cc
      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
                         SCC.getOperand(2));
    }
  }

  // If we can fold this based on the true/false value, do so.
  if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.

  // fold select_cc into other things, such as min/max/abs
  return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC);
}
   5753 
   5754 SDValue DAGCombiner::visitSETCC(SDNode *N) {
   5755   return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1),
   5756                        cast<CondCodeSDNode>(N->getOperand(2))->get(),
   5757                        SDLoc(N));
   5758 }
   5759 
   5760 SDValue DAGCombiner::visitSETCCE(SDNode *N) {
   5761   SDValue LHS = N->getOperand(0);
   5762   SDValue RHS = N->getOperand(1);
   5763   SDValue Carry = N->getOperand(2);
   5764   SDValue Cond = N->getOperand(3);
   5765 
   5766   // If Carry is false, fold to a regular SETCC.
   5767   if (Carry.getOpcode() == ISD::CARRY_FALSE)
   5768     return DAG.getNode(ISD::SETCC, SDLoc(N), N->getVTList(), LHS, RHS, Cond);
   5769 
   5770   return SDValue();
   5771 }
   5772 
/// Try to fold a sext/zext/aext dag node into a ConstantSDNode or
/// a build_vector of constants.
/// This function is called by the DAGCombiner when visiting sext/zext/aext
/// dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND).
/// Vector extends are not folded if operations are legal; this is to
/// avoid introducing illegal build_vector dag nodes.
/// Returns the folded node, or null if no fold applies.
static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI,
                                         SelectionDAG &DAG, bool LegalTypes,
                                         bool LegalOperations) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND ||
         Opcode == ISD::ANY_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG ||
         Opcode == ISD::ZERO_EXTEND_VECTOR_INREG)
         && "Expected EXTEND dag node in input!");

  // fold (sext c1) -> c1
  // fold (zext c1) -> c1
  // fold (aext c1) -> c1
  // Scalar constant: getNode constant-folds the extend itself.
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode();

  // fold (sext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (zext (build_vector AllConstants) -> (build_vector AllConstants)
  // fold (aext (build_vector AllConstants) -> (build_vector AllConstants)
  // Bail out unless this is an all-constant vector whose scalar type is
  // safe to materialize at this legalization stage.
  EVT SVT = VT.getScalarType();
  if (!(VT.isVector() &&
      (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) &&
      ISD::isBuildVectorOfConstantSDNodes(N0.getNode())))
    return nullptr;

  // We can fold this node into a build_vector.
  unsigned VTBits = SVT.getSizeInBits();
  unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits();
  SmallVector<SDValue, 8> Elts;
  unsigned NumElts = VT.getVectorNumElements();
  SDLoc DL(N);

  // Extend each element individually; undef elements stay undef.
  for (unsigned i=0; i != NumElts; ++i) {
    SDValue Op = N0->getOperand(i);
    if (Op->isUndef()) {
      Elts.push_back(DAG.getUNDEF(SVT));
      continue;
    }

    // Per-element debug location (intentionally shadows the outer DL).
    SDLoc DL(Op);
    // Get the constant value and if needed trunc it to the size of the type.
    // Nodes like build_vector might have constants wider than the scalar type.
    APInt C = cast<ConstantSDNode>(Op)->getAPIntValue().zextOrTrunc(EVTBits);
    // ANY_EXTEND is treated as zero-extension for constants.
    if (Opcode == ISD::SIGN_EXTEND || Opcode == ISD::SIGN_EXTEND_VECTOR_INREG)
      Elts.push_back(DAG.getConstant(C.sext(VTBits), DL, SVT));
    else
      Elts.push_back(DAG.getConstant(C.zext(VTBits), DL, SVT));
  }

  return DAG.getBuildVector(VT, DL, Elts).getNode();
}
   5832 
   5833 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this:
   5834 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))"
   5835 // transformation. Returns true if extension are possible and the above
   5836 // mentioned transformation is profitable.
   5837 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0,
   5838                                     unsigned ExtOpc,
   5839                                     SmallVectorImpl<SDNode *> &ExtendNodes,
   5840                                     const TargetLowering &TLI) {
   5841   bool HasCopyToRegUses = false;
   5842   bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType());
   5843   for (SDNode::use_iterator UI = N0.getNode()->use_begin(),
   5844                             UE = N0.getNode()->use_end();
   5845        UI != UE; ++UI) {
   5846     SDNode *User = *UI;
   5847     if (User == N)
   5848       continue;
   5849     if (UI.getUse().getResNo() != N0.getResNo())
   5850       continue;
   5851     // FIXME: Only extend SETCC N, N and SETCC N, c for now.
   5852     if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) {
   5853       ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get();
   5854       if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC))
   5855         // Sign bits will be lost after a zext.
   5856         return false;
   5857       bool Add = false;
   5858       for (unsigned i = 0; i != 2; ++i) {
   5859         SDValue UseOp = User->getOperand(i);
   5860         if (UseOp == N0)
   5861           continue;
   5862         if (!isa<ConstantSDNode>(UseOp))
   5863           return false;
   5864         Add = true;
   5865       }
   5866       if (Add)
   5867         ExtendNodes.push_back(User);
   5868       continue;
   5869     }
   5870     // If truncates aren't free and there are users we can't
   5871     // extend, it isn't worthwhile.
   5872     if (!isTruncFree)
   5873       return false;
   5874     // Remember if this value is live-out.
   5875     if (User->getOpcode() == ISD::CopyToReg)
   5876       HasCopyToRegUses = true;
   5877   }
   5878 
   5879   if (HasCopyToRegUses) {
   5880     bool BothLiveOut = false;
   5881     for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end();
   5882          UI != UE; ++UI) {
   5883       SDUse &Use = UI.getUse();
   5884       if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) {
   5885         BothLiveOut = true;
   5886         break;
   5887       }
   5888     }
   5889     if (BothLiveOut)
   5890       // Both unextended and extended values are live out. There had better be
   5891       // a good reason for the transformation.
   5892       return ExtendNodes.size();
   5893   }
   5894   return true;
   5895 }
   5896 
   5897 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
   5898                                   SDValue Trunc, SDValue ExtLoad,
   5899                                   const SDLoc &DL, ISD::NodeType ExtType) {
   5900   // Extend SetCC uses if necessary.
   5901   for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
   5902     SDNode *SetCC = SetCCs[i];
   5903     SmallVector<SDValue, 4> Ops;
   5904 
   5905     for (unsigned j = 0; j != 2; ++j) {
   5906       SDValue SOp = SetCC->getOperand(j);
   5907       if (SOp == Trunc)
   5908         Ops.push_back(ExtLoad);
   5909       else
   5910         Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
   5911     }
   5912 
   5913     Ops.push_back(SetCC->getOperand(2));
   5914     CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
   5915   }
   5916 }
   5917 
// FIXME: Bring more similar combines here, common to sext/zext (maybe aext?).
/// Split an extending load of an illegal-but-splittable vector type into
/// several legal extloads, concatenate the pieces, and rewrite the uses of
/// both the extend and the original load.
SDValue DAGCombiner::CombineExtLoad(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT DstVT = N->getValueType(0);
  EVT SrcVT = N0.getValueType();

  assert((N->getOpcode() == ISD::SIGN_EXTEND ||
          N->getOpcode() == ISD::ZERO_EXTEND) &&
         "Unexpected node type (not an extend)!");

  // fold (sext (load x)) to multiple smaller sextloads; same for zext.
  // For example, on a target with legal v4i32, but illegal v8i32, turn:
  //   (v8i32 (sext (v8i16 (load x))))
  // into:
  //   (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                          (v4i32 (sextload (x + 16)))))
  // Where uses of the original load, i.e.:
  //   (v8i16 (load x))
  // are replaced with:
  //   (v8i16 (truncate
  //     (v8i32 (concat_vectors (v4i32 (sextload x)),
  //                            (v4i32 (sextload (x + 16)))))))
  //
  // This combine is only applicable to illegal, but splittable, vectors.
  // All legal types, and illegal non-vector types, are handled elsewhere.
  // This combine is controlled by TargetLowering::isVectorLoadExtDesirable.
  //
  if (N0->getOpcode() != ISD::LOAD)
    return SDValue();

  LoadSDNode *LN0 = cast<LoadSDNode>(N0);

  // Only plain, unindexed, non-volatile loads of power-of-two vectors, and
  // only if the target explicitly wants vector extloads here.
  if (!ISD::isNON_EXTLoad(LN0) || !ISD::isUNINDEXEDLoad(LN0) ||
      !N0.hasOneUse() || LN0->isVolatile() || !DstVT.isVector() ||
      !DstVT.isPow2VectorType() || !TLI.isVectorLoadExtDesirable(SDValue(N, 0)))
    return SDValue();

  // Collect SETCC users of the load that can be rewritten to use the
  // extended value; bail if the extension isn't profitable.
  SmallVector<SDNode *, 4> SetCCs;
  if (!ExtendUsesToFormExtLoad(N, N0, N->getOpcode(), SetCCs, TLI))
    return SDValue();

  ISD::LoadExtType ExtType =
      N->getOpcode() == ISD::SIGN_EXTEND ? ISD::SEXTLOAD : ISD::ZEXTLOAD;

  // Try to split the vector types to get down to legal types.
  // Halve both types until the extload is legal/custom or no further split
  // is possible.
  EVT SplitSrcVT = SrcVT;
  EVT SplitDstVT = DstVT;
  while (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT) &&
         SplitSrcVT.getVectorNumElements() > 1) {
    SplitDstVT = DAG.GetSplitDestVTs(SplitDstVT).first;
    SplitSrcVT = DAG.GetSplitDestVTs(SplitSrcVT).first;
  }

  if (!TLI.isLoadExtLegalOrCustom(ExtType, SplitDstVT, SplitSrcVT))
    return SDValue();

  SDLoc DL(N);
  const unsigned NumSplits =
      DstVT.getVectorNumElements() / SplitDstVT.getVectorNumElements();
  // Stride is the in-memory size of one split source piece.
  const unsigned Stride = SplitSrcVT.getStoreSize();
  SmallVector<SDValue, 4> Loads;
  SmallVector<SDValue, 4> Chains;

  // Emit one extload per split piece, advancing the pointer by Stride and
  // shrinking the alignment as the offset requires.
  SDValue BasePtr = LN0->getBasePtr();
  for (unsigned Idx = 0; Idx < NumSplits; Idx++) {
    const unsigned Offset = Idx * Stride;
    const unsigned Align = MinAlign(LN0->getAlignment(), Offset);

    SDValue SplitLoad = DAG.getExtLoad(
        ExtType, DL, SplitDstVT, LN0->getChain(), BasePtr,
        LN0->getPointerInfo().getWithOffset(Offset), SplitSrcVT,
        LN0->isVolatile(), LN0->isNonTemporal(), LN0->isInvariant(),
        Align, LN0->getAAInfo());

    BasePtr = DAG.getNode(ISD::ADD, DL, BasePtr.getValueType(), BasePtr,
                          DAG.getConstant(Stride, DL, BasePtr.getValueType()));

    Loads.push_back(SplitLoad.getValue(0));
    Chains.push_back(SplitLoad.getValue(1));
  }

  // Join the split chains and concatenate the loaded values.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewValue = DAG.getNode(ISD::CONCAT_VECTORS, DL, DstVT, Loads);

  CombineTo(N, NewValue);

  // Replace uses of the original load (before extension)
  // with a truncate of the concatenated sextloaded vectors.
  SDValue Trunc =
      DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), NewValue);
  CombineTo(N0.getNode(), Trunc, NewChain);
  ExtendSetCCUses(SetCCs, Trunc, NewValue, DL,
                  (ISD::NodeType)N->getOpcode());
  return SDValue(N, 0); // Return N so it doesn't get rechecked!
}
   6013 
   6014 SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
   6015   SDValue N0 = N->getOperand(0);
   6016   EVT VT = N->getValueType(0);
   6017 
   6018   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   6019                                               LegalOperations))
   6020     return SDValue(Res, 0);
   6021 
   6022   // fold (sext (sext x)) -> (sext x)
   6023   // fold (sext (aext x)) -> (sext x)
   6024   if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
   6025     return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
   6026                        N0.getOperand(0));
   6027 
   6028   if (N0.getOpcode() == ISD::TRUNCATE) {
   6029     // fold (sext (truncate (load x))) -> (sext (smaller load x))
   6030     // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
   6031     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
   6032       SDNode* oye = N0.getNode()->getOperand(0).getNode();
   6033       if (NarrowLoad.getNode() != N0.getNode()) {
   6034         CombineTo(N0.getNode(), NarrowLoad);
   6035         // CombineTo deleted the truncate, if needed, but not what's under it.
   6036         AddToWorklist(oye);
   6037       }
   6038       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6039     }
   6040 
   6041     // See if the value being truncated is already sign extended.  If so, just
   6042     // eliminate the trunc/sext pair.
   6043     SDValue Op = N0.getOperand(0);
   6044     unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
   6045     unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
   6046     unsigned DestBits = VT.getScalarType().getSizeInBits();
   6047     unsigned NumSignBits = DAG.ComputeNumSignBits(Op);
   6048 
   6049     if (OpBits == DestBits) {
   6050       // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
   6051       // bits, it is already ready.
   6052       if (NumSignBits > DestBits-MidBits)
   6053         return Op;
   6054     } else if (OpBits < DestBits) {
   6055       // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
   6056       // bits, just sext from i32.
   6057       if (NumSignBits > OpBits-MidBits)
   6058         return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
   6059     } else {
   6060       // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
   6061       // bits, just truncate to i32.
   6062       if (NumSignBits > OpBits-MidBits)
   6063         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
   6064     }
   6065 
   6066     // fold (sext (truncate x)) -> (sextinreg x).
   6067     if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
   6068                                                  N0.getValueType())) {
   6069       if (OpBits < DestBits)
   6070         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
   6071       else if (OpBits > DestBits)
   6072         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
   6073       return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
   6074                          DAG.getValueType(N0.getValueType()));
   6075     }
   6076   }
   6077 
   6078   // fold (sext (load x)) -> (sext (truncate (sextload x)))
   6079   // Only generate vector extloads when 1) they're legal, and 2) they are
   6080   // deemed desirable by the target.
   6081   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   6082       ((!LegalOperations && !VT.isVector() &&
   6083         !cast<LoadSDNode>(N0)->isVolatile()) ||
   6084        TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()))) {
   6085     bool DoXform = true;
   6086     SmallVector<SDNode*, 4> SetCCs;
   6087     if (!N0.hasOneUse())
   6088       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
   6089     if (VT.isVector())
   6090       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
   6091     if (DoXform) {
   6092       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6093       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   6094                                        LN0->getChain(),
   6095                                        LN0->getBasePtr(), N0.getValueType(),
   6096                                        LN0->getMemOperand());
   6097       CombineTo(N, ExtLoad);
   6098       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   6099                                   N0.getValueType(), ExtLoad);
   6100       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
   6101       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   6102                       ISD::SIGN_EXTEND);
   6103       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6104     }
   6105   }
   6106 
   6107   // fold (sext (load x)) to multiple smaller sextloads.
   6108   // Only on illegal but splittable vectors.
   6109   if (SDValue ExtLoad = CombineExtLoad(N))
   6110     return ExtLoad;
   6111 
   6112   // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
   6113   // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
   6114   if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
   6115       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
   6116     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6117     EVT MemVT = LN0->getMemoryVT();
   6118     if ((!LegalOperations && !LN0->isVolatile()) ||
   6119         TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, MemVT)) {
   6120       SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
   6121                                        LN0->getChain(),
   6122                                        LN0->getBasePtr(), MemVT,
   6123                                        LN0->getMemOperand());
   6124       CombineTo(N, ExtLoad);
   6125       CombineTo(N0.getNode(),
   6126                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   6127                             N0.getValueType(), ExtLoad),
   6128                 ExtLoad.getValue(1));
   6129       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6130     }
   6131   }
   6132 
   6133   // fold (sext (and/or/xor (load x), cst)) ->
   6134   //      (and/or/xor (sextload x), (sext cst))
   6135   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
   6136        N0.getOpcode() == ISD::XOR) &&
   6137       isa<LoadSDNode>(N0.getOperand(0)) &&
   6138       N0.getOperand(1).getOpcode() == ISD::Constant &&
   6139       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, N0.getValueType()) &&
   6140       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
   6141     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
   6142     if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
   6143       bool DoXform = true;
   6144       SmallVector<SDNode*, 4> SetCCs;
   6145       if (!N0.hasOneUse())
   6146         DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
   6147                                           SetCCs, TLI);
   6148       if (DoXform) {
   6149         SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
   6150                                          LN0->getChain(), LN0->getBasePtr(),
   6151                                          LN0->getMemoryVT(),
   6152                                          LN0->getMemOperand());
   6153         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   6154         Mask = Mask.sext(VT.getSizeInBits());
   6155         SDLoc DL(N);
   6156         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
   6157                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
   6158         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
   6159                                     SDLoc(N0.getOperand(0)),
   6160                                     N0.getOperand(0).getValueType(), ExtLoad);
   6161         CombineTo(N, And);
   6162         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
   6163         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
   6164                         ISD::SIGN_EXTEND);
   6165         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6166       }
   6167     }
   6168   }
   6169 
   6170   if (N0.getOpcode() == ISD::SETCC) {
   6171     EVT N0VT = N0.getOperand(0).getValueType();
   6172     // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
   6173     // Only do this before legalize for now.
   6174     if (VT.isVector() && !LegalOperations &&
   6175         TLI.getBooleanContents(N0VT) ==
   6176             TargetLowering::ZeroOrNegativeOneBooleanContent) {
   6177       // On some architectures (such as SSE/NEON/etc) the SETCC result type is
   6178       // of the same size as the compared operands. Only optimize sext(setcc())
   6179       // if this is the case.
   6180       EVT SVT = getSetCCResultType(N0VT);
   6181 
   6182       // We know that the # elements of the results is the same as the
   6183       // # elements of the compare (and the # elements of the compare result
   6184       // for that matter).  Check to see that they are the same size.  If so,
   6185       // we know that the element size of the sext'd result matches the
   6186       // element size of the compare operands.
   6187       if (VT.getSizeInBits() == SVT.getSizeInBits())
   6188         return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
   6189                              N0.getOperand(1),
   6190                              cast<CondCodeSDNode>(N0.getOperand(2))->get());
   6191 
   6192       // If the desired elements are smaller or larger than the source
   6193       // elements we can use a matching integer vector type and then
   6194       // truncate/sign extend
   6195       EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
   6196       if (SVT == MatchingVectorType) {
   6197         SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
   6198                                N0.getOperand(0), N0.getOperand(1),
   6199                                cast<CondCodeSDNode>(N0.getOperand(2))->get());
   6200         return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
   6201       }
   6202     }
   6203 
   6204     // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
   6205     unsigned ElementWidth = VT.getScalarType().getSizeInBits();
   6206     SDLoc DL(N);
   6207     SDValue NegOne =
   6208       DAG.getConstant(APInt::getAllOnesValue(ElementWidth), DL, VT);
   6209     if (SDValue SCC = SimplifySelectCC(
   6210             DL, N0.getOperand(0), N0.getOperand(1), NegOne,
   6211             DAG.getConstant(0, DL, VT),
   6212             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
   6213       return SCC;
   6214 
   6215     if (!VT.isVector()) {
   6216       EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
   6217       if (!LegalOperations ||
   6218           TLI.isOperationLegal(ISD::SETCC, N0.getOperand(0).getValueType())) {
   6219         SDLoc DL(N);
   6220         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
   6221         SDValue SetCC = DAG.getSetCC(DL, SetCCVT,
   6222                                      N0.getOperand(0), N0.getOperand(1), CC);
   6223         return DAG.getSelect(DL, VT, SetCC,
   6224                              NegOne, DAG.getConstant(0, DL, VT));
   6225       }
   6226     }
   6227   }
   6228 
   6229   // fold (sext x) -> (zext x) if the sign bit is known zero.
   6230   if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
   6231       DAG.SignBitIsZero(N0))
   6232     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);
   6233 
   6234   return SDValue();
   6235 }
   6236 
   6237 // isTruncateOf - If N is a truncate of some other value, return true, record
   6238 // the value being truncated in Op and which of Op's bits are zero in KnownZero.
   6239 // This function computes KnownZero to avoid a duplicated call to
   6240 // computeKnownBits in the caller.
   6241 static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
   6242                          APInt &KnownZero) {
   6243   APInt KnownOne;
   6244   if (N->getOpcode() == ISD::TRUNCATE) {
   6245     Op = N->getOperand(0);
   6246     DAG.computeKnownBits(Op, KnownZero, KnownOne);
   6247     return true;
   6248   }
   6249 
   6250   if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
   6251       cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
   6252     return false;
   6253 
   6254   SDValue Op0 = N->getOperand(0);
   6255   SDValue Op1 = N->getOperand(1);
   6256   assert(Op0.getValueType() == Op1.getValueType());
   6257 
   6258   if (isNullConstant(Op0))
   6259     Op = Op1;
   6260   else if (isNullConstant(Op1))
   6261     Op = Op0;
   6262   else
   6263     return false;
   6264 
   6265   DAG.computeKnownBits(Op, KnownZero, KnownOne);
   6266 
   6267   if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
   6268     return false;
   6269 
   6270   return true;
   6271 }
   6272 
   6273 SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
   6274   SDValue N0 = N->getOperand(0);
   6275   EVT VT = N->getValueType(0);
   6276 
   6277   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   6278                                               LegalOperations))
   6279     return SDValue(Res, 0);
   6280 
   6281   // fold (zext (zext x)) -> (zext x)
   6282   // fold (zext (aext x)) -> (zext x)
   6283   if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
   6284     return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
   6285                        N0.getOperand(0));
   6286 
   6287   // fold (zext (truncate x)) -> (zext x) or
   6288   //      (zext (truncate x)) -> (truncate x)
   6289   // This is valid when the truncated bits of x are already zero.
   6290   // FIXME: We should extend this to work for vectors too.
   6291   SDValue Op;
   6292   APInt KnownZero;
   6293   if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
   6294     APInt TruncatedBits =
   6295       (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
   6296       APInt(Op.getValueSizeInBits(), 0) :
   6297       APInt::getBitsSet(Op.getValueSizeInBits(),
   6298                         N0.getValueSizeInBits(),
   6299                         std::min(Op.getValueSizeInBits(),
   6300                                  VT.getSizeInBits()));
   6301     if (TruncatedBits == (KnownZero & TruncatedBits)) {
   6302       if (VT.bitsGT(Op.getValueType()))
   6303         return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
   6304       if (VT.bitsLT(Op.getValueType()))
   6305         return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
   6306 
   6307       return Op;
   6308     }
   6309   }
   6310 
   6311   // fold (zext (truncate (load x))) -> (zext (smaller load x))
   6312   // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
   6313   if (N0.getOpcode() == ISD::TRUNCATE) {
   6314     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
   6315       SDNode* oye = N0.getNode()->getOperand(0).getNode();
   6316       if (NarrowLoad.getNode() != N0.getNode()) {
   6317         CombineTo(N0.getNode(), NarrowLoad);
   6318         // CombineTo deleted the truncate, if needed, but not what's under it.
   6319         AddToWorklist(oye);
   6320       }
   6321       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6322     }
   6323   }
   6324 
   6325   // fold (zext (truncate x)) -> (and x, mask)
   6326   if (N0.getOpcode() == ISD::TRUNCATE) {
   6327     // fold (zext (truncate (load x))) -> (zext (smaller load x))
   6328     // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
   6329     if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
   6330       SDNode *oye = N0.getNode()->getOperand(0).getNode();
   6331       if (NarrowLoad.getNode() != N0.getNode()) {
   6332         CombineTo(N0.getNode(), NarrowLoad);
   6333         // CombineTo deleted the truncate, if needed, but not what's under it.
   6334         AddToWorklist(oye);
   6335       }
   6336       return SDValue(N, 0); // Return N so it doesn't get rechecked!
   6337     }
   6338 
   6339     EVT SrcVT = N0.getOperand(0).getValueType();
   6340     EVT MinVT = N0.getValueType();
   6341 
   6342     // Try to mask before the extension to avoid having to generate a larger mask,
   6343     // possibly over several sub-vectors.
   6344     if (SrcVT.bitsLT(VT)) {
   6345       if (!LegalOperations || (TLI.isOperationLegal(ISD::AND, SrcVT) &&
   6346                                TLI.isOperationLegal(ISD::ZERO_EXTEND, VT))) {
   6347         SDValue Op = N0.getOperand(0);
   6348         Op = DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
   6349         AddToWorklist(Op.getNode());
   6350         return DAG.getZExtOrTrunc(Op, SDLoc(N), VT);
   6351       }
   6352     }
   6353 
   6354     if (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT)) {
   6355       SDValue Op = N0.getOperand(0);
   6356       if (SrcVT.bitsLT(VT)) {
   6357         Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
   6358         AddToWorklist(Op.getNode());
   6359       } else if (SrcVT.bitsGT(VT)) {
   6360         Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
   6361         AddToWorklist(Op.getNode());
   6362       }
   6363       return DAG.getZeroExtendInReg(Op, SDLoc(N), MinVT.getScalarType());
   6364     }
   6365   }
   6366 
   6367   // Fold (zext (and (trunc x), cst)) -> (and x, cst),
   6368   // if either of the casts is not free.
   6369   if (N0.getOpcode() == ISD::AND &&
   6370       N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
   6371       N0.getOperand(1).getOpcode() == ISD::Constant &&
   6372       (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
   6373                            N0.getValueType()) ||
   6374        !TLI.isZExtFree(N0.getValueType(), VT))) {
   6375     SDValue X = N0.getOperand(0).getOperand(0);
   6376     if (X.getValueType().bitsLT(VT)) {
   6377       X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
   6378     } else if (X.getValueType().bitsGT(VT)) {
   6379       X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
   6380     }
   6381     APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   6382     Mask = Mask.zext(VT.getSizeInBits());
   6383     SDLoc DL(N);
   6384     return DAG.getNode(ISD::AND, DL, VT,
   6385                        X, DAG.getConstant(Mask, DL, VT));
   6386   }
   6387 
   6388   // fold (zext (load x)) -> (zext (truncate (zextload x)))
   6389   // Only generate vector extloads when 1) they're legal, and 2) they are
   6390   // deemed desirable by the target.
   6391   if (ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
   6392       ((!LegalOperations && !VT.isVector() &&
   6393         !cast<LoadSDNode>(N0)->isVolatile()) ||
   6394        TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()))) {
   6395     bool DoXform = true;
   6396     SmallVector<SDNode*, 4> SetCCs;
   6397     if (!N0.hasOneUse())
   6398       DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
   6399     if (VT.isVector())
   6400       DoXform &= TLI.isVectorLoadExtDesirable(SDValue(N, 0));
   6401     if (DoXform) {
   6402       LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6403       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
   6404                                        LN0->getChain(),
   6405                                        LN0->getBasePtr(), N0.getValueType(),
   6406                                        LN0->getMemOperand());
   6407       CombineTo(N, ExtLoad);
   6408       SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
   6409                                   N0.getValueType(), ExtLoad);
   6410       CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
   6411 
   6412       ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
   6413                       ISD::ZERO_EXTEND);
   6414       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6415     }
   6416   }
   6417 
   6418   // fold (zext (load x)) to multiple smaller zextloads.
   6419   // Only on illegal but splittable vectors.
   6420   if (SDValue ExtLoad = CombineExtLoad(N))
   6421     return ExtLoad;
   6422 
   6423   // fold (zext (and/or/xor (load x), cst)) ->
   6424   //      (and/or/xor (zextload x), (zext cst))
   6425   // Unless (and (load x) cst) will match as a zextload already and has
   6426   // additional users.
   6427   if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
   6428        N0.getOpcode() == ISD::XOR) &&
   6429       isa<LoadSDNode>(N0.getOperand(0)) &&
   6430       N0.getOperand(1).getOpcode() == ISD::Constant &&
   6431       TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, N0.getValueType()) &&
   6432       (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
   6433     LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
   6434     if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
   6435       bool DoXform = true;
   6436       SmallVector<SDNode*, 4> SetCCs;
   6437       if (!N0.hasOneUse()) {
   6438         if (N0.getOpcode() == ISD::AND) {
   6439           auto *AndC = cast<ConstantSDNode>(N0.getOperand(1));
   6440           auto NarrowLoad = false;
   6441           EVT LoadResultTy = AndC->getValueType(0);
   6442           EVT ExtVT, LoadedVT;
   6443           if (isAndLoadExtLoad(AndC, LN0, LoadResultTy, ExtVT, LoadedVT,
   6444                                NarrowLoad))
   6445             DoXform = false;
   6446         }
   6447         if (DoXform)
   6448           DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0),
   6449                                             ISD::ZERO_EXTEND, SetCCs, TLI);
   6450       }
   6451       if (DoXform) {
   6452         SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
   6453                                          LN0->getChain(), LN0->getBasePtr(),
   6454                                          LN0->getMemoryVT(),
   6455                                          LN0->getMemOperand());
   6456         APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
   6457         Mask = Mask.zext(VT.getSizeInBits());
   6458         SDLoc DL(N);
   6459         SDValue And = DAG.getNode(N0.getOpcode(), DL, VT,
   6460                                   ExtLoad, DAG.getConstant(Mask, DL, VT));
   6461         SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
   6462                                     SDLoc(N0.getOperand(0)),
   6463                                     N0.getOperand(0).getValueType(), ExtLoad);
   6464         CombineTo(N, And);
   6465         CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
   6466         ExtendSetCCUses(SetCCs, Trunc, ExtLoad, DL,
   6467                         ISD::ZERO_EXTEND);
   6468         return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6469       }
   6470     }
   6471   }
   6472 
   6473   // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
   6474   // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
   6475   if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
   6476       ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
   6477     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   6478     EVT MemVT = LN0->getMemoryVT();
   6479     if ((!LegalOperations && !LN0->isVolatile()) ||
   6480         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT)) {
   6481       SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
   6482                                        LN0->getChain(),
   6483                                        LN0->getBasePtr(), MemVT,
   6484                                        LN0->getMemOperand());
   6485       CombineTo(N, ExtLoad);
   6486       CombineTo(N0.getNode(),
   6487                 DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
   6488                             ExtLoad),
   6489                 ExtLoad.getValue(1));
   6490       return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   6491     }
   6492   }
   6493 
   6494   if (N0.getOpcode() == ISD::SETCC) {
   6495     // Only do this before legalize for now.
   6496     if (!LegalOperations && VT.isVector() &&
   6497         N0.getValueType().getVectorElementType() == MVT::i1) {
   6498       EVT N00VT = N0.getOperand(0).getValueType();
   6499       if (getSetCCResultType(N00VT) == N0.getValueType())
   6500         return SDValue();
   6501 
   6502       // We know that the # elements of the results is the same as the #
   6503       // elements of the compare (and the # elements of the compare result for
   6504       // that matter). Check to see that they are the same size. If so, we know
   6505       // that the element size of the sext'd result matches the element size of
   6506       // the compare operands.
   6507       SDLoc DL(N);
   6508       SDValue VecOnes = DAG.getConstant(1, DL, VT);
   6509       if (VT.getSizeInBits() == N00VT.getSizeInBits()) {
   6510         // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
   6511         SDValue VSetCC = DAG.getNode(ISD::SETCC, DL, VT, N0.getOperand(0),
   6512                                      N0.getOperand(1), N0.getOperand(2));
   6513         return DAG.getNode(ISD::AND, DL, VT, VSetCC, VecOnes);
   6514       }
   6515 
   6516       // If the desired elements are smaller or larger than the source
   6517       // elements we can use a matching integer vector type and then
   6518       // truncate/sign extend.
   6519       EVT MatchingElementType = EVT::getIntegerVT(
   6520           *DAG.getContext(), N00VT.getScalarType().getSizeInBits());
   6521       EVT MatchingVectorType = EVT::getVectorVT(
   6522           *DAG.getContext(), MatchingElementType, N00VT.getVectorNumElements());
   6523       SDValue VsetCC =
   6524           DAG.getNode(ISD::SETCC, DL, MatchingVectorType, N0.getOperand(0),
   6525                       N0.getOperand(1), N0.getOperand(2));
   6526       return DAG.getNode(ISD::AND, DL, VT, DAG.getSExtOrTrunc(VsetCC, DL, VT),
   6527                          VecOnes);
   6528     }
   6529 
   6530     // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
   6531     SDLoc DL(N);
   6532     if (SDValue SCC = SimplifySelectCC(
   6533             DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
   6534             DAG.getConstant(0, DL, VT),
   6535             cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
   6536       return SCC;
   6537   }
   6538 
   6539   // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
   6540   if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
   6541       isa<ConstantSDNode>(N0.getOperand(1)) &&
   6542       N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
   6543       N0.hasOneUse()) {
   6544     SDValue ShAmt = N0.getOperand(1);
   6545     unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
   6546     if (N0.getOpcode() == ISD::SHL) {
   6547       SDValue InnerZExt = N0.getOperand(0);
   6548       // If the original shl may be shifting out bits, do not perform this
   6549       // transformation.
   6550       unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
   6551         InnerZExt.getOperand(0).getValueType().getSizeInBits();
   6552       if (ShAmtVal > KnownZeroBits)
   6553         return SDValue();
   6554     }
   6555 
   6556     SDLoc DL(N);
   6557 
   6558     // Ensure that the shift amount is wide enough for the shifted value.
   6559     if (VT.getSizeInBits() >= 256)
   6560       ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);
   6561 
   6562     return DAG.getNode(N0.getOpcode(), DL, VT,
   6563                        DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
   6564                        ShAmt);
   6565   }
   6566 
   6567   return SDValue();
   6568 }
   6569 
/// Visit an ISD::ANY_EXTEND node and try to replace it with a simpler or
/// cheaper equivalent.  Returns the replacement value, or a null SDValue if
/// no fold applies.  When a fold rewrites users in place via CombineTo, it
/// returns SDValue(N, 0) so the combiner does not revisit the dead node.
SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // aext of a constant (or a build_vector of constants) folds to a constant.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    if (SDValue NarrowLoad = ReduceLoadWidth(N0.getNode())) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorklist(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  // Since the high bits of an aext are undefined, truncate+aext collapses
  // to a single cast (or to x itself) depending on the relative widths.
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    // Bring x to the destination width before applying the mask.
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    // The mask constant is zero-extended to the destination width.
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    SDLoc DL(N);
    return DAG.getNode(ISD::AND, DL, VT,
                       X, DAG.getConstant(Mask, DL, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    // If the load has other users, only transform when all of them can be
    // rewritten to use the extended value as well.
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Other users of the load keep seeing the original narrow value.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, VT, MemVT)) {
      // Re-issue the existing extending load directly at the wider type,
      // keeping its original extension kind.
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                             N0.getOperand(1),
                             cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                        N0.getOperand(1),
                        cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDLoc DL(N);
    if (SDValue SCC = SimplifySelectCC(
            DL, N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, DL, VT),
            DAG.getConstant(0, DL, VT),
            cast<CondCodeSDNode>(N0.getOperand(2))->get(), true))
      return SCC;
  }

  return SDValue();
}
   6720 
   6721 /// See if the specified operand can be simplified with the knowledge that only
   6722 /// the bits specified by Mask are used.  If so, return the simpler operand,
   6723 /// otherwise return a null SDValue.
   6724 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) {
   6725   switch (V.getOpcode()) {
   6726   default: break;
   6727   case ISD::Constant: {
   6728     const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode());
   6729     assert(CV && "Const value should be ConstSDNode.");
   6730     const APInt &CVal = CV->getAPIntValue();
   6731     APInt NewVal = CVal & Mask;
   6732     if (NewVal != CVal)
   6733       return DAG.getConstant(NewVal, SDLoc(V), V.getValueType());
   6734     break;
   6735   }
   6736   case ISD::OR:
   6737   case ISD::XOR:
   6738     // If the LHS or RHS don't contribute bits to the or, drop them.
   6739     if (DAG.MaskedValueIsZero(V.getOperand(0), Mask))
   6740       return V.getOperand(1);
   6741     if (DAG.MaskedValueIsZero(V.getOperand(1), Mask))
   6742       return V.getOperand(0);
   6743     break;
   6744   case ISD::SRL:
   6745     // Only look at single-use SRLs.
   6746     if (!V.getNode()->hasOneUse())
   6747       break;
   6748     if (ConstantSDNode *RHSC = getAsNonOpaqueConstant(V.getOperand(1))) {
   6749       // See if we can recursively simplify the LHS.
   6750       unsigned Amt = RHSC->getZExtValue();
   6751 
   6752       // Watch out for shift count overflow though.
   6753       if (Amt >= Mask.getBitWidth()) break;
   6754       APInt NewMask = Mask << Amt;
   6755       if (SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask))
   6756         return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(),
   6757                            SimplifyLHS, V.getOperand(1));
   6758     }
   6759   }
   6760   return SDValue();
   6761 }
   6762 
/// If the result of a wider load is shifted right by N bits and then truncated
/// to a narrower type, where N is a multiple of the number of bits of the
/// narrower type, transform it to a narrower load from address + N / num of
/// bits of new type. If the result is to be extended, also fold the extension
/// to form an extending load.
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  // ExtType/ExtVT describe how the narrowed load must extend its result so
  // that it reproduces the bits the original (load + shift/trunc) produced.
  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    // Point N0 at the SRL itself so the (srl (load)) matching below sees it.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    // The useful narrow width is whatever survives the right shift.
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, VT, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  // ShAmt is the right-shift distance (in bits) folded into the new load's
  // address offset.
  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of the narrow type's size (ExtVT)?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of the narrow type's size?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.  ShLeftAmt records the left
  // shift that must be re-applied to the narrowed load's result.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  // Give the target a veto (e.g. when the narrow load wouldn't be cheaper).
  if (!TLI.shouldReduceLoadWidth(LN0, ExtType, ExtVT))
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (DAG.getDataLayout().isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Convert the bit shift into a byte offset from the original base pointer.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDLoc DL(LN0);
  // The original load itself didn't wrap, so an offset within it doesn't.
  SDNodeFlags Flags;
  Flags.setNoUnsignedWrap(true);
  SDValue NewPtr = DAG.getNode(ISD::ADD, DL,
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, DL, PtrType),
                               &Flags);
  AddToWorklist(NewPtr.getNode());

  // Build the narrowed (possibly extending) load at the adjusted address.
  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load =  DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                        LN0->getPointerInfo().getWithOffset(PtrOff),
                        LN0->isVolatile(), LN0->isNonTemporal(),
                        LN0->isInvariant(), NewAlign, LN0->getAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          LN0->isInvariant(), NewAlign, LN0->getAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    SDLoc DL(N0);
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, DL, VT);
    else
      Result = DAG.getNode(ISD::SHL, DL, VT,
                          Result, DAG.getConstant(ShLeftAmt, DL, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}
   6939 
/// Combine a SIGN_EXTEND_INREG node: try a sequence of folds that either
/// eliminate the in-register extension or merge it into a neighboring node
/// (load, shift, or bswap).  Returns the replacement value, or an empty
/// SDValue if no fold applies.
SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: this local deliberately (if confusingly) shadows the EVT type name;
  // it is the narrow type being sign-extended from (operand 1's VT).
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // sext_in_reg of undef is undef.
  if (N0.isUndef())
    return DAG.getUNDEF(VT);

  // fold (sext_in_reg c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  if (SDValue NarrowLoad = ReduceLoadWidth(N))
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorklist(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, VT, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                           N0.getOperand(1), false))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  return SDValue();
}
   7046 
   7047 SDValue DAGCombiner::visitSIGN_EXTEND_VECTOR_INREG(SDNode *N) {
   7048   SDValue N0 = N->getOperand(0);
   7049   EVT VT = N->getValueType(0);
   7050 
   7051   if (N0.isUndef())
   7052     return DAG.getUNDEF(VT);
   7053 
   7054   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   7055                                               LegalOperations))
   7056     return SDValue(Res, 0);
   7057 
   7058   return SDValue();
   7059 }
   7060 
   7061 SDValue DAGCombiner::visitZERO_EXTEND_VECTOR_INREG(SDNode *N) {
   7062   SDValue N0 = N->getOperand(0);
   7063   EVT VT = N->getValueType(0);
   7064 
   7065   if (N0.isUndef())
   7066     return DAG.getUNDEF(VT);
   7067 
   7068   if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
   7069                                               LegalOperations))
   7070     return SDValue(Res, 0);
   7071 
   7072   return SDValue();
   7073 }
   7074 
/// Combine a TRUNCATE node: try a sequence of folds that remove the truncate,
/// push it through other operations, or narrow the operation being truncated
/// (extends, selects, shifts, loads, build/concat vectors, bitcasts).
/// Returns the replacement value, or an empty SDValue if no fold applies.
SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = DAG.getDataLayout().isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    // if the source is smaller than the dest, we still need an extend.
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));
    // if the source is larger than the dest, than we just need the truncate.
    if (N0.getOperand(0).getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
    // if the source and dest are the same type, we can drop both the extend
    // and the truncate.
    return N0.getOperand(0);
  }

  // Fold extract-and-trunc into a narrow extract. For example:
  //   i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1)
  //   i32 y = TRUNCATE(i64 x)
  //        -- becomes --
  //   v16i8 b = BITCAST (v2i64 val)
  //   i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8)
  //
  // Note: We only run this optimization after type legalization (which often
  // creates this pattern) and before operation legalization after which
  // we need to be more careful about the vector instructions that we generate.
  if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
      LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) {

    EVT VecTy = N0.getOperand(0).getValueType();
    EVT ExTy = N0.getValueType();
    EVT TrTy = N->getValueType(0);

    unsigned NumElem = VecTy.getVectorNumElements();
    unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits();

    // Reinterpret the source vector as SizeRatio-times-as-many narrow elements.
    EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem);
    assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size");

    SDValue EltNo = N0->getOperand(1);
    if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) {
      int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      // The low sub-element of the wide element differs by endianness.
      int Index = isLE ? (Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1));

      SDLoc DL(N);
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, TrTy,
                         DAG.getBitcast(NVT, N0.getOperand(0)),
                         DAG.getConstant(Index, DL, IndexTy));
    }
  }

  // trunc (select c, a, b) -> select c, (trunc a), (trunc b)
  if (N0.getOpcode() == ISD::SELECT) {
    EVT SrcVT = N0.getValueType();
    if ((!LegalOperations || TLI.isOperationLegal(ISD::SELECT, SrcVT)) &&
        TLI.isTruncateFree(SrcVT, VT)) {
      SDLoc SL(N0);
      SDValue Cond = N0.getOperand(0);
      SDValue TruncOp0 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(1));
      SDValue TruncOp1 = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(2));
      return DAG.getNode(ISD::SELECT, SDLoc(N), VT, Cond, TruncOp0, TruncOp1);
    }
  }

  // trunc (shl x, K) -> shl (trunc x), K => K < vt.size / 2
  if (N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::SHL, VT)) &&
      TLI.isTypeDesirableForOp(ISD::SHL, VT)) {
    if (const ConstantSDNode *CAmt = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t Amt = CAmt->getZExtValue();
      unsigned Size = VT.getSizeInBits();

      if (Amt < Size / 2) {
        SDLoc SL(N);
        EVT AmtVT = TLI.getShiftAmountTy(VT, DAG.getDataLayout());

        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, VT, N0.getOperand(0));
        return DAG.getNode(ISD::SHL, SL, VT, Trunc,
                           DAG.getConstant(Amt, SL, AmtVT));
      }
    }
  }

  // Fold a series of buildvector, bitcast, and truncate if possible.
  // For example fold
  //   (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to
  //   (2xi32 (buildvector x, y)).
  if (Level == AfterLegalizeVectorOps && VT.isVector() &&
      N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N0.getOperand(0).hasOneUse()) {

    SDValue BuildVect = N0.getOperand(0);
    EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType();
    EVT TruncVecEltTy = VT.getVectorElementType();

    // Check that the element types match.
    if (BuildVectEltTy == TruncVecEltTy) {
      // Now we only need to compute the offset of the truncated elements.
      unsigned BuildVecNumElts =  BuildVect.getNumOperands();
      unsigned TruncVecNumElts = VT.getVectorNumElements();
      unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts;

      assert((BuildVecNumElts % TruncVecNumElts) == 0 &&
             "Invalid number of elements");

      // Keep every TruncEltOffset-th operand; the truncate discards the rest.
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset)
        Opnds.push_back(BuildVect.getOperand(i));

      return DAG.getBuildVector(VT, SDLoc(N), Opnds);
    }
  }

  // See if we can simplify the input to this truncate through knowledge that
  // only the low bits are being used.
  // For example "trunc (or (shl x, 8), y)" // -> trunc y
  // Currently we only perform this optimization on scalars because vectors
  // may have different active low bits.
  if (!VT.isVector()) {
    if (SDValue Shorter =
            GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(),
                                                     VT.getSizeInBits())))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter);
  }
  // fold (truncate (load x)) -> (smaller load x)
  // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits))
  if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) {
    if (SDValue Reduced = ReduceLoadWidth(N))
      return Reduced;

    // Handle the case where the load remains an extending load even
    // after truncation.
    if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      if (!LN0->isVolatile() &&
          LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) {
        SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0),
                                         VT, LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1));
        return NewLoad;
      }
    }
  }
  // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)),
  // where ... are all 'undef'.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) {
    SmallVector<EVT, 8> VTs;
    SDValue V;
    unsigned Idx = 0;
    unsigned NumDefs = 0;

    // Scan operands, recording the (at most one) non-undef operand and the
    // truncated type each operand would take.
    for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
      SDValue X = N0.getOperand(i);
      if (!X.isUndef()) {
        V = X;
        Idx = i;
        NumDefs++;
      }
      // Stop if more than one members are non-undef.
      if (NumDefs > 1)
        break;
      VTs.push_back(EVT::getVectorVT(*DAG.getContext(),
                                     VT.getVectorElementType(),
                                     X.getValueType().getVectorNumElements()));
    }

    if (NumDefs == 0)
      return DAG.getUNDEF(VT);

    if (NumDefs == 1) {
      assert(V.getNode() && "The single defined operand is empty!");
      SmallVector<SDValue, 8> Opnds;
      for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
        if (i != Idx) {
          Opnds.push_back(DAG.getUNDEF(VTs[i]));
          continue;
        }
        SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V);
        AddToWorklist(NV.getNode());
        Opnds.push_back(NV);
      }
      return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds);
    }
  }

  // Fold truncate of a bitcast of a vector to an extract of the low vector
  // element.
  //
  // e.g. trunc (i64 (bitcast v2i32:x)) -> extract_vector_elt v2i32:x, 0
  if (N0.getOpcode() == ISD::BITCAST && !VT.isVector()) {
    SDValue VecSrc = N0.getOperand(0);
    EVT SrcVT = VecSrc.getValueType();
    if (SrcVT.isVector() && SrcVT.getScalarType() == VT &&
        (!LegalOperations ||
         TLI.isOperationLegal(ISD::EXTRACT_VECTOR_ELT, SrcVT))) {
      SDLoc SL(N);

      EVT IdxVT = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SL, VT,
                         VecSrc, DAG.getConstant(0, SL, IdxVT));
    }
  }

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
   7302 
   7303 static SDNode *getBuildPairElt(SDNode *N, unsigned i) {
   7304   SDValue Elt = N->getOperand(i);
   7305   if (Elt.getOpcode() != ISD::MERGE_VALUES)
   7306     return Elt.getNode();
   7307   return Elt.getOperand(Elt.getResNo()).getNode();
   7308 }
   7309 
/// build_pair (load, load) -> load
/// if load locations are consecutive.
SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
  assert(N->getOpcode() == ISD::BUILD_PAIR);

  // Both halves must be simple (non-extending) single-use loads from the same
  // address space.
  LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0));
  LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1));
  if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() ||
      LD1->getAddressSpace() != LD2->getAddressSpace())
    return SDValue();
  EVT LD1VT = LD1->getValueType(0);
  unsigned LD1Bytes = LD1VT.getSizeInBits() / 8;
  // LD2 must read the bytes immediately after LD1's (one element further on).
  if (ISD::isNON_EXTLoad(LD2) && LD2->hasOneUse() &&
      DAG.areNonVolatileConsecutiveLoads(LD2, LD1, LD1Bytes, 1)) {
    unsigned Align = LD1->getAlignment();
    unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
        VT.getTypeForEVT(*DAG.getContext()));

    // Only merge if the wide load's required alignment is already satisfied
    // (and the wide load is legal, if we must stay legal).
    if (NewAlign <= Align &&
        (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)))
      return DAG.getLoad(VT, SDLoc(N), LD1->getChain(),
                         LD1->getBasePtr(), LD1->getPointerInfo(),
                         false, false, false, Align);
  }

  return SDValue();
}
   7337 
   7338 static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) {
   7339   // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi
   7340   // and Lo parts; on big-endian machines it doesn't.
   7341   return DAG.getDataLayout().isBigEndian() ? 1 : 0;
   7342 }
   7343 
   7344 static SDValue foldBitcastedFPLogic(SDNode *N, SelectionDAG &DAG,
   7345                                     const TargetLowering &TLI) {
   7346   // If this is not a bitcast to an FP type or if the target doesn't have
   7347   // IEEE754-compliant FP logic, we're done.
   7348   EVT VT = N->getValueType(0);
   7349   if (!VT.isFloatingPoint() || !TLI.hasBitPreservingFPLogic(VT))
   7350     return SDValue();
   7351 
   7352   // TODO: Use splat values for the constant-checking below and remove this
   7353   // restriction.
   7354   SDValue N0 = N->getOperand(0);
   7355   EVT SourceVT = N0.getValueType();
   7356   if (SourceVT.isVector())
   7357     return SDValue();
   7358 
   7359   unsigned FPOpcode;
   7360   APInt SignMask;
   7361   switch (N0.getOpcode()) {
   7362   case ISD::AND:
   7363     FPOpcode = ISD::FABS;
   7364     SignMask = ~APInt::getSignBit(SourceVT.getSizeInBits());
   7365     break;
   7366   case ISD::XOR:
   7367     FPOpcode = ISD::FNEG;
   7368     SignMask = APInt::getSignBit(SourceVT.getSizeInBits());
   7369     break;
   7370   // TODO: ISD::OR --> ISD::FNABS?
   7371   default:
   7372     return SDValue();
   7373   }
   7374 
   7375   // Fold (bitcast int (and (bitcast fp X to int), 0x7fff...) to fp) -> fabs X
   7376   // Fold (bitcast int (xor (bitcast fp X to int), 0x8000...) to fp) -> fneg X
   7377   SDValue LogicOp0 = N0.getOperand(0);
   7378   ConstantSDNode *LogicOp1 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
   7379   if (LogicOp1 && LogicOp1->getAPIntValue() == SignMask &&
   7380       LogicOp0.getOpcode() == ISD::BITCAST &&
   7381       LogicOp0->getOperand(0).getValueType() == VT)
   7382     return DAG.getNode(FPOpcode, SDLoc(N), VT, LogicOp0->getOperand(0));
   7383 
   7384   return SDValue();
   7385 }
   7386 
   7387 SDValue DAGCombiner::visitBITCAST(SDNode *N) {
   7388   SDValue N0 = N->getOperand(0);
   7389   EVT VT = N->getValueType(0);
   7390 
   7391   // If the input is a BUILD_VECTOR with all constant elements, fold this now.
   7392   // Only do this before legalize, since afterward the target may be depending
   7393   // on the bitconvert.
   7394   // First check to see if this is all constant.
   7395   if (!LegalTypes &&
   7396       N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() &&
   7397       VT.isVector()) {
   7398     bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant();
   7399 
   7400     EVT DestEltVT = N->getValueType(0).getVectorElementType();
   7401     assert(!DestEltVT.isVector() &&
   7402            "Element type of vector ValueType must not be vector!");
   7403     if (isSimple)
   7404       return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT);
   7405   }
   7406 
   7407   // If the input is a constant, let getNode fold it.
   7408   if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) {
   7409     // If we can't allow illegal operations, we need to check that this is just
   7410     // a fp -> int or int -> conversion and that the resulting operation will
   7411     // be legal.
   7412     if (!LegalOperations ||
   7413         (isa<ConstantSDNode>(N0) && VT.isFloatingPoint() && !VT.isVector() &&
   7414          TLI.isOperationLegal(ISD::ConstantFP, VT)) ||
   7415         (isa<ConstantFPSDNode>(N0) && VT.isInteger() && !VT.isVector() &&
   7416          TLI.isOperationLegal(ISD::Constant, VT)))
   7417       return DAG.getBitcast(VT, N0);
   7418   }
   7419 
   7420   // (conv (conv x, t1), t2) -> (conv x, t2)
   7421   if (N0.getOpcode() == ISD::BITCAST)
   7422     return DAG.getBitcast(VT, N0.getOperand(0));
   7423 
   7424   // fold (conv (load x)) -> (load (conv*)x)
   7425   // If the resultant load doesn't need a higher alignment than the original!
   7426   if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
   7427       // Do not change the width of a volatile load.
   7428       !cast<LoadSDNode>(N0)->isVolatile() &&
   7429       // Do not remove the cast if the types differ in endian layout.
   7430       TLI.hasBigEndianPartOrdering(N0.getValueType(), DAG.getDataLayout()) ==
   7431           TLI.hasBigEndianPartOrdering(VT, DAG.getDataLayout()) &&
   7432       (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) &&
   7433       TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) {
   7434     LoadSDNode *LN0 = cast<LoadSDNode>(N0);
   7435     unsigned OrigAlign = LN0->getAlignment();
   7436 
   7437     bool Fast = false;
   7438     if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), VT,
   7439                                LN0->getAddressSpace(), OrigAlign, &Fast) &&
   7440         Fast) {
   7441       SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(),
   7442                                  LN0->getBasePtr(), LN0->getPointerInfo(),
   7443                                  LN0->isVolatile(), LN0->isNonTemporal(),
   7444                                  LN0->isInvariant(), OrigAlign,
   7445                                  LN0->getAAInfo());
   7446       DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));
   7447       return Load;
   7448     }
   7449   }
   7450 
   7451   if (SDValue V = foldBitcastedFPLogic(N, DAG, TLI))
   7452     return V;
   7453 
   7454   // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit)
   7455   // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit))
   7456   //
   7457   // For ppc_fp128:
   7458   // fold (bitcast (fneg x)) ->
   7459   //     flipbit = signbit
   7460   //     (xor (bitcast x) (build_pair flipbit, flipbit))
   7461   //
   7462   // fold (bitcast (fabs x)) ->
   7463   //     flipbit = (and (extract_element (bitcast x), 0), signbit)
   7464   //     (xor (bitcast x) (build_pair flipbit, flipbit))
   7465   // This often reduces constant pool loads.
   7466   if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) ||
   7467        (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) &&
   7468       N0.getNode()->hasOneUse() && VT.isInteger() &&
   7469       !VT.isVector() && !N0.getValueType().isVector()) {
   7470     SDValue NewConv = DAG.getBitcast(VT, N0.getOperand(0));
   7471     AddToWorklist(NewConv.getNode());
   7472 
   7473     SDLoc DL(N);
   7474     if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
   7475       assert(VT.getSizeInBits() == 128);
   7476       SDValue SignBit = DAG.getConstant(
   7477           APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64);
   7478       SDValue FlipBit;
   7479       if (N0.getOpcode() == ISD::FNEG) {
   7480         FlipBit = SignBit;
   7481         AddToWorklist(FlipBit.getNode());
   7482       } else {
   7483         assert(N0.getOpcode() == ISD::FABS);
   7484         SDValue Hi =
   7485             DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv,
   7486                         DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
   7487                                               SDLoc(NewConv)));
   7488         AddToWorklist(Hi.getNode());
   7489         FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit);
   7490         AddToWorklist(FlipBit.getNode());
   7491       }
   7492       SDValue FlipBits =
   7493           DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
   7494       AddToWorklist(FlipBits.getNode());
   7495       return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits);
   7496     }
   7497     APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
   7498     if (N0.getOpcode() == ISD::FNEG)
   7499       return DAG.getNode(ISD::XOR, DL, VT,
   7500                          NewConv, DAG.getConstant(SignBit, DL, VT));
   7501     assert(N0.getOpcode() == ISD::FABS);
   7502     return DAG.getNode(ISD::AND, DL, VT,
   7503                        NewConv, DAG.getConstant(~SignBit, DL, VT));
   7504   }
   7505 
   7506   // fold (bitconvert (fcopysign cst, x)) ->
   7507   //         (or (and (bitconvert x), sign), (and cst, (not sign)))
   7508   // Note that we don't handle (copysign x, cst) because this can always be
   7509   // folded to an fneg or fabs.
   7510   //
   7511   // For ppc_fp128:
   7512   // fold (bitcast (fcopysign cst, x)) ->
   7513   //     flipbit = (and (extract_element
   7514   //                     (xor (bitcast cst), (bitcast x)), 0),
   7515   //                    signbit)
   7516   //     (xor (bitcast cst) (build_pair flipbit, flipbit))
   7517   if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() &&
   7518       isa<ConstantFPSDNode>(N0.getOperand(0)) &&
   7519       VT.isInteger() && !VT.isVector()) {
   7520     unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits();
   7521     EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth);
   7522     if (isTypeLegal(IntXVT)) {
   7523       SDValue X = DAG.getBitcast(IntXVT, N0.getOperand(1));
   7524       AddToWorklist(X.getNode());
   7525 
   7526       // If X has a different width than the result/lhs, sext it or truncate it.
   7527       unsigned VTWidth = VT.getSizeInBits();
   7528       if (OrigXWidth < VTWidth) {
   7529         X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X);
   7530         AddToWorklist(X.getNode());
   7531       } else if (OrigXWidth > VTWidth) {
   7532         // To get the sign bit in the right place, we have to shift it right
   7533         // before truncating.
   7534         SDLoc DL(X);
   7535         X = DAG.getNode(ISD::SRL, DL,
   7536                         X.getValueType(), X,
   7537                         DAG.getConstant(OrigXWidth-VTWidth, DL,
   7538                                         X.getValueType()));
   7539         AddToWorklist(X.getNode());
   7540         X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
   7541         AddToWorklist(X.getNode());
   7542       }
   7543 
   7544       if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) {
   7545         APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2);
   7546         SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
   7547         AddToWorklist(Cst.getNode());
   7548         SDValue X = DAG.getBitcast(VT, N0.getOperand(1));
   7549         AddToWorklist(X.getNode());
   7550         SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X);
   7551         AddToWorklist(XorResult.getNode());
   7552         SDValue XorResult64 = DAG.getNode(
   7553             ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult,
   7554             DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG),
   7555                                   SDLoc(XorResult)));
   7556         AddToWorklist(XorResult64.getNode());
   7557         SDValue FlipBit =
   7558             DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64,
   7559                         DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64));
   7560         AddToWorklist(FlipBit.getNode());
   7561         SDValue FlipBits =
   7562             DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit);
   7563         AddToWorklist(FlipBits.getNode());
   7564         return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits);
   7565       }
   7566       APInt SignBit = APInt::getSignBit(VT.getSizeInBits());
   7567       X = DAG.getNode(ISD::AND, SDLoc(X), VT,
   7568                       X, DAG.getConstant(SignBit, SDLoc(X), VT));
   7569       AddToWorklist(X.getNode());
   7570 
   7571       SDValue Cst = DAG.getBitcast(VT, N0.getOperand(0));
   7572       Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT,
   7573                         Cst, DAG.getConstant(~SignBit, SDLoc(Cst), VT));
   7574       AddToWorklist(Cst.getNode());
   7575 
   7576       return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst);
   7577     }
   7578   }
   7579 
   7580   // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive.
   7581   if (N0.getOpcode() == ISD::BUILD_PAIR)
   7582     if (SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT))
   7583       return CombineLD;
   7584 
   7585   // Remove double bitcasts from shuffles - this is often a legacy of
   7586   // XformToShuffleWithZero being used to combine bitmaskings (of
   7587   // float vectors bitcast to integer vectors) into shuffles.
   7588   // bitcast(shuffle(bitcast(s0),bitcast(s1))) -> shuffle(s0,s1)
   7589   if (Level < AfterLegalizeDAG && TLI.isTypeLegal(VT) && VT.isVector() &&
   7590       N0->getOpcode() == ISD::VECTOR_SHUFFLE &&
   7591       VT.getVectorNumElements() >= N0.getValueType().getVectorNumElements() &&
   7592       !(VT.getVectorNumElements() % N0.getValueType().getVectorNumElements())) {
   7593     ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N0);
   7594 
   7595     // If operands are a bitcast, peek through if it casts the original VT.
   7596     // If operands are a constant, just bitcast back to original VT.
   7597     auto PeekThroughBitcast = [&](SDValue Op) {
   7598       if (Op.getOpcode() == ISD::BITCAST &&
   7599           Op.getOperand(0).getValueType() == VT)
   7600         return SDValue(Op.getOperand(0));
   7601       if (ISD::isBuildVectorOfConstantSDNodes(Op.getNode()) ||
   7602           ISD::isBuildVectorOfConstantFPSDNodes(Op.getNode()))
   7603         return DAG.getBitcast(VT, Op);
   7604       return SDValue();
   7605     };
   7606 
   7607     SDValue SV0 = PeekThroughBitcast(N0->getOperand(0));
   7608     SDValue SV1 = PeekThroughBitcast(N0->getOperand(1));
   7609     if (!(SV0 && SV1))
   7610       return SDValue();
   7611 
   7612     int MaskScale =
   7613         VT.getVectorNumElements() / N0.getValueType().getVectorNumElements();
   7614     SmallVector<int, 8> NewMask;
   7615     for (int M : SVN->getMask())
   7616       for (int i = 0; i != MaskScale; ++i)
   7617         NewMask.push_back(M < 0 ? -1 : M * MaskScale + i);
   7618 
   7619     bool LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
   7620     if (!LegalMask) {
   7621       std::swap(SV0, SV1);
   7622       ShuffleVectorSDNode::commuteMask(NewMask);
   7623       LegalMask = TLI.isShuffleMaskLegal(NewMask, VT);
   7624     }
   7625 
   7626     if (LegalMask)
   7627       return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, NewMask);
   7628   }
   7629 
   7630   return SDValue();
   7631 }
   7632 
   7633 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) {
   7634   EVT VT = N->getValueType(0);
   7635   return CombineConsecutiveLoads(N, VT);
   7636 }
   7637 
/// We know that BV is a build_vector node with Constant, ConstantFP or Undef
/// operands. DstEltVT indicates the destination element value type.
/// Returns a new BUILD_VECTOR (or SCALAR_TO_VECTOR) whose constant elements
/// are the original elements reinterpreted as DstEltVT.  Three cases:
/// same-size elements (element-wise bitcast), growing (pack several inputs
/// into one output), and shrinking (split one input into several outputs).
/// FP elements are routed through same-sized integers first.
SDValue DAGCombiner::
ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) {
  EVT SrcEltVT = BV->getValueType(0).getVectorElementType();

  // If this is already the right type, we're done.
  if (SrcEltVT == DstEltVT) return SDValue(BV, 0);

  unsigned SrcBitSize = SrcEltVT.getSizeInBits();
  unsigned DstBitSize = DstEltVT.getSizeInBits();

  // If this is a conversion of N elements of one type to N elements of another
  // type, convert each element.  This handles FP<->INT cases.
  if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getBitcast(DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (SDValue Op : BV->op_values()) {
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getBitcast(DstEltVT, Op));
      AddToWorklist(Ops.back().getNode());
    }
    return DAG.getBuildVector(VT, SDLoc(BV), Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  SDLoc DL(BV);

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = DAG.getDataLayout().isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian targets the first input element is the
        // least-significant chunk of the output, so walk inputs in reverse;
        // on big-endian, walk them forward.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.isUndef()) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      // The packed element is undef only if every contributing input was
      // undef; otherwise undef inputs contribute zero bits.
      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DL, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getBuildVector(VT, DL, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (const SDValue &Op : BV->op_values()) {
    // An undef input expands to all-undef output pieces.
    if (Op.isUndef()) {
      Ops.append(NumOutputsPerInput, DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(Op)->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    // Peel off DstBitSize-wide chunks, least-significant first.
    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DL, DstEltVT));
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (DAG.getDataLayout().isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getBuildVector(VT, DL, Ops);
}
   7759 
   7760 /// Try to perform FMA combining on a given FADD node.
   7761 SDValue DAGCombiner::visitFADDForFMACombine(SDNode *N) {
   7762   SDValue N0 = N->getOperand(0);
   7763   SDValue N1 = N->getOperand(1);
   7764   EVT VT = N->getValueType(0);
   7765   SDLoc SL(N);
   7766 
   7767   const TargetOptions &Options = DAG.getTarget().Options;
   7768   bool AllowFusion =
   7769       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
   7770 
   7771   // Floating-point multiply-add with intermediate rounding.
   7772   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   7773 
   7774   // Floating-point multiply-add without intermediate rounding.
   7775   bool HasFMA =
   7776       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   7777       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   7778 
   7779   // No valid opcode, do not combine.
   7780   if (!HasFMAD && !HasFMA)
   7781     return SDValue();
   7782 
   7783   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
   7784   ;
   7785   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
   7786     return SDValue();
   7787 
   7788   // Always prefer FMAD to FMA for precision.
   7789   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   7790   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   7791   bool LookThroughFPExt = TLI.isFPExtFree(VT);
   7792 
   7793   // If we have two choices trying to fold (fadd (fmul u, v), (fmul x, y)),
   7794   // prefer to fold the multiply with fewer uses.
   7795   if (Aggressive && N0.getOpcode() == ISD::FMUL &&
   7796       N1.getOpcode() == ISD::FMUL) {
   7797     if (N0.getNode()->use_size() > N1.getNode()->use_size())
   7798       std::swap(N0, N1);
   7799   }
   7800 
   7801   // fold (fadd (fmul x, y), z) -> (fma x, y, z)
   7802   if (N0.getOpcode() == ISD::FMUL &&
   7803       (Aggressive || N0->hasOneUse())) {
   7804     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7805                        N0.getOperand(0), N0.getOperand(1), N1);
   7806   }
   7807 
   7808   // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
   7809   // Note: Commutes FADD operands.
   7810   if (N1.getOpcode() == ISD::FMUL &&
   7811       (Aggressive || N1->hasOneUse())) {
   7812     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7813                        N1.getOperand(0), N1.getOperand(1), N0);
   7814   }
   7815 
   7816   // Look through FP_EXTEND nodes to do more combining.
   7817   if (AllowFusion && LookThroughFPExt) {
   7818     // fold (fadd (fpext (fmul x, y)), z) -> (fma (fpext x), (fpext y), z)
   7819     if (N0.getOpcode() == ISD::FP_EXTEND) {
   7820       SDValue N00 = N0.getOperand(0);
   7821       if (N00.getOpcode() == ISD::FMUL)
   7822         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7823                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7824                                        N00.getOperand(0)),
   7825                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7826                                        N00.getOperand(1)), N1);
   7827     }
   7828 
   7829     // fold (fadd x, (fpext (fmul y, z))) -> (fma (fpext y), (fpext z), x)
   7830     // Note: Commutes FADD operands.
   7831     if (N1.getOpcode() == ISD::FP_EXTEND) {
   7832       SDValue N10 = N1.getOperand(0);
   7833       if (N10.getOpcode() == ISD::FMUL)
   7834         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7835                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7836                                        N10.getOperand(0)),
   7837                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   7838                                        N10.getOperand(1)), N0);
   7839     }
   7840   }
   7841 
   7842   // More folding opportunities when target permits.
   7843   if ((AllowFusion || HasFMAD)  && Aggressive) {
   7844     // fold (fadd (fma x, y, (fmul u, v)), z) -> (fma x, y (fma u, v, z))
   7845     if (N0.getOpcode() == PreferredFusedOpcode &&
   7846         N0.getOperand(2).getOpcode() == ISD::FMUL) {
   7847       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7848                          N0.getOperand(0), N0.getOperand(1),
   7849                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   7850                                      N0.getOperand(2).getOperand(0),
   7851                                      N0.getOperand(2).getOperand(1),
   7852                                      N1));
   7853     }
   7854 
   7855     // fold (fadd x, (fma y, z, (fmul u, v)) -> (fma y, z (fma u, v, x))
   7856     if (N1->getOpcode() == PreferredFusedOpcode &&
   7857         N1.getOperand(2).getOpcode() == ISD::FMUL) {
   7858       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7859                          N1.getOperand(0), N1.getOperand(1),
   7860                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   7861                                      N1.getOperand(2).getOperand(0),
   7862                                      N1.getOperand(2).getOperand(1),
   7863                                      N0));
   7864     }
   7865 
   7866     if (AllowFusion && LookThroughFPExt) {
   7867       // fold (fadd (fma x, y, (fpext (fmul u, v))), z)
   7868       //   -> (fma x, y, (fma (fpext u), (fpext v), z))
   7869       auto FoldFAddFMAFPExtFMul = [&] (
   7870           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
   7871         return DAG.getNode(PreferredFusedOpcode, SL, VT, X, Y,
   7872                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   7873                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
   7874                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
   7875                                        Z));
   7876       };
   7877       if (N0.getOpcode() == PreferredFusedOpcode) {
   7878         SDValue N02 = N0.getOperand(2);
   7879         if (N02.getOpcode() == ISD::FP_EXTEND) {
   7880           SDValue N020 = N02.getOperand(0);
   7881           if (N020.getOpcode() == ISD::FMUL)
   7882             return FoldFAddFMAFPExtFMul(N0.getOperand(0), N0.getOperand(1),
   7883                                         N020.getOperand(0), N020.getOperand(1),
   7884                                         N1);
   7885         }
   7886       }
   7887 
   7888       // fold (fadd (fpext (fma x, y, (fmul u, v))), z)
   7889       //   -> (fma (fpext x), (fpext y), (fma (fpext u), (fpext v), z))
   7890       // FIXME: This turns two single-precision and one double-precision
   7891       // operation into two double-precision operations, which might not be
   7892       // interesting for all targets, especially GPUs.
   7893       auto FoldFAddFPExtFMAFMul = [&] (
   7894           SDValue X, SDValue Y, SDValue U, SDValue V, SDValue Z) {
   7895         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7896                            DAG.getNode(ISD::FP_EXTEND, SL, VT, X),
   7897                            DAG.getNode(ISD::FP_EXTEND, SL, VT, Y),
   7898                            DAG.getNode(PreferredFusedOpcode, SL, VT,
   7899                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, U),
   7900                                        DAG.getNode(ISD::FP_EXTEND, SL, VT, V),
   7901                                        Z));
   7902       };
   7903       if (N0.getOpcode() == ISD::FP_EXTEND) {
   7904         SDValue N00 = N0.getOperand(0);
   7905         if (N00.getOpcode() == PreferredFusedOpcode) {
   7906           SDValue N002 = N00.getOperand(2);
   7907           if (N002.getOpcode() == ISD::FMUL)
   7908             return FoldFAddFPExtFMAFMul(N00.getOperand(0), N00.getOperand(1),
   7909                                         N002.getOperand(0), N002.getOperand(1),
   7910                                         N1);
   7911         }
   7912       }
   7913 
   7914       // fold (fadd x, (fma y, z, (fpext (fmul u, v)))
   7915       //   -> (fma y, z, (fma (fpext u), (fpext v), x))
   7916       if (N1.getOpcode() == PreferredFusedOpcode) {
   7917         SDValue N12 = N1.getOperand(2);
   7918         if (N12.getOpcode() == ISD::FP_EXTEND) {
   7919           SDValue N120 = N12.getOperand(0);
   7920           if (N120.getOpcode() == ISD::FMUL)
   7921             return FoldFAddFMAFPExtFMul(N1.getOperand(0), N1.getOperand(1),
   7922                                         N120.getOperand(0), N120.getOperand(1),
   7923                                         N0);
   7924         }
   7925       }
   7926 
   7927       // fold (fadd x, (fpext (fma y, z, (fmul u, v)))
   7928       //   -> (fma (fpext y), (fpext z), (fma (fpext u), (fpext v), x))
   7929       // FIXME: This turns two single-precision and one double-precision
   7930       // operation into two double-precision operations, which might not be
   7931       // interesting for all targets, especially GPUs.
   7932       if (N1.getOpcode() == ISD::FP_EXTEND) {
   7933         SDValue N10 = N1.getOperand(0);
   7934         if (N10.getOpcode() == PreferredFusedOpcode) {
   7935           SDValue N102 = N10.getOperand(2);
   7936           if (N102.getOpcode() == ISD::FMUL)
   7937             return FoldFAddFPExtFMAFMul(N10.getOperand(0), N10.getOperand(1),
   7938                                         N102.getOperand(0), N102.getOperand(1),
   7939                                         N0);
   7940         }
   7941       }
   7942     }
   7943   }
   7944 
   7945   return SDValue();
   7946 }
   7947 
   7948 /// Try to perform FMA combining on a given FSUB node.
   7949 SDValue DAGCombiner::visitFSUBForFMACombine(SDNode *N) {
   7950   SDValue N0 = N->getOperand(0);
   7951   SDValue N1 = N->getOperand(1);
   7952   EVT VT = N->getValueType(0);
   7953   SDLoc SL(N);
   7954 
   7955   const TargetOptions &Options = DAG.getTarget().Options;
   7956   bool AllowFusion =
   7957       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
   7958 
   7959   // Floating-point multiply-add with intermediate rounding.
   7960   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   7961 
   7962   // Floating-point multiply-add without intermediate rounding.
   7963   bool HasFMA =
   7964       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   7965       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   7966 
   7967   // No valid opcode, do not combine.
   7968   if (!HasFMAD && !HasFMA)
   7969     return SDValue();
   7970 
   7971   const SelectionDAGTargetInfo *STI = DAG.getSubtarget().getSelectionDAGInfo();
   7972   if (AllowFusion && STI && STI->generateFMAsInMachineCombiner(OptLevel))
   7973     return SDValue();
   7974 
   7975   // Always prefer FMAD to FMA for precision.
   7976   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   7977   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   7978   bool LookThroughFPExt = TLI.isFPExtFree(VT);
   7979 
   7980   // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
   7981   if (N0.getOpcode() == ISD::FMUL &&
   7982       (Aggressive || N0->hasOneUse())) {
   7983     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7984                        N0.getOperand(0), N0.getOperand(1),
   7985                        DAG.getNode(ISD::FNEG, SL, VT, N1));
   7986   }
   7987 
   7988   // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
   7989   // Note: Commutes FSUB operands.
   7990   if (N1.getOpcode() == ISD::FMUL &&
   7991       (Aggressive || N1->hasOneUse()))
   7992     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   7993                        DAG.getNode(ISD::FNEG, SL, VT,
   7994                                    N1.getOperand(0)),
   7995                        N1.getOperand(1), N0);
   7996 
   7997   // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
   7998   if (N0.getOpcode() == ISD::FNEG &&
   7999       N0.getOperand(0).getOpcode() == ISD::FMUL &&
   8000       (Aggressive || (N0->hasOneUse() && N0.getOperand(0).hasOneUse()))) {
   8001     SDValue N00 = N0.getOperand(0).getOperand(0);
   8002     SDValue N01 = N0.getOperand(0).getOperand(1);
   8003     return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8004                        DAG.getNode(ISD::FNEG, SL, VT, N00), N01,
   8005                        DAG.getNode(ISD::FNEG, SL, VT, N1));
   8006   }
   8007 
   8008   // Look through FP_EXTEND nodes to do more combining.
   8009   if (AllowFusion && LookThroughFPExt) {
   8010     // fold (fsub (fpext (fmul x, y)), z)
   8011     //   -> (fma (fpext x), (fpext y), (fneg z))
   8012     if (N0.getOpcode() == ISD::FP_EXTEND) {
   8013       SDValue N00 = N0.getOperand(0);
   8014       if (N00.getOpcode() == ISD::FMUL)
   8015         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8016                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8017                                        N00.getOperand(0)),
   8018                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8019                                        N00.getOperand(1)),
   8020                            DAG.getNode(ISD::FNEG, SL, VT, N1));
   8021     }
   8022 
   8023     // fold (fsub x, (fpext (fmul y, z)))
   8024     //   -> (fma (fneg (fpext y)), (fpext z), x)
   8025     // Note: Commutes FSUB operands.
   8026     if (N1.getOpcode() == ISD::FP_EXTEND) {
   8027       SDValue N10 = N1.getOperand(0);
   8028       if (N10.getOpcode() == ISD::FMUL)
   8029         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8030                            DAG.getNode(ISD::FNEG, SL, VT,
   8031                                        DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8032                                                    N10.getOperand(0))),
   8033                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8034                                        N10.getOperand(1)),
   8035                            N0);
   8036     }
   8037 
   8038     // fold (fsub (fpext (fneg (fmul, x, y))), z)
   8039     //   -> (fneg (fma (fpext x), (fpext y), z))
   8040     // Note: This could be removed with appropriate canonicalization of the
   8041     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
   8042     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
   8043     // from implementing the canonicalization in visitFSUB.
   8044     if (N0.getOpcode() == ISD::FP_EXTEND) {
   8045       SDValue N00 = N0.getOperand(0);
   8046       if (N00.getOpcode() == ISD::FNEG) {
   8047         SDValue N000 = N00.getOperand(0);
   8048         if (N000.getOpcode() == ISD::FMUL) {
   8049           return DAG.getNode(ISD::FNEG, SL, VT,
   8050                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   8051                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8052                                                      N000.getOperand(0)),
   8053                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8054                                                      N000.getOperand(1)),
   8055                                          N1));
   8056         }
   8057       }
   8058     }
   8059 
   8060     // fold (fsub (fneg (fpext (fmul, x, y))), z)
   8061     //   -> (fneg (fma (fpext x)), (fpext y), z)
   8062     // Note: This could be removed with appropriate canonicalization of the
   8063     // input expression into (fneg (fadd (fpext (fmul, x, y)), z). However, the
   8064     // orthogonal flags -fp-contract=fast and -enable-unsafe-fp-math prevent
   8065     // from implementing the canonicalization in visitFSUB.
   8066     if (N0.getOpcode() == ISD::FNEG) {
   8067       SDValue N00 = N0.getOperand(0);
   8068       if (N00.getOpcode() == ISD::FP_EXTEND) {
   8069         SDValue N000 = N00.getOperand(0);
   8070         if (N000.getOpcode() == ISD::FMUL) {
   8071           return DAG.getNode(ISD::FNEG, SL, VT,
   8072                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   8073                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8074                                                      N000.getOperand(0)),
   8075                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8076                                                      N000.getOperand(1)),
   8077                                          N1));
   8078         }
   8079       }
   8080     }
   8081 
   8082   }
   8083 
   8084   // More folding opportunities when target permits.
   8085   if ((AllowFusion || HasFMAD) && Aggressive) {
   8086     // fold (fsub (fma x, y, (fmul u, v)), z)
   8087     //   -> (fma x, y (fma u, v, (fneg z)))
   8088     if (N0.getOpcode() == PreferredFusedOpcode &&
   8089         N0.getOperand(2).getOpcode() == ISD::FMUL) {
   8090       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8091                          N0.getOperand(0), N0.getOperand(1),
   8092                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   8093                                      N0.getOperand(2).getOperand(0),
   8094                                      N0.getOperand(2).getOperand(1),
   8095                                      DAG.getNode(ISD::FNEG, SL, VT,
   8096                                                  N1)));
   8097     }
   8098 
   8099     // fold (fsub x, (fma y, z, (fmul u, v)))
   8100     //   -> (fma (fneg y), z, (fma (fneg u), v, x))
   8101     if (N1.getOpcode() == PreferredFusedOpcode &&
   8102         N1.getOperand(2).getOpcode() == ISD::FMUL) {
   8103       SDValue N20 = N1.getOperand(2).getOperand(0);
   8104       SDValue N21 = N1.getOperand(2).getOperand(1);
   8105       return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8106                          DAG.getNode(ISD::FNEG, SL, VT,
   8107                                      N1.getOperand(0)),
   8108                          N1.getOperand(1),
   8109                          DAG.getNode(PreferredFusedOpcode, SL, VT,
   8110                                      DAG.getNode(ISD::FNEG, SL, VT, N20),
   8111 
   8112                                      N21, N0));
   8113     }
   8114 
   8115     if (AllowFusion && LookThroughFPExt) {
   8116       // fold (fsub (fma x, y, (fpext (fmul u, v))), z)
   8117       //   -> (fma x, y (fma (fpext u), (fpext v), (fneg z)))
   8118       if (N0.getOpcode() == PreferredFusedOpcode) {
   8119         SDValue N02 = N0.getOperand(2);
   8120         if (N02.getOpcode() == ISD::FP_EXTEND) {
   8121           SDValue N020 = N02.getOperand(0);
   8122           if (N020.getOpcode() == ISD::FMUL)
   8123             return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8124                                N0.getOperand(0), N0.getOperand(1),
   8125                                DAG.getNode(PreferredFusedOpcode, SL, VT,
   8126                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8127                                                        N020.getOperand(0)),
   8128                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8129                                                        N020.getOperand(1)),
   8130                                            DAG.getNode(ISD::FNEG, SL, VT,
   8131                                                        N1)));
   8132         }
   8133       }
   8134 
   8135       // fold (fsub (fpext (fma x, y, (fmul u, v))), z)
   8136       //   -> (fma (fpext x), (fpext y),
   8137       //           (fma (fpext u), (fpext v), (fneg z)))
   8138       // FIXME: This turns two single-precision and one double-precision
   8139       // operation into two double-precision operations, which might not be
   8140       // interesting for all targets, especially GPUs.
   8141       if (N0.getOpcode() == ISD::FP_EXTEND) {
   8142         SDValue N00 = N0.getOperand(0);
   8143         if (N00.getOpcode() == PreferredFusedOpcode) {
   8144           SDValue N002 = N00.getOperand(2);
   8145           if (N002.getOpcode() == ISD::FMUL)
   8146             return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8147                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8148                                            N00.getOperand(0)),
   8149                                DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8150                                            N00.getOperand(1)),
   8151                                DAG.getNode(PreferredFusedOpcode, SL, VT,
   8152                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8153                                                        N002.getOperand(0)),
   8154                                            DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8155                                                        N002.getOperand(1)),
   8156                                            DAG.getNode(ISD::FNEG, SL, VT,
   8157                                                        N1)));
   8158         }
   8159       }
   8160 
   8161       // fold (fsub x, (fma y, z, (fpext (fmul u, v))))
   8162       //   -> (fma (fneg y), z, (fma (fneg (fpext u)), (fpext v), x))
   8163       if (N1.getOpcode() == PreferredFusedOpcode &&
   8164         N1.getOperand(2).getOpcode() == ISD::FP_EXTEND) {
   8165         SDValue N120 = N1.getOperand(2).getOperand(0);
   8166         if (N120.getOpcode() == ISD::FMUL) {
   8167           SDValue N1200 = N120.getOperand(0);
   8168           SDValue N1201 = N120.getOperand(1);
   8169           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8170                              DAG.getNode(ISD::FNEG, SL, VT, N1.getOperand(0)),
   8171                              N1.getOperand(1),
   8172                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   8173                                          DAG.getNode(ISD::FNEG, SL, VT,
   8174                                              DAG.getNode(ISD::FP_EXTEND, SL,
   8175                                                          VT, N1200)),
   8176                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8177                                                      N1201),
   8178                                          N0));
   8179         }
   8180       }
   8181 
   8182       // fold (fsub x, (fpext (fma y, z, (fmul u, v))))
   8183       //   -> (fma (fneg (fpext y)), (fpext z),
   8184       //           (fma (fneg (fpext u)), (fpext v), x))
   8185       // FIXME: This turns two single-precision and one double-precision
   8186       // operation into two double-precision operations, which might not be
   8187       // interesting for all targets, especially GPUs.
   8188       if (N1.getOpcode() == ISD::FP_EXTEND &&
   8189         N1.getOperand(0).getOpcode() == PreferredFusedOpcode) {
   8190         SDValue N100 = N1.getOperand(0).getOperand(0);
   8191         SDValue N101 = N1.getOperand(0).getOperand(1);
   8192         SDValue N102 = N1.getOperand(0).getOperand(2);
   8193         if (N102.getOpcode() == ISD::FMUL) {
   8194           SDValue N1020 = N102.getOperand(0);
   8195           SDValue N1021 = N102.getOperand(1);
   8196           return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8197                              DAG.getNode(ISD::FNEG, SL, VT,
   8198                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8199                                                      N100)),
   8200                              DAG.getNode(ISD::FP_EXTEND, SL, VT, N101),
   8201                              DAG.getNode(PreferredFusedOpcode, SL, VT,
   8202                                          DAG.getNode(ISD::FNEG, SL, VT,
   8203                                              DAG.getNode(ISD::FP_EXTEND, SL,
   8204                                                          VT, N1020)),
   8205                                          DAG.getNode(ISD::FP_EXTEND, SL, VT,
   8206                                                      N1021),
   8207                                          N0));
   8208         }
   8209       }
   8210     }
   8211   }
   8212 
   8213   return SDValue();
   8214 }
   8215 
   8216 /// Try to perform FMA combining on a given FMUL node.
   8217 SDValue DAGCombiner::visitFMULForFMACombine(SDNode *N) {
   8218   SDValue N0 = N->getOperand(0);
   8219   SDValue N1 = N->getOperand(1);
   8220   EVT VT = N->getValueType(0);
   8221   SDLoc SL(N);
   8222 
   8223   assert(N->getOpcode() == ISD::FMUL && "Expected FMUL Operation");
   8224 
   8225   const TargetOptions &Options = DAG.getTarget().Options;
   8226   bool AllowFusion =
   8227       (Options.AllowFPOpFusion == FPOpFusion::Fast || Options.UnsafeFPMath);
   8228 
   8229   // Floating-point multiply-add with intermediate rounding.
   8230   bool HasFMAD = (LegalOperations && TLI.isOperationLegal(ISD::FMAD, VT));
   8231 
   8232   // Floating-point multiply-add without intermediate rounding.
   8233   bool HasFMA =
   8234       AllowFusion && TLI.isFMAFasterThanFMulAndFAdd(VT) &&
   8235       (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT));
   8236 
   8237   // No valid opcode, do not combine.
   8238   if (!HasFMAD && !HasFMA)
   8239     return SDValue();
   8240 
   8241   // Always prefer FMAD to FMA for precision.
   8242   unsigned PreferredFusedOpcode = HasFMAD ? ISD::FMAD : ISD::FMA;
   8243   bool Aggressive = TLI.enableAggressiveFMAFusion(VT);
   8244 
   8245   // fold (fmul (fadd x, +1.0), y) -> (fma x, y, y)
   8246   // fold (fmul (fadd x, -1.0), y) -> (fma x, y, (fneg y))
   8247   auto FuseFADD = [&](SDValue X, SDValue Y) {
   8248     if (X.getOpcode() == ISD::FADD && (Aggressive || X->hasOneUse())) {
   8249       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
   8250       if (XC1 && XC1->isExactlyValue(+1.0))
   8251         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
   8252       if (XC1 && XC1->isExactlyValue(-1.0))
   8253         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   8254                            DAG.getNode(ISD::FNEG, SL, VT, Y));
   8255     }
   8256     return SDValue();
   8257   };
   8258 
   8259   if (SDValue FMA = FuseFADD(N0, N1))
   8260     return FMA;
   8261   if (SDValue FMA = FuseFADD(N1, N0))
   8262     return FMA;
   8263 
   8264   // fold (fmul (fsub +1.0, x), y) -> (fma (fneg x), y, y)
   8265   // fold (fmul (fsub -1.0, x), y) -> (fma (fneg x), y, (fneg y))
   8266   // fold (fmul (fsub x, +1.0), y) -> (fma x, y, (fneg y))
   8267   // fold (fmul (fsub x, -1.0), y) -> (fma x, y, y)
   8268   auto FuseFSUB = [&](SDValue X, SDValue Y) {
   8269     if (X.getOpcode() == ISD::FSUB && (Aggressive || X->hasOneUse())) {
   8270       auto XC0 = isConstOrConstSplatFP(X.getOperand(0));
   8271       if (XC0 && XC0->isExactlyValue(+1.0))
   8272         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8273                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
   8274                            Y);
   8275       if (XC0 && XC0->isExactlyValue(-1.0))
   8276         return DAG.getNode(PreferredFusedOpcode, SL, VT,
   8277                            DAG.getNode(ISD::FNEG, SL, VT, X.getOperand(1)), Y,
   8278                            DAG.getNode(ISD::FNEG, SL, VT, Y));
   8279 
   8280       auto XC1 = isConstOrConstSplatFP(X.getOperand(1));
   8281       if (XC1 && XC1->isExactlyValue(+1.0))
   8282         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y,
   8283                            DAG.getNode(ISD::FNEG, SL, VT, Y));
   8284       if (XC1 && XC1->isExactlyValue(-1.0))
   8285         return DAG.getNode(PreferredFusedOpcode, SL, VT, X.getOperand(0), Y, Y);
   8286     }
   8287     return SDValue();
   8288   };
   8289 
   8290   if (SDValue FMA = FuseFSUB(N0, N1))
   8291     return FMA;
   8292   if (SDValue FMA = FuseFSUB(N1, N0))
   8293     return FMA;
   8294 
   8295   return SDValue();
   8296 }
   8297 
/// Visit an ISD::FADD node and try to simplify or canonicalize it.
/// The folds are attempted in order; each successful one returns immediately.
SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  // Note: these are booleans (constant or constant build_vector), unlike the
  // ConstantFPSDNode* locals used by visitFSUB/visitFMUL below.
  bool N0CFP = isConstantFPBuildVectorOrConstantFP(N0);
  bool N1CFP = isConstantFPBuildVectorOrConstantFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  // Fast-math flags of the original node, propagated to any replacement.
  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fadd c1, c2) -> c1 + c2
  // (getNode is expected to perform the actual constant folding here.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, DL, VT, N1, N0, Flags);

  // fold (fadd A, (fneg B)) -> (fsub A, B)
  // A result of 2 from isNegatibleForFree means the negated form is cheaper
  // (see the equivalent check in visitFMUL).
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N1, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations), Flags);

  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
      isNegatibleForFree(N0, LegalOperations, TLI, &Options) == 2)
    return DAG.getNode(ISD::FSUB, DL, VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations), Flags);

  // If 'unsafe math' is enabled, fold lots of things.
  // Every fold below changes rounding behavior and is therefore only valid
  // under -enable-unsafe-fp-math.
  if (Options.UnsafeFPMath) {
    // No FP constant should be created after legalization as Instruction
    // Selection pass has a hard time dealing with FP constants.
    bool AllowNewConst = (Level < AfterLegalizeDAG);

    // fold (fadd A, 0) -> A
    if (ConstantFPSDNode *N1C = isConstOrConstSplatFP(N1))
      if (N1C->isZero())
        return N0;

    // fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
    // Reassociation of constants; requires the inner FADD to have one use so
    // the original value is not needed elsewhere.
    if (N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
        isConstantFPBuildVectorOrConstantFP(N0.getOperand(1)))
      return DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(0),
                         DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1), N1,
                                     Flags),
                         Flags);

    // If allowed, fold (fadd (fneg x), x) -> 0.0
    if (AllowNewConst && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
      return DAG.getConstantFP(0.0, DL, VT);

    // If allowed, fold (fadd x, (fneg x)) -> 0.0
    if (AllowNewConst && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
      return DAG.getConstantFP(0.0, DL, VT);

    // We can fold chains of FADD's of the same value into multiplications.
    // This transform is not safe in general because we are reducing the number
    // of rounding steps.
    if (TLI.isOperationLegalOrCustom(ISD::FMUL, VT) && !N0CFP && !N1CFP) {
      if (N0.getOpcode() == ISD::FMUL) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        bool CFP01 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(1));

        // (fadd (fmul x, c), x) -> (fmul x, c+1)
        // The !CFP00 guard keeps the non-constant operand on the left of the
        // inner multiply, avoiding re-matching the result.
        if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1, NewCFP, Flags);
        }

        // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
        if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
            N1.getOperand(0) == N1.getOperand(1) &&
            N0.getOperand(0) == N1.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N0.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), NewCFP, Flags);
        }
      }

      if (N1.getOpcode() == ISD::FMUL) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        bool CFP11 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(1));

        // (fadd x, (fmul x, c)) -> (fmul x, c+1)
        if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(1.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N0, NewCFP, Flags);
        }

        // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
        if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
            N0.getOperand(0) == N0.getOperand(1) &&
            N1.getOperand(0) == N0.getOperand(0)) {
          SDValue NewCFP = DAG.getNode(ISD::FADD, DL, VT, N1.getOperand(1),
                                       DAG.getConstantFP(2.0, DL, VT), Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N1.getOperand(0), NewCFP, Flags);
        }
      }

      if (N0.getOpcode() == ISD::FADD && AllowNewConst) {
        bool CFP00 = isConstantFPBuildVectorOrConstantFP(N0.getOperand(0));
        // (fadd (fadd x, x), x) -> (fmul x, 3.0)
        if (!CFP00 && N0.getOperand(0) == N0.getOperand(1) &&
            (N0.getOperand(0) == N1)) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N1, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      if (N1.getOpcode() == ISD::FADD && AllowNewConst) {
        bool CFP10 = isConstantFPBuildVectorOrConstantFP(N1.getOperand(0));
        // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
        if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
            N1.getOperand(0) == N0) {
          return DAG.getNode(ISD::FMUL, DL, VT,
                             N0, DAG.getConstantFP(3.0, DL, VT), Flags);
        }
      }

      // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
      if (AllowNewConst &&
          N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0),
                           DAG.getConstantFP(4.0, DL, VT), Flags);
      }
    }
  } // enable-unsafe-fp-math

  // FADD -> FMA combines:
  if (SDValue Fused = visitFADDForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }
  return SDValue();
}
   8445 
   8446 SDValue DAGCombiner::visitFSUB(SDNode *N) {
   8447   SDValue N0 = N->getOperand(0);
   8448   SDValue N1 = N->getOperand(1);
   8449   ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   8450   ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   8451   EVT VT = N->getValueType(0);
   8452   SDLoc dl(N);
   8453   const TargetOptions &Options = DAG.getTarget().Options;
   8454   const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;
   8455 
   8456   // fold vector ops
   8457   if (VT.isVector())
   8458     if (SDValue FoldedVOp = SimplifyVBinOp(N))
   8459       return FoldedVOp;
   8460 
   8461   // fold (fsub c1, c2) -> c1-c2
   8462   if (N0CFP && N1CFP)
   8463     return DAG.getNode(ISD::FSUB, dl, VT, N0, N1, Flags);
   8464 
   8465   // fold (fsub A, (fneg B)) -> (fadd A, B)
   8466   if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
   8467     return DAG.getNode(ISD::FADD, dl, VT, N0,
   8468                        GetNegatedExpression(N1, DAG, LegalOperations), Flags);
   8469 
   8470   // If 'unsafe math' is enabled, fold lots of things.
   8471   if (Options.UnsafeFPMath) {
   8472     // (fsub A, 0) -> A
   8473     if (N1CFP && N1CFP->isZero())
   8474       return N0;
   8475 
   8476     // (fsub 0, B) -> -B
   8477     if (N0CFP && N0CFP->isZero()) {
   8478       if (isNegatibleForFree(N1, LegalOperations, TLI, &Options))
   8479         return GetNegatedExpression(N1, DAG, LegalOperations);
   8480       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   8481         return DAG.getNode(ISD::FNEG, dl, VT, N1);
   8482     }
   8483 
   8484     // (fsub x, x) -> 0.0
   8485     if (N0 == N1)
   8486       return DAG.getConstantFP(0.0f, dl, VT);
   8487 
   8488     // (fsub x, (fadd x, y)) -> (fneg y)
   8489     // (fsub x, (fadd y, x)) -> (fneg y)
   8490     if (N1.getOpcode() == ISD::FADD) {
   8491       SDValue N10 = N1->getOperand(0);
   8492       SDValue N11 = N1->getOperand(1);
   8493 
   8494       if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &Options))
   8495         return GetNegatedExpression(N11, DAG, LegalOperations);
   8496 
   8497       if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, &Options))
   8498         return GetNegatedExpression(N10, DAG, LegalOperations);
   8499     }
   8500   }
   8501 
   8502   // FSUB -> FMA combines:
   8503   if (SDValue Fused = visitFSUBForFMACombine(N)) {
   8504     AddToWorklist(Fused.getNode());
   8505     return Fused;
   8506   }
   8507 
   8508   return SDValue();
   8509 }
   8510 
/// Visit an ISD::FMUL node and try to simplify or canonicalize it.
/// The folds are attempted in order; each successful one returns immediately.
SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
  ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  // Fast-math flags of the original node, propagated to any replacement.
  const SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector()) {
    // This just handles C1 * C2 for vectors. Other vector folds are below.
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  // (getNode is expected to perform the actual constant folding here.)
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, DL, VT, N0, N1, Flags);

  // canonicalize constant to RHS
  if (isConstantFPBuildVectorOrConstantFP(N0) &&
     !isConstantFPBuildVectorOrConstantFP(N1))
    return DAG.getNode(ISD::FMUL, DL, VT, N1, N0, Flags);

  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;

  // Folds below are unsafe: A*0 -> 0 is wrong for NaN/Inf A, and constant
  // reassociation changes rounding.
  if (Options.UnsafeFPMath) {
    // fold (fmul A, 0) -> 0
    if (N1CFP && N1CFP->isZero())
      return N1;

    // fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
    if (N0.getOpcode() == ISD::FMUL) {
      // Fold scalars or any vector constants (not just splats).
      // This fold is done in general by InstCombine, but extra fmul insts
      // may have been generated during lowering.
      SDValue N00 = N0.getOperand(0);
      SDValue N01 = N0.getOperand(1);
      auto *BV1 = dyn_cast<BuildVectorSDNode>(N1);
      auto *BV00 = dyn_cast<BuildVectorSDNode>(N00);
      auto *BV01 = dyn_cast<BuildVectorSDNode>(N01);

      // Check 1: Make sure that the first operand of the inner multiply is NOT
      // a constant. Otherwise, we may induce infinite looping.
      if (!(isConstOrConstSplatFP(N00) || (BV00 && BV00->isConstant()))) {
        // Check 2: Make sure that the second operand of the inner multiply and
        // the second operand of the outer multiply are constants.
        if ((N1CFP && isConstOrConstSplatFP(N01)) ||
            (BV1 && BV01 && BV1->isConstant() && BV01->isConstant())) {
          SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, N01, N1, Flags);
          return DAG.getNode(ISD::FMUL, DL, VT, N00, MulConsts, Flags);
        }
      }
    }

    // fold (fmul (fadd x, x), c) -> (fmul x, (fmul 2.0, c))
    // Undo the fmul 2.0, x -> fadd x, x transformation, since if it occurs
    // during an early run of DAGCombiner can prevent folding with fmuls
    // inserted during lowering.
    if (N0.getOpcode() == ISD::FADD &&
        (N0.getOperand(0) == N0.getOperand(1)) &&
        N0.hasOneUse()) {
      const SDValue Two = DAG.getConstantFP(2.0, DL, VT);
      SDValue MulConsts = DAG.getNode(ISD::FMUL, DL, VT, Two, N1, Flags);
      return DAG.getNode(ISD::FMUL, DL, VT, N0.getOperand(0), MulConsts, Flags);
    }
  }

  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, DL, VT, N0, N0, Flags);

  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, DL, VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  // isNegatibleForFree returns 2 when the operand is cheaper negated.
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, DL, VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // FMUL -> FMA combines:
  if (SDValue Fused = visitFMULForFMACombine(N)) {
    AddToWorklist(Fused.getNode());
    return Fused;
  }

  return SDValue();
}
   8613 
   8614 SDValue DAGCombiner::visitFMA(SDNode *N) {
   8615   SDValue N0 = N->getOperand(0);
   8616   SDValue N1 = N->getOperand(1);
   8617   SDValue N2 = N->getOperand(2);
   8618   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   8619   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   8620   EVT VT = N->getValueType(0);
   8621   SDLoc dl(N);
   8622   const TargetOptions &Options = DAG.getTarget().Options;
   8623 
   8624   // Constant fold FMA.
   8625   if (isa<ConstantFPSDNode>(N0) &&
   8626       isa<ConstantFPSDNode>(N1) &&
   8627       isa<ConstantFPSDNode>(N2)) {
   8628     return DAG.getNode(ISD::FMA, dl, VT, N0, N1, N2);
   8629   }
   8630 
   8631   if (Options.UnsafeFPMath) {
   8632     if (N0CFP && N0CFP->isZero())
   8633       return N2;
   8634     if (N1CFP && N1CFP->isZero())
   8635       return N2;
   8636   }
   8637   // TODO: The FMA node should have flags that propagate to these nodes.
   8638   if (N0CFP && N0CFP->isExactlyValue(1.0))
   8639     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
   8640   if (N1CFP && N1CFP->isExactlyValue(1.0))
   8641     return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);
   8642 
   8643   // Canonicalize (fma c, x, y) -> (fma x, c, y)
   8644   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   8645      !isConstantFPBuildVectorOrConstantFP(N1))
   8646     return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);
   8647 
   8648   // TODO: FMA nodes should have flags that propagate to the created nodes.
   8649   // For now, create a Flags object for use with all unsafe math transforms.
   8650   SDNodeFlags Flags;
   8651   Flags.setUnsafeAlgebra(true);
   8652 
   8653   if (Options.UnsafeFPMath) {
   8654     // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
   8655     if (N2.getOpcode() == ISD::FMUL && N0 == N2.getOperand(0) &&
   8656         isConstantFPBuildVectorOrConstantFP(N1) &&
   8657         isConstantFPBuildVectorOrConstantFP(N2.getOperand(1))) {
   8658       return DAG.getNode(ISD::FMUL, dl, VT, N0,
   8659                          DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1),
   8660                                      &Flags), &Flags);
   8661     }
   8662 
   8663     // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
   8664     if (N0.getOpcode() == ISD::FMUL &&
   8665         isConstantFPBuildVectorOrConstantFP(N1) &&
   8666         isConstantFPBuildVectorOrConstantFP(N0.getOperand(1))) {
   8667       return DAG.getNode(ISD::FMA, dl, VT,
   8668                          N0.getOperand(0),
   8669                          DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1),
   8670                                      &Flags),
   8671                          N2);
   8672     }
   8673   }
   8674 
   8675   // (fma x, 1, y) -> (fadd x, y)
   8676   // (fma x, -1, y) -> (fadd (fneg x), y)
   8677   if (N1CFP) {
   8678     if (N1CFP->isExactlyValue(1.0))
   8679       // TODO: The FMA node should have flags that propagate to this node.
   8680       return DAG.getNode(ISD::FADD, dl, VT, N0, N2);
   8681 
   8682     if (N1CFP->isExactlyValue(-1.0) &&
   8683         (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
   8684       SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
   8685       AddToWorklist(RHSNeg.getNode());
   8686       // TODO: The FMA node should have flags that propagate to this node.
   8687       return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
   8688     }
   8689   }
   8690 
   8691   if (Options.UnsafeFPMath) {
   8692     // (fma x, c, x) -> (fmul x, (c+1))
   8693     if (N1CFP && N0 == N2) {
   8694     return DAG.getNode(ISD::FMUL, dl, VT, N0,
   8695                          DAG.getNode(ISD::FADD, dl, VT,
   8696                                      N1, DAG.getConstantFP(1.0, dl, VT),
   8697                                      &Flags), &Flags);
   8698     }
   8699 
   8700     // (fma x, c, (fneg x)) -> (fmul x, (c-1))
   8701     if (N1CFP && N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) {
   8702       return DAG.getNode(ISD::FMUL, dl, VT, N0,
   8703                          DAG.getNode(ISD::FADD, dl, VT,
   8704                                      N1, DAG.getConstantFP(-1.0, dl, VT),
   8705                                      &Flags), &Flags);
   8706     }
   8707   }
   8708 
   8709   return SDValue();
   8710 }
   8711 
// Combine multiple FDIVs with the same divisor into multiple FMULs by the
// reciprocal.
// E.g., (a / D; b / D;) -> (recip = 1.0 / D; a * recip; b * recip)
// Notice that this is not always beneficial. One reason is different target
// may have different costs for FDIV and FMUL, so sometimes the cost of two
// FDIVs may be lower than the cost of one FDIV and two FMULs. Another reason
// is the critical path is increased from "one FDIV" to "one FDIV + one FMUL".
SDValue DAGCombiner::combineRepeatedFPDivisors(SDNode *N) {
  // The rewrite changes rounding behavior, so it requires either global
  // unsafe-math or the per-node allow-reciprocal flag on this division.
  bool UnsafeMath = DAG.getTarget().Options.UnsafeFPMath;
  const SDNodeFlags *Flags = N->getFlags();
  if (!UnsafeMath && !Flags->hasAllowReciprocal())
    return SDValue();

  // Skip if current node is a reciprocal.  A (1.0 / D) node is exactly the
  // node this transform would create, so rewriting it makes no progress.
  SDValue N0 = N->getOperand(0);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return SDValue();

  // Exit early if the target does not want this transform or if there can't
  // possibly be enough uses of the divisor to make the transform worthwhile.
  SDValue N1 = N->getOperand(1);
  unsigned MinUses = TLI.combineRepeatedFPDivisors();
  if (!MinUses || N1->use_size() < MinUses)
    return SDValue();

  // Find all FDIV users of the same divisor.
  // Use a set because duplicates may be present in the user list.
  SetVector<SDNode *> Users;
  for (auto *U : N1->uses()) {
    if (U->getOpcode() == ISD::FDIV && U->getOperand(1) == N1) {
      // This division is eligible for optimization only if global unsafe math
      // is enabled or if this division allows reciprocal formation.
      if (UnsafeMath || U->getFlags()->hasAllowReciprocal())
        Users.insert(U);
    }
  }

  // Now that we have the actual number of divisor uses, make sure it meets
  // the minimum threshold specified by the target.
  if (Users.size() < MinUses)
    return SDValue();

  // Build the single shared reciprocal (1.0 / D) all users will multiply by.
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
  SDValue Reciprocal = DAG.getNode(ISD::FDIV, DL, VT, FPOne, N1, Flags);

  // Dividend / Divisor -> Dividend * Reciprocal
  for (auto *U : Users) {
    SDValue Dividend = U->getOperand(0);
    if (Dividend != FPOne) {
      SDValue NewNode = DAG.getNode(ISD::FMUL, SDLoc(U), VT, Dividend,
                                    Reciprocal, Flags);
      CombineTo(U, NewNode);
    } else if (U != Reciprocal.getNode()) {
      // In the absence of fast-math-flags, this user node is always the
      // same node as Reciprocal, but with FMF they may be different nodes.
      CombineTo(U, Reciprocal);
    }
  }
  return SDValue(N, 0);  // N was replaced.
}
   8775 
/// Combine an FDIV node: constant-fold, replace division by a constant with
/// multiplication by its reciprocal, form reciprocal/rsqrt estimates under
/// unsafe math, cancel paired negations, and share repeated divisors.
SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);
  const TargetOptions &Options = DAG.getTarget().Options;
  SDNodeFlags *Flags = &cast<BinaryWithFlagsSDNode>(N)->Flags;

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  // fold (fdiv c1, c2) -> c1/c2
  // Rebuilding the node with two constant operands lets getNode's constant
  // folder evaluate the division.
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1, Flags);

  if (Options.UnsafeFPMath) {
    // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
    if (N1CFP) {
      // Compute the reciprocal 1.0 / c2.
      const APFloat &N1APF = N1CFP->getValueAPF();
      APFloat Recip(N1APF.getSemantics(), 1); // 1.0
      APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven);
      // Only do the transform if the reciprocal is a legal fp immediate that
      // isn't too nasty (eg NaN, denormal, ...).
      if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty
          (!LegalOperations ||
           // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM
           // backend)... we should handle this gracefully after Legalize.
           // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) ||
           TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) ||
           TLI.isFPImmLegal(Recip, VT)))
        return DAG.getNode(ISD::FMUL, DL, VT, N0,
                           DAG.getConstantFP(Recip, DL, VT), Flags);
    }

    // If this FDIV is part of a reciprocal square root, it may be folded
    // into a target-specific square root estimate instruction.
    if (N1.getOpcode() == ISD::FSQRT) {
      // x / sqrt(y) -> x * rsqrt(y)
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0), Flags)) {
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_EXTEND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fpext(sqrt(y)) -> x * fpext(rsqrt(y)): build the estimate in the
      // narrow type, then extend it to match the division's type.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_EXTEND, SDLoc(N1), VT, RV);
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FP_ROUND &&
               N1.getOperand(0).getOpcode() == ISD::FSQRT) {
      // x / fptrunc(sqrt(y)) -> x * fptrunc(rsqrt(y)), reusing the original
      // fp_round's truncation flag operand.
      if (SDValue RV = buildRsqrtEstimate(N1.getOperand(0).getOperand(0),
                                          Flags)) {
        RV = DAG.getNode(ISD::FP_ROUND, SDLoc(N1), VT, RV, N1.getOperand(1));
        AddToWorklist(RV.getNode());
        return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
      }
    } else if (N1.getOpcode() == ISD::FMUL) {
      // Look through an FMUL. Even though this won't remove the FDIV directly,
      // it's still worthwhile to get rid of the FSQRT if possible.
      SDValue SqrtOp;
      SDValue OtherOp;
      if (N1.getOperand(0).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(0);
        OtherOp = N1.getOperand(1);
      } else if (N1.getOperand(1).getOpcode() == ISD::FSQRT) {
        SqrtOp = N1.getOperand(1);
        OtherOp = N1.getOperand(0);
      }
      if (SqrtOp.getNode()) {
        // We found a FSQRT, so try to make this fold:
        // x / (y * sqrt(z)) -> x * (rsqrt(z) / y)
        if (SDValue RV = buildRsqrtEstimate(SqrtOp.getOperand(0), Flags)) {
          RV = DAG.getNode(ISD::FDIV, SDLoc(N1), VT, RV, OtherOp, Flags);
          AddToWorklist(RV.getNode());
          return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
        }
      }
    }

    // Fold into a reciprocal estimate and multiply instead of a real divide.
    if (SDValue RV = BuildReciprocalEstimate(N1, Flags)) {
      AddToWorklist(RV.getNode());
      return DAG.getNode(ISD::FMUL, DL, VT, N0, RV, Flags);
    }
  }

  // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FDIV, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations),
                           Flags);
    }
  }

  // Finally, try sharing this divisor across sibling FDIV nodes.
  if (SDValue CombineRepeatedDivisors = combineRepeatedFPDivisors(N))
    return CombineRepeatedDivisors;

  return SDValue();
}
   8885 
   8886 SDValue DAGCombiner::visitFREM(SDNode *N) {
   8887   SDValue N0 = N->getOperand(0);
   8888   SDValue N1 = N->getOperand(1);
   8889   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   8890   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   8891   EVT VT = N->getValueType(0);
   8892 
   8893   // fold (frem c1, c2) -> fmod(c1,c2)
   8894   if (N0CFP && N1CFP)
   8895     return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1,
   8896                        &cast<BinaryWithFlagsSDNode>(N)->Flags);
   8897 
   8898   return SDValue();
   8899 }
   8900 
   8901 SDValue DAGCombiner::visitFSQRT(SDNode *N) {
   8902   if (!DAG.getTarget().Options.UnsafeFPMath || TLI.isFsqrtCheap())
   8903     return SDValue();
   8904 
   8905   // TODO: FSQRT nodes should have flags that propagate to the created nodes.
   8906   // For now, create a Flags object for use with all unsafe math transforms.
   8907   SDNodeFlags Flags;
   8908   Flags.setUnsafeAlgebra(true);
   8909   return buildSqrtEstimate(N->getOperand(0), &Flags);
   8910 }
   8911 
   8912 /// copysign(x, fp_extend(y)) -> copysign(x, y)
   8913 /// copysign(x, fp_round(y)) -> copysign(x, y)
   8914 static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) {
   8915   SDValue N1 = N->getOperand(1);
   8916   if ((N1.getOpcode() == ISD::FP_EXTEND ||
   8917        N1.getOpcode() == ISD::FP_ROUND)) {
   8918     // Do not optimize out type conversion of f128 type yet.
   8919     // For some targets like x86_64, configuration is changed to keep one f128
   8920     // value in one SSE register, but instruction selection cannot handle
   8921     // FCOPYSIGN on SSE registers yet.
   8922     EVT N1VT = N1->getValueType(0);
   8923     EVT N1Op0VT = N1->getOperand(0)->getValueType(0);
   8924     return (N1VT == N1Op0VT || N1Op0VT != MVT::f128);
   8925   }
   8926   return false;
   8927 }
   8928 
   8929 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) {
   8930   SDValue N0 = N->getOperand(0);
   8931   SDValue N1 = N->getOperand(1);
   8932   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   8933   ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
   8934   EVT VT = N->getValueType(0);
   8935 
   8936   if (N0CFP && N1CFP)  // Constant fold
   8937     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1);
   8938 
   8939   if (N1CFP) {
   8940     const APFloat& V = N1CFP->getValueAPF();
   8941     // copysign(x, c1) -> fabs(x)       iff ispos(c1)
   8942     // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1)
   8943     if (!V.isNegative()) {
   8944       if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT))
   8945         return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   8946     } else {
   8947       if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
   8948         return DAG.getNode(ISD::FNEG, SDLoc(N), VT,
   8949                            DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0));
   8950     }
   8951   }
   8952 
   8953   // copysign(fabs(x), y) -> copysign(x, y)
   8954   // copysign(fneg(x), y) -> copysign(x, y)
   8955   // copysign(copysign(x,z), y) -> copysign(x, y)
   8956   if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG ||
   8957       N0.getOpcode() == ISD::FCOPYSIGN)
   8958     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   8959                        N0.getOperand(0), N1);
   8960 
   8961   // copysign(x, abs(y)) -> abs(x)
   8962   if (N1.getOpcode() == ISD::FABS)
   8963     return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);
   8964 
   8965   // copysign(x, copysign(y,z)) -> copysign(x, z)
   8966   if (N1.getOpcode() == ISD::FCOPYSIGN)
   8967     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   8968                        N0, N1.getOperand(1));
   8969 
   8970   // copysign(x, fp_extend(y)) -> copysign(x, y)
   8971   // copysign(x, fp_round(y)) -> copysign(x, y)
   8972   if (CanCombineFCOPYSIGN_EXTEND_ROUND(N))
   8973     return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
   8974                        N0, N1.getOperand(0));
   8975 
   8976   return SDValue();
   8977 }
   8978 
/// Combine a SINT_TO_FP node: constant-fold, switch to the unsigned form when
/// it is legal and the sign bit is known zero, and fold conversions of
/// setcc-derived values into SELECT_CC of FP constants.
SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT OpVT = N0.getValueType();

  // fold (sint_to_fp c1) -> c1fp
  if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
      // ...but only if the target supports immediate floating-point values
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
    return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);

  // If the input is a legal type, and SINT_TO_FP is not legal on this target,
  // but UINT_TO_FP is legal on this target, try to convert.
  if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) &&
      TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) {
    // If the sign bit is known to be zero, we can change this to UINT_TO_FP.
    if (DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
  }

  // The next optimizations are desirable only if SELECT_CC can be lowered.
  if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
    // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
    // An i1 setcc sign-extends to -1 or 0, hence the -1.0/0.0 select values.
    if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 &&
        !VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0), N0.getOperand(1),
          DAG.getConstantFP(-1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }

    // fold (sint_to_fp (zext (setcc x, y, cc))) ->
    //      (select_cc x, y, 1.0, 0.0,, cc)
    // The zext makes the setcc result 1 or 0, hence 1.0/0.0 here.
    if (N0.getOpcode() == ISD::ZERO_EXTEND &&
        N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() &&
        (!LegalOperations ||
         TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
      SDLoc DL(N);
      SDValue Ops[] =
        { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1),
          DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
          N0.getOperand(0).getOperand(2) };
      return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
    }
  }

  return SDValue();
}
   9032 
   9033 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) {
   9034   SDValue N0 = N->getOperand(0);
   9035   EVT VT = N->getValueType(0);
   9036   EVT OpVT = N0.getValueType();
   9037 
   9038   // fold (uint_to_fp c1) -> c1fp
   9039   if (DAG.isConstantIntBuildVectorOrConstantInt(N0) &&
   9040       // ...but only if the target supports immediate floating-point values
   9041       (!LegalOperations ||
   9042        TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT)))
   9043     return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0);
   9044 
   9045   // If the input is a legal type, and UINT_TO_FP is not legal on this target,
   9046   // but SINT_TO_FP is legal on this target, try to convert.
   9047   if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) &&
   9048       TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) {
   9049     // If the sign bit is known to be zero, we can change this to SINT_TO_FP.
   9050     if (DAG.SignBitIsZero(N0))
   9051       return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0);
   9052   }
   9053 
   9054   // The next optimizations are desirable only if SELECT_CC can be lowered.
   9055   if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) {
   9056     // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc)
   9057 
   9058     if (N0.getOpcode() == ISD::SETCC && !VT.isVector() &&
   9059         (!LegalOperations ||
   9060          TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) {
   9061       SDLoc DL(N);
   9062       SDValue Ops[] =
   9063         { N0.getOperand(0), N0.getOperand(1),
   9064           DAG.getConstantFP(1.0, DL, VT), DAG.getConstantFP(0.0, DL, VT),
   9065           N0.getOperand(2) };
   9066       return DAG.getNode(ISD::SELECT_CC, DL, VT, Ops);
   9067     }
   9068   }
   9069 
   9070   return SDValue();
   9071 }
   9072 
// Fold (fp_to_{s/u}int ({s/u}int_to_fpx)) -> zext x, sext x, trunc x, or x
// The round trip through FP can be elided when the FP type can represent the
// relevant integer range exactly.
static SDValue FoldIntToFPToInt(SDNode *N, SelectionDAG &DAG) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (N0.getOpcode() != ISD::UINT_TO_FP && N0.getOpcode() != ISD::SINT_TO_FP)
    return SDValue();

  SDValue Src = N0.getOperand(0);
  EVT SrcVT = Src.getValueType();
  bool IsInputSigned = N0.getOpcode() == ISD::SINT_TO_FP;
  bool IsOutputSigned = N->getOpcode() == ISD::FP_TO_SINT;

  // We can safely assume the conversion won't overflow the output range,
  // because (for example) (uint8_t)18293.f is undefined behavior.

  // Since we can assume the conversion won't overflow, our decision as to
  // whether the input will fit in the float should depend on the minimum
  // of the input range and output range.

  // This means this is also safe for a signed input and unsigned output, since
  // a negative input would lead to undefined behavior.
  // For a signed type, one bit is the sign, so the magnitude needs one fewer
  // bit of FP precision than the full bit width.
  unsigned InputSize = (int)SrcVT.getScalarSizeInBits() - IsInputSigned;
  unsigned OutputSize = (int)VT.getScalarSizeInBits() - IsOutputSigned;
  unsigned ActualSize = std::min(InputSize, OutputSize);
  const fltSemantics &sem = DAG.EVTToAPFloatSemantics(N0.getValueType());

  // We can only fold away the float conversion if the input range can be
  // represented exactly in the float range, i.e. the significand holds at
  // least ActualSize bits.
  if (APFloat::semanticsPrecision(sem) >= ActualSize) {
    if (VT.getScalarSizeInBits() > SrcVT.getScalarSizeInBits()) {
      // Widening: extend according to the signedness of both conversions.
      unsigned ExtOp = IsInputSigned && IsOutputSigned ? ISD::SIGN_EXTEND
                                                       : ISD::ZERO_EXTEND;
      return DAG.getNode(ExtOp, SDLoc(N), VT, Src);
    }
    if (VT.getScalarSizeInBits() < SrcVT.getScalarSizeInBits())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Src);
    // Same width: reuse the integer value directly.
    return DAG.getBitcast(VT, Src);
  }
  return SDValue();
}
   9114 
   9115 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) {
   9116   SDValue N0 = N->getOperand(0);
   9117   EVT VT = N->getValueType(0);
   9118 
   9119   // fold (fp_to_sint c1fp) -> c1
   9120   if (isConstantFPBuildVectorOrConstantFP(N0))
   9121     return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0);
   9122 
   9123   return FoldIntToFPToInt(N, DAG);
   9124 }
   9125 
   9126 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) {
   9127   SDValue N0 = N->getOperand(0);
   9128   EVT VT = N->getValueType(0);
   9129 
   9130   // fold (fp_to_uint c1fp) -> c1
   9131   if (isConstantFPBuildVectorOrConstantFP(N0))
   9132     return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0);
   9133 
   9134   return FoldIntToFPToInt(N, DAG);
   9135 }
   9136 
/// Combine an FP_ROUND node: constant-fold, cancel round-of-extend, merge
/// double roundings where safe, and sink the round through copysign.
SDValue DAGCombiner::visitFP_ROUND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  // Operand 1 is the "truncating" flag: 1 means the round loses no
  // information.
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  EVT VT = N->getValueType(0);

  // fold (fp_round c1fp) -> c1fp
  if (N0CFP)
    return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1);

  // fold (fp_round (fp_extend x)) -> x
  // The extend is value-preserving, so rounding back to the source type is a
  // no-op.
  if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType())
    return N0.getOperand(0);

  // fold (fp_round (fp_round x)) -> (fp_round x)
  if (N0.getOpcode() == ISD::FP_ROUND) {
    const bool NIsTrunc = N->getConstantOperandVal(1) == 1;
    const bool N0IsTrunc = N0.getNode()->getConstantOperandVal(1) == 1;

    // Skip this folding if it results in an fp_round from f80 to f16.
    //
    // f80 to f16 always generates an expensive (and as yet, unimplemented)
    // libcall to __truncxfhf2 instead of selecting native f16 conversion
    // instructions from f32 or f64.  Moreover, the first (value-preserving)
    // fp_round from f80 to either f32 or f64 may become a NOP in platforms like
    // x86.
    if (N0.getOperand(0).getValueType() == MVT::f80 && VT == MVT::f16)
      return SDValue();

    // If the first fp_round isn't a value preserving truncation, it might
    // introduce a tie in the second fp_round, that wouldn't occur in the
    // single-step fp_round we want to fold to.
    // In other words, double rounding isn't the same as rounding.
    // Also, this is a value preserving truncation iff both fp_round's are.
    if (DAG.getTarget().Options.UnsafeFPMath || N0IsTrunc) {
      SDLoc DL(N);
      return DAG.getNode(ISD::FP_ROUND, DL, VT, N0.getOperand(0),
                         DAG.getIntPtrConstant(NIsTrunc && N0IsTrunc, DL));
    }
  }

  // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y)
  // Rounding only affects the magnitude, so the sign can be applied after.
  if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) {
    SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT,
                              N0.getOperand(0), N1);
    AddToWorklist(Tmp.getNode());
    return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT,
                       Tmp, N0.getOperand(1));
  }

  return SDValue();
}
   9189 
   9190 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) {
   9191   SDValue N0 = N->getOperand(0);
   9192   EVT VT = N->getValueType(0);
   9193   EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT();
   9194   ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
   9195 
   9196   // fold (fp_round_inreg c1fp) -> c1fp
   9197   if (N0CFP && isTypeLegal(EVT)) {
   9198     SDLoc DL(N);
   9199     SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), DL, EVT);
   9200     return DAG.getNode(ISD::FP_EXTEND, DL, VT, Round);
   9201   }
   9202 
   9203   return SDValue();
   9204 }
   9205 
/// Combine an FP_EXTEND node: constant-fold, look through redundant
/// conversions, and widen loads into extending loads.
SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded.
  if (N->hasOneUse() &&
      N->use_begin()->getOpcode() == ISD::FP_ROUND)
    return SDValue();

  // fold (fp_extend c1fp) -> c1fp
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0);

  // fold (fp_extend (fp16_to_fp op)) -> (fp16_to_fp op)
  // FP16_TO_FP can produce the wider type directly when it is legal for VT.
  if (N0.getOpcode() == ISD::FP16_TO_FP &&
      TLI.getOperationAction(ISD::FP16_TO_FP, VT) == TargetLowering::Legal)
    return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), VT, N0.getOperand(0));

  // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the
  // value of X.  (Trunc flag == 1 marks the round as value-preserving.)
  if (N0.getOpcode() == ISD::FP_ROUND
      && N0.getNode()->getConstantOperandVal(1) == 1) {
    SDValue In = N0.getOperand(0);
    if (In.getValueType() == VT) return In;
    if (VT.bitsLT(In.getValueType()))
      return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT,
                         In, N0.getOperand(1));
    return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In);
  }

  // fold (fpext (load x)) -> (fpext (fptrunc (extload x)))
  // Other users of the original load keep a correct narrow value because the
  // load node itself is replaced with fp_round(extload).
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
       TLI.isLoadExtLegal(ISD::EXTLOAD, VT, N0.getValueType())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), N0.getValueType(),
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    // Replace the load's value and chain results; the fp_round uses trunc
    // flag 1 because the round back to the loaded type loses no information.
    CombineTo(N0.getNode(),
              DAG.getNode(ISD::FP_ROUND, SDLoc(N0),
                          N0.getValueType(), ExtLoad,
                          DAG.getIntPtrConstant(1, SDLoc(N0))),
              ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  return SDValue();
}
   9255 
   9256 SDValue DAGCombiner::visitFCEIL(SDNode *N) {
   9257   SDValue N0 = N->getOperand(0);
   9258   EVT VT = N->getValueType(0);
   9259 
   9260   // fold (fceil c1) -> fceil(c1)
   9261   if (isConstantFPBuildVectorOrConstantFP(N0))
   9262     return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0);
   9263 
   9264   return SDValue();
   9265 }
   9266 
   9267 SDValue DAGCombiner::visitFTRUNC(SDNode *N) {
   9268   SDValue N0 = N->getOperand(0);
   9269   EVT VT = N->getValueType(0);
   9270 
   9271   // fold (ftrunc c1) -> ftrunc(c1)
   9272   if (isConstantFPBuildVectorOrConstantFP(N0))
   9273     return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0);
   9274 
   9275   return SDValue();
   9276 }
   9277 
   9278 SDValue DAGCombiner::visitFFLOOR(SDNode *N) {
   9279   SDValue N0 = N->getOperand(0);
   9280   EVT VT = N->getValueType(0);
   9281 
   9282   // fold (ffloor c1) -> ffloor(c1)
   9283   if (isConstantFPBuildVectorOrConstantFP(N0))
   9284     return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0);
   9285 
   9286   return SDValue();
   9287 }
   9288 
// FIXME: FNEG and FABS have a lot in common; refactor.
/// Combine an FNEG node: constant-fold, absorb free negations, rewrite as an
/// integer sign-bit XOR through a bitcast, and push the negation into an
/// FMUL's constant operand.
SDValue DAGCombiner::visitFNEG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Constant fold FNEG.
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // If the operand can be negated at no extra cost, produce the negated
  // expression directly instead of emitting an FNEG node.
  if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(),
                         &DAG.getTarget().Options))
    return GetNegatedExpression(N0, DAG, LegalOperations);

  // Transform fneg(bitconvert(x)) -> bitconvert(x ^ sign) to avoid loading
  // constant pool values.
  if (!TLI.isFNegFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    // Only handle scalar integer payloads; vectors-of-int bitcast sources are
    // not rewritten here.
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x80... per scalar element
        // and splat it.
        SignMask = APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x80...
        SignMask = APInt::getSignBit(IntVT.getSizeInBits());
      }
      SDLoc DL0(N0);
      Int = DAG.getNode(ISD::XOR, DL0, IntVT, Int,
                        DAG.getConstant(SignMask, DL0, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(VT, Int);
    }
  }

  // (fneg (fmul c, x)) -> (fmul -c, x)
  if (N0.getOpcode() == ISD::FMUL &&
      (N0.getNode()->hasOneUse() || !TLI.isFNegFree(VT))) {
    ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));
    if (CFP1) {
      APFloat CVal = CFP1->getValueAPF();
      CVal.changeSign();
      // Only after legalization, and only if the target can materialize the
      // negated constant (as an FP immediate or a legal ConstantFP).
      if (Level >= AfterLegalizeDAG &&
          (TLI.isFPImmLegal(CVal, VT) ||
           TLI.isOperationLegal(ISD::ConstantFP, VT)))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
                           DAG.getNode(ISD::FNEG, SDLoc(N), VT,
                                       N0.getOperand(1)),
                           &cast<BinaryWithFlagsSDNode>(N0)->Flags);
    }
  }

  return SDValue();
}
   9347 
   9348 SDValue DAGCombiner::visitFMINNUM(SDNode *N) {
   9349   SDValue N0 = N->getOperand(0);
   9350   SDValue N1 = N->getOperand(1);
   9351   EVT VT = N->getValueType(0);
   9352   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   9353   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   9354 
   9355   if (N0CFP && N1CFP) {
   9356     const APFloat &C0 = N0CFP->getValueAPF();
   9357     const APFloat &C1 = N1CFP->getValueAPF();
   9358     return DAG.getConstantFP(minnum(C0, C1), SDLoc(N), VT);
   9359   }
   9360 
   9361   // Canonicalize to constant on RHS.
   9362   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   9363      !isConstantFPBuildVectorOrConstantFP(N1))
   9364     return DAG.getNode(ISD::FMINNUM, SDLoc(N), VT, N1, N0);
   9365 
   9366   return SDValue();
   9367 }
   9368 
   9369 SDValue DAGCombiner::visitFMAXNUM(SDNode *N) {
   9370   SDValue N0 = N->getOperand(0);
   9371   SDValue N1 = N->getOperand(1);
   9372   EVT VT = N->getValueType(0);
   9373   const ConstantFPSDNode *N0CFP = isConstOrConstSplatFP(N0);
   9374   const ConstantFPSDNode *N1CFP = isConstOrConstSplatFP(N1);
   9375 
   9376   if (N0CFP && N1CFP) {
   9377     const APFloat &C0 = N0CFP->getValueAPF();
   9378     const APFloat &C1 = N1CFP->getValueAPF();
   9379     return DAG.getConstantFP(maxnum(C0, C1), SDLoc(N), VT);
   9380   }
   9381 
   9382   // Canonicalize to constant on RHS.
   9383   if (isConstantFPBuildVectorOrConstantFP(N0) &&
   9384      !isConstantFPBuildVectorOrConstantFP(N1))
   9385     return DAG.getNode(ISD::FMAXNUM, SDLoc(N), VT, N1, N0);
   9386 
   9387   return SDValue();
   9388 }
   9389 
/// Combine an FABS node: constant-fold, collapse sign-only operations under
/// the fabs, and rewrite as an integer sign-bit AND through a bitcast.
SDValue DAGCombiner::visitFABS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (fabs c1) -> fabs(c1)
  if (isConstantFPBuildVectorOrConstantFP(N0))
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0);

  // fold (fabs (fabs x)) -> (fabs x)
  if (N0.getOpcode() == ISD::FABS)
    return N->getOperand(0);

  // fold (fabs (fneg x)) -> (fabs x)
  // fold (fabs (fcopysign x, y)) -> (fabs x)
  // Both only change the sign, which fabs discards anyway.
  if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN)
    return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0));

  // Transform fabs(bitconvert(x)) -> bitconvert(x & ~sign) to avoid loading
  // constant pool values.
  if (!TLI.isFAbsFree(VT) &&
      N0.getOpcode() == ISD::BITCAST &&
      N0.getNode()->hasOneUse()) {
    SDValue Int = N0.getOperand(0);
    EVT IntVT = Int.getValueType();
    // Only handle scalar integer payloads; vectors-of-int bitcast sources are
    // not rewritten here.
    if (IntVT.isInteger() && !IntVT.isVector()) {
      APInt SignMask;
      if (N0.getValueType().isVector()) {
        // For a vector, get a mask such as 0x7f... per scalar element
        // and splat it.
        SignMask = ~APInt::getSignBit(N0.getValueType().getScalarSizeInBits());
        SignMask = APInt::getSplat(IntVT.getSizeInBits(), SignMask);
      } else {
        // For a scalar, just generate 0x7f...
        SignMask = ~APInt::getSignBit(IntVT.getSizeInBits());
      }
      SDLoc DL(N0);
      Int = DAG.getNode(ISD::AND, DL, IntVT, Int,
                        DAG.getConstant(SignMask, DL, IntVT));
      AddToWorklist(Int.getNode());
      return DAG.getBitcast(N->getValueType(0), Int);
    }
  }

  return SDValue();
}
   9435 
   9436 SDValue DAGCombiner::visitBRCOND(SDNode *N) {
   9437   SDValue Chain = N->getOperand(0);
   9438   SDValue N1 = N->getOperand(1);
   9439   SDValue N2 = N->getOperand(2);
   9440 
   9441   // If N is a constant we could fold this into a fallthrough or unconditional
   9442   // branch. However that doesn't happen very often in normal code, because
   9443   // Instcombine/SimplifyCFG should have handled the available opportunities.
   9444   // If we did this folding here, it would be necessary to update the
   9445   // MachineBasicBlock CFG, which is awkward.
   9446 
   9447   // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
   9448   // on the target.
   9449   if (N1.getOpcode() == ISD::SETCC &&
   9450       TLI.isOperationLegalOrCustom(ISD::BR_CC,
   9451                                    N1.getOperand(0).getValueType())) {
   9452     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   9453                        Chain, N1.getOperand(2),
   9454                        N1.getOperand(0), N1.getOperand(1), N2);
   9455   }
   9456 
   9457   if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) ||
   9458       ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) &&
   9459        (N1.getOperand(0).hasOneUse() &&
   9460         N1.getOperand(0).getOpcode() == ISD::SRL))) {
   9461     SDNode *Trunc = nullptr;
   9462     if (N1.getOpcode() == ISD::TRUNCATE) {
   9463       // Look pass the truncate.
   9464       Trunc = N1.getNode();
   9465       N1 = N1.getOperand(0);
   9466     }
   9467 
   9468     // Match this pattern so that we can generate simpler code:
   9469     //
   9470     //   %a = ...
   9471     //   %b = and i32 %a, 2
   9472     //   %c = srl i32 %b, 1
   9473     //   brcond i32 %c ...
   9474     //
   9475     // into
   9476     //
   9477     //   %a = ...
   9478     //   %b = and i32 %a, 2
   9479     //   %c = setcc eq %b, 0
   9480     //   brcond %c ...
   9481     //
   9482     // This applies only when the AND constant value has one bit set and the
   9483     // SRL constant is equal to the log2 of the AND constant. The back-end is
   9484     // smart enough to convert the result into a TEST/JMP sequence.
   9485     SDValue Op0 = N1.getOperand(0);
   9486     SDValue Op1 = N1.getOperand(1);
   9487 
   9488     if (Op0.getOpcode() == ISD::AND &&
   9489         Op1.getOpcode() == ISD::Constant) {
   9490       SDValue AndOp1 = Op0.getOperand(1);
   9491 
   9492       if (AndOp1.getOpcode() == ISD::Constant) {
   9493         const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue();
   9494 
   9495         if (AndConst.isPowerOf2() &&
   9496             cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) {
   9497           SDLoc DL(N);
   9498           SDValue SetCC =
   9499             DAG.getSetCC(DL,
   9500                          getSetCCResultType(Op0.getValueType()),
   9501                          Op0, DAG.getConstant(0, DL, Op0.getValueType()),
   9502                          ISD::SETNE);
   9503 
   9504           SDValue NewBRCond = DAG.getNode(ISD::BRCOND, DL,
   9505                                           MVT::Other, Chain, SetCC, N2);
   9506           // Don't add the new BRCond into the worklist or else SimplifySelectCC
   9507           // will convert it back to (X & C1) >> C2.
   9508           CombineTo(N, NewBRCond, false);
   9509           // Truncate is dead.
   9510           if (Trunc)
   9511             deleteAndRecombine(Trunc);
   9512           // Replace the uses of SRL with SETCC
   9513           WorklistRemover DeadNodes(*this);
   9514           DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
   9515           deleteAndRecombine(N1.getNode());
   9516           return SDValue(N, 0);   // Return N so it doesn't get rechecked!
   9517         }
   9518       }
   9519     }
   9520 
   9521     if (Trunc)
   9522       // Restore N1 if the above transformation doesn't match.
   9523       N1 = N->getOperand(1);
   9524   }
   9525 
   9526   // Transform br(xor(x, y)) -> br(x != y)
   9527   // Transform br(xor(xor(x,y), 1)) -> br (x == y)
   9528   if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
   9529     SDNode *TheXor = N1.getNode();
   9530     SDValue Op0 = TheXor->getOperand(0);
   9531     SDValue Op1 = TheXor->getOperand(1);
   9532     if (Op0.getOpcode() == Op1.getOpcode()) {
   9533       // Avoid missing important xor optimizations.
   9534       if (SDValue Tmp = visitXOR(TheXor)) {
   9535         if (Tmp.getNode() != TheXor) {
   9536           DEBUG(dbgs() << "\nReplacing.8 ";
   9537                 TheXor->dump(&DAG);
   9538                 dbgs() << "\nWith: ";
   9539                 Tmp.getNode()->dump(&DAG);
   9540                 dbgs() << '\n');
   9541           WorklistRemover DeadNodes(*this);
   9542           DAG.ReplaceAllUsesOfValueWith(N1, Tmp);
   9543           deleteAndRecombine(TheXor);
   9544           return DAG.getNode(ISD::BRCOND, SDLoc(N),
   9545                              MVT::Other, Chain, Tmp, N2);
   9546         }
   9547 
   9548         // visitXOR has changed XOR's operands or replaced the XOR completely,
   9549         // bail out.
   9550         return SDValue(N, 0);
   9551       }
   9552     }
   9553 
   9554     if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
   9555       bool Equal = false;
   9556       if (isOneConstant(Op0) && Op0.hasOneUse() &&
   9557           Op0.getOpcode() == ISD::XOR) {
   9558         TheXor = Op0.getNode();
   9559         Equal = true;
   9560       }
   9561 
   9562       EVT SetCCVT = N1.getValueType();
   9563       if (LegalTypes)
   9564         SetCCVT = getSetCCResultType(SetCCVT);
   9565       SDValue SetCC = DAG.getSetCC(SDLoc(TheXor),
   9566                                    SetCCVT,
   9567                                    Op0, Op1,
   9568                                    Equal ? ISD::SETEQ : ISD::SETNE);
   9569       // Replace the uses of XOR with SETCC
   9570       WorklistRemover DeadNodes(*this);
   9571       DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
   9572       deleteAndRecombine(N1.getNode());
   9573       return DAG.getNode(ISD::BRCOND, SDLoc(N),
   9574                          MVT::Other, Chain, SetCC, N2);
   9575     }
   9576   }
   9577 
   9578   return SDValue();
   9579 }
   9580 
   9581 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB.
   9582 //
   9583 SDValue DAGCombiner::visitBR_CC(SDNode *N) {
   9584   CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
   9585   SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
   9586 
   9587   // If N is a constant we could fold this into a fallthrough or unconditional
   9588   // branch. However that doesn't happen very often in normal code, because
   9589   // Instcombine/SimplifyCFG should have handled the available opportunities.
   9590   // If we did this folding here, it would be necessary to update the
   9591   // MachineBasicBlock CFG, which is awkward.
   9592 
   9593   // Use SimplifySetCC to simplify SETCC's.
   9594   SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()),
   9595                                CondLHS, CondRHS, CC->get(), SDLoc(N),
   9596                                false);
   9597   if (Simp.getNode()) AddToWorklist(Simp.getNode());
   9598 
   9599   // fold to a simpler setcc
   9600   if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
   9601     return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other,
   9602                        N->getOperand(0), Simp.getOperand(2),
   9603                        Simp.getOperand(0), Simp.getOperand(1),
   9604                        N->getOperand(4));
   9605 
   9606   return SDValue();
   9607 }
   9608 
   9609 /// Return true if 'Use' is a load or a store that uses N as its base pointer
   9610 /// and that N may be folded in the load / store addressing mode.
   9611 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use,
   9612                                     SelectionDAG &DAG,
   9613                                     const TargetLowering &TLI) {
   9614   EVT VT;
   9615   unsigned AS;
   9616 
   9617   if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(Use)) {
   9618     if (LD->isIndexed() || LD->getBasePtr().getNode() != N)
   9619       return false;
   9620     VT = LD->getMemoryVT();
   9621     AS = LD->getAddressSpace();
   9622   } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(Use)) {
   9623     if (ST->isIndexed() || ST->getBasePtr().getNode() != N)
   9624       return false;
   9625     VT = ST->getMemoryVT();
   9626     AS = ST->getAddressSpace();
   9627   } else
   9628     return false;
   9629 
   9630   TargetLowering::AddrMode AM;
   9631   if (N->getOpcode() == ISD::ADD) {
   9632     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   9633     if (Offset)
   9634       // [reg +/- imm]
   9635       AM.BaseOffs = Offset->getSExtValue();
   9636     else
   9637       // [reg +/- reg]
   9638       AM.Scale = 1;
   9639   } else if (N->getOpcode() == ISD::SUB) {
   9640     ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1));
   9641     if (Offset)
   9642       // [reg +/- imm]
   9643       AM.BaseOffs = -Offset->getSExtValue();
   9644     else
   9645       // [reg +/- reg]
   9646       AM.Scale = 1;
   9647   } else
   9648     return false;
   9649 
   9650   return TLI.isLegalAddressingMode(DAG.getDataLayout(), AM,
   9651                                    VT.getTypeForEVT(*DAG.getContext()), AS);
   9652 }
   9653 
/// Try turning a load/store into a pre-indexed load/store when the base
/// pointer is an add or subtract and it has other uses besides the load/store.
/// After the transformation, the new indexed load/store has effectively folded
/// the add/subtract in and all of its other uses are redirected to the
/// new load/store.
///
/// Returns true if \p N was replaced by an indexed node (and deleted).
bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) {
  // Indexed forms are only introduced once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Grab the base pointer, rejecting nodes that are already indexed and
  // memory VTs the target can neither pre-increment nor pre-decrement.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail
  // out.  There is no reason to make this a preinc/predec.
  if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) ||
      Ptr.getNode()->hasOneUse())
    return false;

  // Ask the target to do addressing mode selection.
  SDValue BasePtr;
  SDValue Offset;
  ISD::MemIndexedMode AM = ISD::UNINDEXED;
  if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG))
    return false;

  // Backends without true r+i pre-indexed forms may need to pass a
  // constant base with a variable offset so that constant coercion
  // will work with the patterns in canonical form.
  bool Swapped = false;
  if (isa<ConstantSDNode>(BasePtr)) {
    std::swap(BasePtr, Offset);
    Swapped = true;
  }

  // Don't create an indexed load / store with zero offset.
  if (isNullConstant(Offset))
    return false;

  // Try turning it into a pre-indexed load / store except when:
  // 1) The new base ptr is a frame index.
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.  If the stored value depends on the new base pointer, folding
  // the add/sub into the store would create a cycle.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;
  Worklist.push_back(N);

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  // Note: BasePtr/Offset may be in swapped order here (see Swapped above);
  // OtherUses is collected against that same order.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode::use_iterator UI = BasePtr.getNode()->use_begin(),
                              UE = BasePtr.getNode()->use_end();
         UI != UE; ++UI) {
      SDUse &Use = UI.getUse();
      // Skip the use that is Ptr and uses of other results from BasePtr's
      // node (important for nodes that return multiple results).
      if (Use.getUser() == Ptr.getNode() || Use != BasePtr)
        continue;

      // Skip uses that would create a cycle if rewritten.
      if (SDNode::hasPredecessorHelper(Use.getUser(), Visited, Worklist))
        continue;

      // Give up on the whole rewrite unless every remaining use is an
      // add/sub of a constant of the matching type.
      if (Use.getUser()->getOpcode() != ISD::ADD &&
          Use.getUser()->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // The other operand of the add/sub (the non-BasePtr one).
      SDValue Op1 = Use.getUser()->getOperand((UI.getOperandNo() + 1) & 1);
      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use.getUser());
    }

  // Restore the operand order reported by the target before building the
  // indexed node.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    // #3: another use of Ptr feeding into N would create a cycle.
    if (SDNode::hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  // #4: bail if every other use could fold Ptr into its own addressing mode.
  if (!RealUse)
    return false;

  // Build the pre-indexed replacement node.
  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  // Redirect N's value and chain results to the new indexed node.
  WorklistRemover DeadNodes(*this);
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  deleteAndRecombine(N);

  // Re-apply the swap so BasePtr/Offset match the order used when OtherUses
  // was collected above.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expresion that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 *offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    const APInt &Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    // Signs of the offset/pointer terms in the two expressions above.
    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    SDLoc DL(OtherUses[i]);

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, DL, CN->getValueType(0));
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 DL,
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    deleteAndRecombine(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  deleteAndRecombine(Ptr.getNode());

  return true;
}
   9881 
/// Try to combine a load/store with an add/sub of the base pointer node into a
/// post-indexed load/store. The transformation folded the add/subtract into the
/// new indexed load/store effectively and all of its uses are redirected to the
/// new load/store.
///
/// Returns true if \p N was replaced by an indexed node (and deleted).
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  // Indexed forms are only introduced once the DAG has been legalized.
  if (Level < AfterLegalizeDAG)
    return false;

  // Grab the base pointer, rejecting nodes that are already indexed and
  // memory VTs the target can neither post-increment nor post-decrement.
  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // The base pointer needs a use besides N that can become the post-index
  // arithmetic.
  if (Ptr.getNode()->hasOneUse())
    return false;

  // Scan Ptr's other uses for an add/sub the target can fold as a
  // post-indexed addressing mode.
  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isNullConstant(Offset))
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mmode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Post-inc'ing a frame index / register base is not profitable here.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        // Redirect N's value and chain results to the new indexed node.
        WorklistRemover DeadNodes(*this);
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        deleteAndRecombine(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        deleteAndRecombine(Op);
        return true;
      }
    }
  }

  return false;
}
   10001 
   10002 /// \brief Return the base-pointer arithmetic from an indexed \p LD.
   10003 SDValue DAGCombiner::SplitIndexingFromLoad(LoadSDNode *LD) {
   10004   ISD::MemIndexedMode AM = LD->getAddressingMode();
   10005   assert(AM != ISD::UNINDEXED);
   10006   SDValue BP = LD->getOperand(1);
   10007   SDValue Inc = LD->getOperand(2);
   10008 
   10009   // Some backends use TargetConstants for load offsets, but don't expect
   10010   // TargetConstants in general ADD nodes. We can convert these constants into
   10011   // regular Constants (if the constant is not opaque).
   10012   assert((Inc.getOpcode() != ISD::TargetConstant ||
   10013           !cast<ConstantSDNode>(Inc)->isOpaque()) &&
   10014          "Cannot split out indexing using opaque target constants");
   10015   if (Inc.getOpcode() == ISD::TargetConstant) {
   10016     ConstantSDNode *ConstInc = cast<ConstantSDNode>(Inc);
   10017     Inc = DAG.getConstant(*ConstInc->getConstantIntValue(), SDLoc(Inc),
   10018                           ConstInc->getValueType(0));
   10019   }
   10020 
   10021   unsigned Opc =
   10022       (AM == ISD::PRE_INC || AM == ISD::POST_INC ? ISD::ADD : ISD::SUB);
   10023   return DAG.getNode(Opc, SDLoc(LD), BP.getSimpleValueType(), BP, Inc);
   10024 }
   10025 
/// Main combine hook for LOAD nodes: removes dead loads, forwards an
/// immediately-preceding store's value, improves alignment, re-chains through
/// alias analysis, and tries indexed-load and load-slicing transforms.
SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty())
          deleteAndRecombine(N);

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");

      // If this load has an opaque TargetConstant offset, then we cannot split
      // the indexing into an add/sub directly (that TargetConstant may not be
      // valid for a different type of node, and we cannot convert an opaque
      // target constant into a regular constant).
      bool HasOTCInc = LD->getOperand(2).getOpcode() == ISD::TargetConstant &&
                       cast<ConstantSDNode>(LD->getOperand(2))->isOpaque();

      // The load value is dead; replace value with undef, the index result
      // with either split-out pointer arithmetic or undef, and the chain
      // result with the input chain.
      if (!N->hasAnyUseOfValue(0) &&
          ((MaySplitLoadIndex && !HasOTCInc) || !N->hasAnyUseOfValue(1))) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        SDValue Index;
        if (N->hasAnyUseOfValue(1) && MaySplitLoadIndex && !HasOTCInc) {
          Index = SplitIndexingFromLoad(LD);
          // Try to fold the base pointer arithmetic into subsequent loads and
          // stores.
          AddUsersToWorklist(N);
        } else
          Index = DAG.getUNDEF(N->getValueType(1));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorklistRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Index);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        deleteAndRecombine(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Chain.getOperand(1) is the stored value of PrevST.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
      return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        // Rebuild the load with the improved alignment, preserving every
        // other memory-operand property.
        SDValue NewLoad =
               DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(),
                              LD->isInvariant(), Align, LD->getAAInfo());
        if (NewLoad.getNode() != N)
          return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // Decide whether to use alias analysis for chain shortening; the
  // -combiner-alias-analysis flag overrides the subtarget default.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to void dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorklist(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}
   10174 
namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Number of loads required.
    unsigned Loads;
    /// Number of truncate operations required.
    unsigned Truncates;
    /// Number of copies between register banks required.
    unsigned CrossRegisterBanksCopies;
    /// Number of zero extensions required.
    unsigned ZExts;
    /// Number of shift operations required.
    unsigned Shift;

    // Zero-cost constructor; used for the cost of the original (unsliced)
    // configuration, which is then grown via addSliceGain.
    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice.
    /// A slice always costs one load, plus one zero extension when the
    /// truncated type differs from the loaded type and the target does not
    /// perform that extension for free.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provide a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getOperand(0).getValueType(),
                              LS.Inst->getValueType(0)))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    // Component-wise accumulation; used to sum up the cost of all slices.
    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      // For code size (or as a tie-breaker), compare the total number of
      // operations instead.
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // A slice at a non-zero byte offset is only as aligned as the base
    // alignment combined with that offset allows.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (!Origin->getOffset().isUndef())
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian = DAG->getDataLayout().isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets, the byte holding the most significant bits
    // comes first, so the memory offset is counted from the other end.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      SDLoc DL(Origin);
      BaseAddr = DAG->getNode(ISD::ADD, DL, ArithType, BaseAddr,
                              DAG->getConstant(Offset, DL, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice, reusing the chain and memory flags of
    // the original load.
    SDValue LastInst = DAG->getLoad(
        SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
        Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
        Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    // The slice must feed exactly one bitcast to be mergeable.
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = DAG->getSubtarget().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = DAG->getDataLayout().getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
} // end anonymous namespace
   10478 
   10479 /// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
   10480 /// \p UsedBits looks like 0..0 1..1 0..0.
   10481 static bool areUsedBitsDense(const APInt &UsedBits) {
   10482   // If all the bits are one, this is dense!
   10483   if (UsedBits.isAllOnesValue())
   10484     return true;
   10485 
   10486   // Get rid of the unused bits on the right.
   10487   APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
   10488   // Get rid of the unused bits on the left.
   10489   if (NarrowedUsedBits.countLeadingZeros())
   10490     NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
   10491   // Check that the chunk of bits is completely used.
   10492   return NarrowedUsedBits.isAllOnesValue();
   10493 }
   10494 
   10495 /// \brief Check whether or not \p First and \p Second are next to each other
   10496 /// in memory. This means that there is no hole between the bits loaded
   10497 /// by \p First and the bits loaded by \p Second.
   10498 static bool areSlicesNextToEachOther(const LoadedSlice &First,
   10499                                      const LoadedSlice &Second) {
   10500   assert(First.Origin == Second.Origin && First.Origin &&
   10501          "Unable to match different memory origins.");
   10502   APInt UsedBits = First.getUsedBits();
   10503   assert((UsedBits & Second.getUsedBits()) == 0 &&
   10504          "Slices are not supposed to overlap.");
   10505   UsedBits |= Second.getUsedBits();
   10506   return areUsedBitsDense(UsedBits);
   10507 }
   10508 
   10509 /// \brief Adjust the \p GlobalLSCost according to the target
   10510 /// paring capabilities and the layout of the slices.
   10511 /// \pre \p GlobalLSCost should account for at least as many loads as
   10512 /// there is in the slices in \p LoadedSlices.
   10513 static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   10514                                  LoadedSlice::Cost &GlobalLSCost) {
   10515   unsigned NumberOfSlices = LoadedSlices.size();
   10516   // If there is less than 2 elements, no pairing is possible.
   10517   if (NumberOfSlices < 2)
   10518     return;
   10519 
   10520   // Sort the slices so that elements that are likely to be next to each
   10521   // other in memory are next to each other in the list.
   10522   std::sort(LoadedSlices.begin(), LoadedSlices.end(),
   10523             [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
   10524     assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
   10525     return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
   10526   });
   10527   const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
   10528   // First (resp. Second) is the first (resp. Second) potentially candidate
   10529   // to be placed in a paired load.
   10530   const LoadedSlice *First = nullptr;
   10531   const LoadedSlice *Second = nullptr;
   10532   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
   10533                 // Set the beginning of the pair.
   10534                                                            First = Second) {
   10535 
   10536     Second = &LoadedSlices[CurrSlice];
   10537 
   10538     // If First is NULL, it means we start a new pair.
   10539     // Get to the next slice.
   10540     if (!First)
   10541       continue;
   10542 
   10543     EVT LoadedType = First->getLoadedType();
   10544 
   10545     // If the types of the slices are different, we cannot pair them.
   10546     if (LoadedType != Second->getLoadedType())
   10547       continue;
   10548 
   10549     // Check if the target supplies paired loads for this type.
   10550     unsigned RequiredAlignment = 0;
   10551     if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
   10552       // move to the next pair, this type is hopeless.
   10553       Second = nullptr;
   10554       continue;
   10555     }
   10556     // Check if we meet the alignment requirement.
   10557     if (RequiredAlignment > First->getAlignment())
   10558       continue;
   10559 
   10560     // Check that both loads are next to each other in memory.
   10561     if (!areSlicesNextToEachOther(*First, *Second))
   10562       continue;
   10563 
   10564     assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
   10565     --GlobalLSCost.Loads;
   10566     // Move to the next pair.
   10567     Second = nullptr;
   10568   }
   10569 }
   10570 
   10571 /// \brief Check the profitability of all involved LoadedSlice.
   10572 /// Currently, it is considered profitable if there is exactly two
   10573 /// involved slices (1) which are (2) next to each other in memory, and
   10574 /// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
   10575 ///
   10576 /// Note: The order of the elements in \p LoadedSlices may be modified, but not
   10577 /// the elements themselves.
   10578 ///
   10579 /// FIXME: When the cost model will be mature enough, we can relax
   10580 /// constraints (1) and (2).
   10581 static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
   10582                                 const APInt &UsedBits, bool ForCodeSize) {
   10583   unsigned NumberOfSlices = LoadedSlices.size();
   10584   if (StressLoadSlicing)
   10585     return NumberOfSlices > 1;
   10586 
   10587   // Check (1).
   10588   if (NumberOfSlices != 2)
   10589     return false;
   10590 
   10591   // Check (2).
   10592   if (!areUsedBitsDense(UsedBits))
   10593     return false;
   10594 
   10595   // Check (3).
   10596   LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
   10597   // The original code has one big load.
   10598   OrigCost.Loads = 1;
   10599   for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
   10600     const LoadedSlice &LS = LoadedSlices[CurrSlice];
   10601     // Accumulate the cost of all the slices.
   10602     LoadedSlice::Cost SliceCost(LS, ForCodeSize);
   10603     GlobalSlicingCost += SliceCost;
   10604 
   10605     // Account as cost in the original configuration the gain obtained
   10606     // with the current slices.
   10607     OrigCost.addSliceGain(LS);
   10608   }
   10609 
   10610   // If the target supports paired load, adjust the cost accordingly.
   10611   adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
   10612   return OrigCost > GlobalSlicingCost;
   10613 }
   10614 
   10615 /// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
   10616 /// operations, split it in the various pieces being extracted.
   10617 ///
   10618 /// This sort of thing is introduced by SROA.
   10619 /// This slicing takes care not to insert overlapping loads.
   10620 /// \pre LI is a simple load (i.e., not an atomic or volatile load).
   10621 bool DAGCombiner::SliceUpLoad(SDNode *N) {
   10622   if (Level < AfterLegalizeDAG)
   10623     return false;
   10624 
   10625   LoadSDNode *LD = cast<LoadSDNode>(N);
   10626   if (LD->isVolatile() || !ISD::isNormalLoad(LD) ||
   10627       !LD->getValueType(0).isInteger())
   10628     return false;
   10629 
   10630   // Keep track of already used bits to detect overlapping values.
   10631   // In that case, we will just abort the transformation.
   10632   APInt UsedBits(LD->getValueSizeInBits(0), 0);
   10633 
   10634   SmallVector<LoadedSlice, 4> LoadedSlices;
   10635 
   10636   // Check if this load is used as several smaller chunks of bits.
   10637   // Basically, look for uses in trunc or trunc(lshr) and record a new chain
   10638   // of computation for each trunc.
   10639   for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end();
   10640        UI != UIEnd; ++UI) {
   10641     // Skip the uses of the chain.
   10642     if (UI.getUse().getResNo() != 0)
   10643       continue;
   10644 
   10645     SDNode *User = *UI;
   10646     unsigned Shift = 0;
   10647 
   10648     // Check if this is a trunc(lshr).
   10649     if (User->getOpcode() == ISD::SRL && User->hasOneUse() &&
   10650         isa<ConstantSDNode>(User->getOperand(1))) {
   10651       Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue();
   10652       User = *User->use_begin();
   10653     }
   10654 
   10655     // At this point, User is a Truncate, iff we encountered, trunc or
   10656     // trunc(lshr).
   10657     if (User->getOpcode() != ISD::TRUNCATE)
   10658       return false;
   10659 
   10660     // The width of the type must be a power of 2 and greater than 8-bits.
   10661     // Otherwise the load cannot be represented in LLVM IR.
   10662     // Moreover, if we shifted with a non-8-bits multiple, the slice
   10663     // will be across several bytes. We do not support that.
   10664     unsigned Width = User->getValueSizeInBits(0);
   10665     if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7))
   10666       return 0;
   10667 
   10668     // Build the slice for this chain of computations.
   10669     LoadedSlice LS(User, LD, Shift, &DAG);
   10670     APInt CurrentUsedBits = LS.getUsedBits();
   10671 
   10672     // Check if this slice overlaps with another.
   10673     if ((CurrentUsedBits & UsedBits) != 0)
   10674       return false;
   10675     // Update the bits used globally.
   10676     UsedBits |= CurrentUsedBits;
   10677 
   10678     // Check if the new slice would be legal.
   10679     if (!LS.isLegal())
   10680       return false;
   10681 
   10682     // Record the slice.
   10683     LoadedSlices.push_back(LS);
   10684   }
   10685 
   10686   // Abort slicing if it does not seem to be profitable.
   10687   if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize))
   10688     return false;
   10689 
   10690   ++SlicedLoads;
   10691 
   10692   // Rewrite each chain to use an independent load.
   10693   // By construction, each chain can be represented by a unique load.
   10694 
   10695   // Prepare the argument for the new token factor for all the slices.
   10696   SmallVector<SDValue, 8> ArgChains;
   10697   for (SmallVectorImpl<LoadedSlice>::const_iterator
   10698            LSIt = LoadedSlices.begin(),
   10699            LSItEnd = LoadedSlices.end();
   10700        LSIt != LSItEnd; ++LSIt) {
   10701     SDValue SliceInst = LSIt->loadSlice();
   10702     CombineTo(LSIt->Inst, SliceInst, true);
   10703     if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
   10704       SliceInst = SliceInst.getOperand(0);
   10705     assert(SliceInst->getOpcode() == ISD::LOAD &&
   10706            "It takes more than a zext to get to the loaded slice!!");
   10707     ArgChains.push_back(SliceInst.getValue(1));
   10708   }
   10709 
   10710   SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
   10711                               ArgChains);
   10712   DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
   10713   return true;
   10714 }
   10715 
/// Check to see if V is (and load (ptr), imm), where the load is having
/// specific bytes cleared out.  If so, return the byte size being masked out
/// and the shift amount.
static std::pair<unsigned, unsigned>
CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
  // Result is (number of masked bytes, byte shift); (0, 0) means no match.
  std::pair<unsigned, unsigned> Result(0, 0);

  // Check for the structure we're looking for: (and (load ptr), constant).
  if (V->getOpcode() != ISD::AND ||
      !isa<ConstantSDNode>(V->getOperand(1)) ||
      !ISD::isNormalLoad(V->getOperand(0).getNode()))
    return Result;

  // Check the chain and pointer.
  LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0));
  if (LD->getBasePtr() != Ptr) return Result;  // Not from same pointer.

  // The store should be chained directly to the load or be an operand of a
  // tokenfactor.
  if (LD == Chain.getNode())
    ; // ok.
  else if (Chain->getOpcode() != ISD::TokenFactor)
    return Result; // Fail.
  else {
    // Scan the token factor's operands for the load.
    bool isOk = false;
    for (const SDValue &ChainOp : Chain->op_values())
      if (ChainOp.getNode() == LD) {
        isOk = true;
        break;
      }
    if (!isOk) return Result;
  }

  // This only handles simple types.
  if (V.getValueType() != MVT::i16 &&
      V.getValueType() != MVT::i32 &&
      V.getValueType() != MVT::i64)
    return Result;

  // Check the constant mask.  Invert it so that the bits being masked out are
  // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
  // follow the sign bit for uniformity.
  uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
  unsigned NotMaskLZ = countLeadingZeros(NotMask);
  if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
  unsigned NotMaskTZ = countTrailingZeros(NotMask);
  if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
  if (NotMaskLZ == 64) return Result;  // All zero mask.

  // See if we have a continuous run of bits.  If so, we have 0*1+0*
  if (countTrailingOnes(NotMask >> NotMaskTZ) + NotMaskTZ + NotMaskLZ != 64)
    return Result;

  // Adjust NotMaskLZ down to be from the actual size of the int instead of i64.
  if (V.getValueType() != MVT::i64 && NotMaskLZ)
    NotMaskLZ -= 64-V.getValueSizeInBits();

  // Width of the masked-out region in bytes; only power-of-two-byte-sized
  // regions (i8/i16/i32) can be stored directly.
  unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8;
  switch (MaskedBytes) {
  case 1:
  case 2:
  case 4: break;
  default: return Result; // All one mask, or 5-byte mask.
  }

  // Verify that the first bit starts at a multiple of mask so that the access
  // is aligned the same as the access width.
  if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result;

  Result.first = MaskedBytes;
  Result.second = NotMaskTZ/8;
  return Result;
}
   10789 
   10790 
/// Check to see if IVal is something that provides a value as specified by
/// MaskInfo. If so, replace the specified store with a narrower store of
/// truncated IVal.
///
/// \param MaskInfo (number of masked bytes, byte shift) as computed by
///        CheckForMaskedLoad.
/// \param IVal the value being OR'ed into the masked region.
/// \param St the original (wide) store to replace.
/// \return the new narrower store node, or nullptr if the transformation
///         does not apply.
static SDNode *
ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
                                SDValue IVal, StoreSDNode *St,
                                DAGCombiner *DC) {
  unsigned NumBytes = MaskInfo.first;
  unsigned ByteShift = MaskInfo.second;
  SelectionDAG &DAG = DC->getDAG();

  // Check to see if IVal is all zeros in the part being masked in by the 'or'
  // that uses this.  If not, this is not a replacement.
  APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(),
                                  ByteShift*8, (ByteShift+NumBytes)*8);
  if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr;

  // Check that it is legal on the target to do this.  It is legal if the new
  // VT we're shrinking to (i8/i16/i32) is legal or we're still before type
  // legalization.
  MVT VT = MVT::getIntegerVT(NumBytes*8);
  if (!DC->isTypeLegal(VT))
    return nullptr;

  // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
  // shifted by ByteShift and truncated down to NumBytes.
  if (ByteShift) {
    SDLoc DL(IVal);
    IVal = DAG.getNode(ISD::SRL, DL, IVal.getValueType(), IVal,
                       DAG.getConstant(ByteShift*8, DL,
                                    DC->getShiftAmountTy(IVal.getValueType())));
  }

  // Figure out the offset for the store and the alignment of the access.
  unsigned StOffset;
  unsigned NewAlign = St->getAlignment();

  // On little-endian targets the byte offset equals the shift in bytes; on
  // big-endian targets it is counted from the other end of the value.
  if (DAG.getDataLayout().isLittleEndian())
    StOffset = ByteShift;
  else
    StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes;

  SDValue Ptr = St->getBasePtr();
  if (StOffset) {
    // Advance the pointer and reduce the known alignment accordingly.
    SDLoc DL(IVal);
    Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(),
                      Ptr, DAG.getConstant(StOffset, DL, Ptr.getValueType()));
    NewAlign = MinAlign(NewAlign, StOffset);
  }

  // Truncate down to the new size.
  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);

  ++OpsNarrowed;
  return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                      St->getPointerInfo().getWithOffset(StOffset),
                      false, false, NewAlign).getNode();
}
   10849 
   10850 
/// Look for sequence of load / op / store where op is one of 'or', 'xor', and
/// 'and' of immediates. If 'op' is only touching some of the loaded bits, try
/// narrowing the load and store if it would end up being a win for performance
/// or code size.
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only handle plain (non-truncating) scalar stores whose value feeds
  // nothing else; otherwise narrowing could change other users' results.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes.  If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // From here on we require "store (op (load P), cst), P" with op being one
  // of OR/XOR/AND and cst a constant immediate.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    // The load must read exactly the memory the store writes (same base
    // pointer and address space) for the narrowed sequence to be equivalent.
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, the bits being changed are the *zero* bits of the mask, so
    // work with the complement; OR/XOR change the set bits directly.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // [ShAmt, MSB] is the range of bit positions actually modified by the op.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    // The narrowing should be profitable, the load/store operation should be
    // legal (or custom) and the store size should be equal to the NewVT width.
    while (NewBW < BitWidth &&
           (NewVT.getStoreSizeInBits() != NewBW ||
            !TLI.isOperationLegalOrCustom(Opc, NewVT) ||
            !TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    // Only transform if every changed bit fits in the single NewBW-wide
    // chunk beginning at ShAmt.
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (DAG.getDataLayout().isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // Refuse to emit a memory access that would be under-aligned for the
      // narrow type's ABI alignment.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < DAG.getDataLayout().getABITypeAlignment(NewVTTy))
        return SDValue();

      // Build the narrowed load / op / store sequence at Ptr + PtrOff.
      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, SDLoc(LD),
                                                   Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign,
                                  LD->getAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, SDLoc(Value),
                                                   NewVT));
      SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorklist(NewPtr.getNode());
      AddToWorklist(NewLD.getNode());
      AddToWorklist(NewVal.getNode());
      WorklistRemover DeadNodes(*this);
      // Redirect chain users of the wide load to the narrow load, leaving
      // the original wide load dead.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}
   10979 
/// For a given floating point load / store pair, if the load value isn't used
/// by any other operations, then consider transforming the pair to integer
/// load / store operations if the target deems the transformation profitable.
SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  // Match "store (load X), Y" where the loaded value has no other users and
  // the store's chain comes straight from the load.
  if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) &&
      Value.hasOneUse() &&
      Chain == SDValue(Value.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(Value);
    EVT VT = LD->getMemoryVT();
    // Restrict to same-typed FP load/store pairs in address space 0;
    // non-temporal accesses are left untouched.
    if (!VT.isFloatingPoint() ||
        VT != ST->getMemoryVT() ||
        LD->isNonTemporal() ||
        ST->isNonTemporal() ||
        LD->getPointerInfo().getAddrSpace() != 0 ||
        ST->getPointerInfo().getAddrSpace() != 0)
      return SDValue();

    // The same-sized integer load/store must be legal, and the target must
    // consider the FP->integer conversion desirable on both sides.
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
    if (!TLI.isOperationLegal(ISD::LOAD, IntVT) ||
        !TLI.isOperationLegal(ISD::STORE, IntVT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) ||
        !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT))
      return SDValue();

    // Don't create integer accesses that would be under-aligned with
    // respect to the integer type's ABI alignment.
    unsigned LDAlign = LD->getAlignment();
    unsigned STAlign = ST->getAlignment();
    Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext());
    unsigned ABIAlign = DAG.getDataLayout().getABITypeAlignment(IntVTTy);
    if (LDAlign < ABIAlign || STAlign < ABIAlign)
      return SDValue();

    SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
                                LD->getChain(), LD->getBasePtr(),
                                LD->getPointerInfo(),
                                false, false, false, LDAlign);

    SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
                                 NewLD, ST->getBasePtr(),
                                 ST->getPointerInfo(),
                                 false, false, STAlign);

    AddToWorklist(NewLD.getNode());
    AddToWorklist(NewST.getNode());
    WorklistRemover DeadNodes(*this);
    // Re-wire chain users of the old FP load to the new integer load so the
    // FP pair becomes dead.
    DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1));
    ++LdStFP2Int;
    return NewST;
  }

  return SDValue();
}
   11034 
namespace {
/// Helper struct to parse and store a memory address as base + index + offset.
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
/// to store whether there was a sign extension involved in the index
/// computation.
///  (load (i64 add (i64 copyfromreg %c)
///                 (i64 signextend (add (i8 load %index)
///                                      (i8 1))))
/// vs
///
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
///                                         (i32 1)))))
struct BaseIndexOffset {
  SDValue Base;
  SDValue Index;
  int64_t Offset;
  bool IsIndexSignExt;

  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}

  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
                  bool IsIndexSignExt) :
    Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}

  /// Return true if \p Other has the same Base, Index, and sign-extension
  /// flag. Offset is deliberately NOT compared; callers reason about the
  /// constant offsets separately.
  bool equalBaseIndex(const BaseIndexOffset &Other) {
    return Other.Base == Base && Other.Index == Index &&
      Other.IsIndexSignExt == IsIndexSignExt;
  }

  /// Parses tree in Ptr for base, index, offset addresses.
  static BaseIndexOffset match(SDValue Ptr, SelectionDAG &DAG) {
    bool IsIndexSignExt = false;

    // Split up a folded GlobalAddress+Offset into its component parts.
    if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Ptr))
      if (GA->getOpcode() == ISD::GlobalAddress && GA->getOffset() != 0) {
        // Rebuild the global address with a zero internal offset and carry
        // the original offset in the Offset field instead.
        return BaseIndexOffset(DAG.getGlobalAddress(GA->getGlobal(),
                                                    SDLoc(GA),
                                                    GA->getValueType(0),
                                                    /*Offset=*/0,
                                                    /*isTargetGA=*/false,
                                                    GA->getTargetFlags()),
                               SDValue(),
                               GA->getOffset(),
                               IsIndexSignExt);
      }

    // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
    // instruction, then it could be just the BASE or everything else we don't
    // know how to handle. Just use Ptr as BASE and give up.
    if (Ptr->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // We know that we have at least an ADD instruction. Try to pattern match
    // the simple case of BASE + OFFSET.
    if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
      int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
      return  BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
                              IsIndexSignExt);
    }

    // Inside a loop the current BASE pointer is calculated using an ADD and a
    // MUL instruction. In this case Ptr is the actual BASE pointer.
    // (i64 add (i64 %array_ptr)
    //          (i64 mul (i64 %induction_var)
    //                   (i64 %element_size)))
    if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Look at Base + Index + Offset cases.
    SDValue Base = Ptr->getOperand(0);
    SDValue IndexOffset = Ptr->getOperand(1);

    // Skip signextends.
    if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
      IndexOffset = IndexOffset->getOperand(0);
      IsIndexSignExt = true;
    }

    // Either the case of Base + Index (no offset) or something else.
    if (IndexOffset->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);

    // Now we have the case of Base + Index + offset.
    SDValue Index = IndexOffset->getOperand(0);
    SDValue Offset = IndexOffset->getOperand(1);

    // The inner add must end in a constant to count as an offset; otherwise
    // fall back to treating the entire Ptr as the base.
    if (!isa<ConstantSDNode>(Offset))
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Ignore signextends.
    if (Index->getOpcode() == ISD::SIGN_EXTEND) {
      Index = Index->getOperand(0);
      IsIndexSignExt = true;
    } else IsIndexSignExt = false;

    int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
    return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
  }
};
} // namespace
   11138 
   11139 // This is a helper function for visitMUL to check the profitability
   11140 // of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
   11141 // MulNode is the original multiply, AddNode is (add x, c1),
   11142 // and ConstNode is c2.
   11143 //
   11144 // If the (add x, c1) has multiple uses, we could increase
   11145 // the number of adds if we make this transformation.
   11146 // It would only be worth doing this if we can remove a
   11147 // multiply in the process. Check for that here.
   11148 // To illustrate:
   11149 //     (A + c1) * c3
   11150 //     (A + c2) * c3
   11151 // We're checking for cases where we have common "c3 * A" expressions.
   11152 bool DAGCombiner::isMulAddWithConstProfitable(SDNode *MulNode,
   11153                                               SDValue &AddNode,
   11154                                               SDValue &ConstNode) {
   11155   APInt Val;
   11156 
   11157   // If the add only has one use, this would be OK to do.
   11158   if (AddNode.getNode()->hasOneUse())
   11159     return true;
   11160 
   11161   // Walk all the users of the constant with which we're multiplying.
   11162   for (SDNode *Use : ConstNode->uses()) {
   11163 
   11164     if (Use == MulNode) // This use is the one we're on right now. Skip it.
   11165       continue;
   11166 
   11167     if (Use->getOpcode() == ISD::MUL) { // We have another multiply use.
   11168       SDNode *OtherOp;
   11169       SDNode *MulVar = AddNode.getOperand(0).getNode();
   11170 
   11171       // OtherOp is what we're multiplying against the constant.
   11172       if (Use->getOperand(0) == ConstNode)
   11173         OtherOp = Use->getOperand(1).getNode();
   11174       else
   11175         OtherOp = Use->getOperand(0).getNode();
   11176 
   11177       // Check to see if multiply is with the same operand of our "add".
   11178       //
   11179       //     ConstNode  = CONST
   11180       //     Use = ConstNode * A  <-- visiting Use. OtherOp is A.
   11181       //     ...
   11182       //     AddNode  = (A + c1)  <-- MulVar is A.
   11183       //         = AddNode * ConstNode   <-- current visiting instruction.
   11184       //
   11185       // If we make this transformation, we will have a common
   11186       // multiply (ConstNode * A) that we can save.
   11187       if (OtherOp == MulVar)
   11188         return true;
   11189 
   11190       // Now check to see if a future expansion will give us a common
   11191       // multiply.
   11192       //
   11193       //     ConstNode  = CONST
   11194       //     AddNode    = (A + c1)
   11195       //     ...   = AddNode * ConstNode <-- current visiting instruction.
   11196       //     ...
   11197       //     OtherOp = (A + c2)
   11198       //     Use     = OtherOp * ConstNode <-- visiting Use.
   11199       //
   11200       // If we make this transformation, we will have a common
   11201       // multiply (CONST * A) after we also do the same transformation
   11202       // to the "t2" instruction.
   11203       if (OtherOp->getOpcode() == ISD::ADD &&
   11204           DAG.isConstantIntBuildVectorOrConstantInt(OtherOp->getOperand(1)) &&
   11205           OtherOp->getOperand(0).getNode() == MulVar)
   11206         return true;
   11207     }
   11208   }
   11209 
   11210   // Didn't find a case where this would be profitable.
   11211   return false;
   11212 }
   11213 
   11214 SDValue DAGCombiner::getMergedConstantVectorStore(
   11215     SelectionDAG &DAG, const SDLoc &SL, ArrayRef<MemOpLink> Stores,
   11216     SmallVectorImpl<SDValue> &Chains, EVT Ty) const {
   11217   SmallVector<SDValue, 8> BuildVector;
   11218 
   11219   for (unsigned I = 0, E = Ty.getVectorNumElements(); I != E; ++I) {
   11220     StoreSDNode *St = cast<StoreSDNode>(Stores[I].MemNode);
   11221     Chains.push_back(St->getChain());
   11222     BuildVector.push_back(St->getValue());
   11223   }
   11224 
   11225   return DAG.getBuildVector(Ty, SL, BuildVector);
   11226 }
   11227 
/// Merge the first \p NumStores candidates in \p StoreNodes into one wide
/// store of either a vector (when \p UseVector) or a single wide integer
/// constant. Returns true and rewrites the DAG on success.
bool DAGCombiner::MergeStoresOfConstantsOrVecElts(
                  SmallVectorImpl<MemOpLink> &StoreNodes, EVT MemVT,
                  unsigned NumStores, bool IsConstantSrc, bool UseVector) {
  // Make sure we have something to merge.
  if (NumStores < 2)
    return false;

  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  unsigned LatestNodeUsed = 0;

  for (unsigned i=0; i < NumStores; ++i) {
    // Find a chain for the new wide-store operand. Notice that some
    // of the store nodes that we found may not be selected for inclusion
    // in the wide store. The chain we use needs to be the chain of the
    // latest store node which is *used* and replaced by the wide store.
    // NOTE(review): this picks the *minimum* SequenceNum; sequence numbers
    // appear to be assigned walking up the chain from the final store, so
    // smaller means later -- confirm against getStoreMergeAndAliasCandidates.
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;
  }

  SmallVector<SDValue, 8> Chains;

  // The latest Node in the DAG.
  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;
  SDLoc DL(StoreNodes[0].MemNode);

  SDValue StoredVal;
  if (UseVector) {
    bool IsVec = MemVT.isVector();
    unsigned Elts = NumStores;
    if (IsVec) {
      // When merging vector stores, get the total number of elements.
      Elts *= MemVT.getVectorNumElements();
    }
    // Get the type for the merged vector store.
    EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
    assert(TLI.isTypeLegal(Ty) && "Illegal vector store");

    if (IsConstantSrc) {
      StoredVal = getMergedConstantVectorStore(DAG, DL, StoreNodes, Chains, Ty);
    } else {
      SmallVector<SDValue, 8> Ops;
      for (unsigned i = 0; i < NumStores; ++i) {
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
        SDValue Val = St->getValue();
        // All operands of BUILD_VECTOR / CONCAT_VECTOR must have the same type.
        if (Val.getValueType() != MemVT)
          return false;
        Ops.push_back(Val);
        Chains.push_back(St->getChain());
      }

      // Build the extracted vector elements back into a vector.
      StoredVal = DAG.getNode(IsVec ? ISD::CONCAT_VECTORS : ISD::BUILD_VECTOR,
                              DL, Ty, Ops);    }
  } else {
    // We should always use a vector store when merging extracted vector
    // elements, so this path implies a store of constants.
    assert(IsConstantSrc && "Merged vector elements should use vector store");

    unsigned SizeInBits = NumStores * ElementSizeBytes * 8;
    APInt StoreInt(SizeInBits, 0);

    // Construct a single integer constant which is made of the smaller
    // constant inputs.
    bool IsLE = DAG.getDataLayout().isLittleEndian();
    for (unsigned i = 0; i < NumStores; ++i) {
      // On little-endian targets, visit the candidates from the highest index
      // down so that the first candidate ends up in the low bits of StoreInt.
      unsigned Idx = IsLE ? (NumStores - 1 - i) : i;
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
      Chains.push_back(St->getChain());

      SDValue Val = St->getValue();
      StoreInt <<= ElementSizeBytes * 8;
      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
        StoreInt |= C->getAPIntValue().zext(SizeInBits);
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
        StoreInt |= C->getValueAPF().bitcastToAPInt().zext(SizeInBits);
      } else {
        llvm_unreachable("Invalid constant element type");
      }
    }

    // Create the new Load and Store operations.
    EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), SizeInBits);
    StoredVal = DAG.getConstant(StoreInt, DL, StoreTy);
  }

  assert(!Chains.empty());

  // Emit the wide store at the first candidate's address, with a TokenFactor
  // joining all the candidates' input chains.
  SDValue NewChain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains);
  SDValue NewStore = DAG.getStore(NewChain, DL, StoredVal,
                                  FirstInChain->getBasePtr(),
                                  FirstInChain->getPointerInfo(),
                                  false, false,
                                  FirstInChain->getAlignment());

  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
  if (UseAA) {
    // Replace all merged stores with the new store.
    for (unsigned i = 0; i < NumStores; ++i)
      CombineTo(StoreNodes[i].MemNode, NewStore);
  } else {
    // Replace the last store with the new store.
    CombineTo(LatestOp, NewStore);
    // Erase all other stores.
    for (unsigned i = 0; i < NumStores; ++i) {
      if (StoreNodes[i].MemNode == LatestOp)
        continue;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      // ReplaceAllUsesWith will replace all uses that existed when it was
      // called, but graph optimizations may cause new ones to appear. For
      // example, the case in pr14333 looks like
      //
      //  St's chain -> St -> another store -> X
      //
      // And the only difference from St to the other store is the chain.
      // When we change it's chain to be St's chain they become identical,
      // get CSEed and the net result is that X is now a use of St.
      // Since we know that St is redundant, just iterate.
      while (!St->use_empty())
        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
      deleteAndRecombine(St);
    }
  }

  return true;
}
   11356 
/// Collect stores that may be merged with \p St into \p StoreNodes, and
/// record any loads encountered while walking the chain in \p AliasLoadNodes
/// for later alias checking. Candidates must share St's base/index address
/// and memory type.
void DAGCombiner::getStoreMergeAndAliasCandidates(
    StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
    SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);

  // We must have a base and an offset.
  if (!BasePtr.Base.getNode())
    return;

  // Do not handle stores to undef base pointers.
  if (BasePtr.Base.isUndef())
    return;

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  EVT MemVT = St->getMemoryVT();
  // Seq increments for each candidate found; the walk below starts at St
  // and moves to earlier stores, so smaller Seq = later in the chain.
  unsigned Seq = 0;
  StoreSDNode *Index = St;


  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();

  if (UseAA) {
    // Look at other users of the same chain. Stores on the same chain do not
    // alias. If combiner-aa is enabled, non-aliasing stores are canonicalized
    // to be on the same chain, so don't bother looking at adjacent chains.

    SDValue Chain = St->getChain();
    for (auto I = Chain->use_begin(), E = Chain->use_end(); I != E; ++I) {
      if (StoreSDNode *OtherST = dyn_cast<StoreSDNode>(*I)) {
        // Only consider users that take Chain as their chain operand
        // (operand 0), not e.g. as the stored value.
        if (I.getOperandNo() != 0)
          continue;

        if (OtherST->isVolatile() || OtherST->isIndexed())
          continue;

        if (OtherST->getMemoryVT() != MemVT)
          continue;

        BaseIndexOffset Ptr = BaseIndexOffset::match(OtherST->getBasePtr(), DAG);

        if (Ptr.equalBaseIndex(BasePtr))
          StoreNodes.push_back(MemOpLink(OtherST, Ptr.Offset, Seq++));
      }
    }

    return;
  }

  // Non-AA path: walk up the chain store by store.
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);

    // Check that the base pointer is the same as the original one.
    if (!Ptr.equalBaseIndex(BasePtr))
      break;

    // The memory operands must not be volatile.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // No truncation.
    if (Index->isTruncatingStore())
      break;

    // The stored memory type must be the same.
    if (Index->getMemoryVT() != MemVT)
      break;

    // We do not allow under-aligned stores in order to prevent
    // overriding stores. NOTE: this is a bad hack. Alignment SHOULD
    // be irrelevant here; what MATTERS is that we not move memory
    // operations that potentially overlap past each-other.
    if (Index->getAlignment() < MemVT.getStoreSize())
      break;

    // We found a potential memory operand to merge.
    StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));

    // Find the next memory operand in the chain. If the next operand in the
    // chain is a store then move up and continue the scan with the next
    // memory operand. If the next operand is a load save it and use alias
    // information to check if it interferes with anything.
    SDNode *NextInChain = Index->getChain().getNode();
    while (1) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        // Volatile loads terminate the scan entirely.
        if (Ldn->isVolatile()) {
          Index = nullptr;
          break;
        }

        // Save the load node for later. Continue the scan.
        AliasLoadNodes.push_back(Ldn);
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        // Anything else on the chain stops the scan.
        Index = nullptr;
        break;
      }
    }
  }
}
   11471 
   11472 // We need to check that merging these stores does not cause a loop
   11473 // in the DAG. Any store candidate may depend on another candidate
   11474 // indirectly through its operand (we already consider dependencies
   11475 // through the chain). Check in parallel by searching up from
   11476 // non-chain operands of candidates.
   11477 bool DAGCombiner::checkMergeStoreCandidatesForDependencies(
   11478     SmallVectorImpl<MemOpLink> &StoreNodes) {
   11479   SmallPtrSet<const SDNode *, 16> Visited;
   11480   SmallVector<const SDNode *, 8> Worklist;
   11481   // search ops of store candidates
   11482   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
   11483     SDNode *n = StoreNodes[i].MemNode;
   11484     // Potential loops may happen only through non-chain operands
   11485     for (unsigned j = 1; j < n->getNumOperands(); ++j)
   11486       Worklist.push_back(n->getOperand(j).getNode());
   11487   }
   11488   // search through DAG. We can stop early if we find a storenode
   11489   for (unsigned i = 0; i < StoreNodes.size(); ++i) {
   11490     if (SDNode::hasPredecessorHelper(StoreNodes[i].MemNode, Visited, Worklist))
   11491       return false;
   11492   }
   11493   return true;
   11494 }
   11495 
/// Try to merge a run of consecutive stores rooted at \p St (stores of
/// constants, of extracted vector elements, or of loaded values) into fewer,
/// wider stores. Returns true if the DAG was changed.
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
  if (OptLevel == CodeGenOpt::None)
    return false;

  EVT MemVT = St->getMemoryVT();
  int64_t ElementSizeBytes = MemVT.getSizeInBits() / 8;
  bool NoVectors = DAG.getMachineFunction().getFunction()->hasFnAttribute(
      Attribute::NoImplicitFloat);

  // This function cannot currently deal with non-byte-sized memory sizes.
  if (ElementSizeBytes * 8 != MemVT.getSizeInBits())
    return false;

  if (!MemVT.isSimple())
    return false;

  // Perform an early exit check. Do not bother looking at stored values that
  // are not constants, loads, or extracted vector elements.
  SDValue StoredVal = St->getValue();
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  bool IsConstantSrc = isa<ConstantSDNode>(StoredVal) ||
                       isa<ConstantFPSDNode>(StoredVal);
  bool IsExtractVecSrc = (StoredVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT ||
                          StoredVal.getOpcode() == ISD::EXTRACT_SUBVECTOR);

  if (!IsConstantSrc && !IsLoadSrc && !IsExtractVecSrc)
    return false;

  // Don't merge vectors into wider vectors if the source data comes from loads.
  // TODO: This restriction can be lifted by using logic similar to the
  // ExtractVecSrc case.
  if (MemVT.isVector() && IsLoadSrc)
    return false;

  // Only look at ends of store sequences: if our only user is another store,
  // that store will drive the merge when it is visited.
  SDValue Chain = SDValue(St, 0);
  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
    return false;

  // Save the LoadSDNodes that we find in the chain.
  // We need to make sure that these nodes do not interfere with
  // any of the store nodes.
  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;

  // Save the StoreSDNodes that we find in the chain.
  SmallVector<MemOpLink, 8> StoreNodes;

  getStoreMergeAndAliasCandidates(St, StoreNodes, AliasLoadNodes);

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Only do the (non-chain operand) dependence check in the AA case; in the
  // non-AA case only the last store on the chain is replaced, so no new
  // cycle can be formed.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
  if (UseAA && !checkMergeStoreCandidatesForDependencies(StoreNodes))
    return false;

  // Sort the memory operands according to their distance from the
  // base pointer.  As a secondary criteria: make sure stores coming
  // later in the code come first in the list. This is important for
  // the non-UseAA case, because we're merging stores into the FINAL
  // store along a chain which potentially contains aliasing stores.
  // Thus, if there are multiple stores to the same address, the last
  // one can be considered for merging but not the others.
  std::sort(StoreNodes.begin(), StoreNodes.end(),
            [](MemOpLink LHS, MemOpLink RHS) {
    return LHS.OffsetFromBase < RHS.OffsetFromBase ||
           (LHS.OffsetFromBase == RHS.OffsetFromBase &&
            LHS.SequenceNum < RHS.SequenceNum);
  });

  // Scan the memory operations on the chain and find the first non-consecutive
  // store memory address.
  unsigned LastConsecutiveStore = 0;
  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {

    // Check that the addresses are consecutive starting from the second
    // element in the list of stores.
    if (i > 0) {
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
    }

    // Check if this store interferes with any of the loads that we found.
    // If we find a load that aliases with this store, stop the sequence.
    if (std::any_of(AliasLoadNodes.begin(), AliasLoadNodes.end(),
                    [&](LSBaseSDNode* Ldn) {
                      return isAlias(Ldn, StoreNodes[i].MemNode);
                    }))
      break;

    // Mark this node as useful.
    LastConsecutiveStore = i;
  }

  // The node with the lowest store address.
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;
  unsigned FirstStoreAS = FirstInChain->getAddressSpace();
  unsigned FirstStoreAlign = FirstInChain->getAlignment();
  LLVMContext &Context = *DAG.getContext();
  const DataLayout &DL = DAG.getDataLayout();

  // Store the constants into memory as one consecutive store.
  if (IsConstantSrc) {
    // Track, as element counts, the widest legal integer store, the widest
    // legal vector store, and whether any stored constant is nonzero.
    unsigned LastLegalType = 0;
    unsigned LastLegalVectorType = 0;
    bool NonZero = false;
    for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue StoredVal = St->getValue();

      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
        NonZero |= !C->isNullValue();
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
        NonZero |= !C->getConstantFPValue()->isNullValue();
      } else {
        // Non-constant.
        break;
      }

      // Find a legal type for the constant store.
      unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
      EVT StoreTy = EVT::getIntegerVT(Context, SizeInBits);
      bool IsFast;
      if (TLI.isTypeLegal(StoreTy) &&
          TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                                 FirstStoreAlign, &IsFast) && IsFast) {
        LastLegalType = i+1;
      // Or check whether a truncstore is legal.
      } else if (TLI.getTypeAction(Context, StoreTy) ==
                 TargetLowering::TypePromoteInteger) {
        EVT LegalizedStoredValueTy =
          TLI.getTypeToTransformTo(Context, StoredVal.getValueType());
        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
            TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
                                   FirstStoreAS, FirstStoreAlign, &IsFast) &&
            IsFast) {
          LastLegalType = i + 1;
        }
      }

      // We only use vectors if the constant is known to be zero or the target
      // allows it and the function is not marked with the noimplicitfloat
      // attribute.
      if ((!NonZero || TLI.storeOfVectorConstantIsCheap(MemVT, i+1,
                                                        FirstStoreAS)) &&
          !NoVectors) {
        // Find a legal type for the vector store.
        EVT Ty = EVT::getVectorVT(Context, MemVT, i+1);
        if (TLI.isTypeLegal(Ty) &&
            TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                   FirstStoreAlign, &IsFast) && IsFast)
          LastLegalVectorType = i + 1;
      }
    }

    // Check if we found a legal integer type to store.
    if (LastLegalType == 0 && LastLegalVectorType == 0)
      return false;

    bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
    unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;

    return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumElem,
                                           true, UseVector);
  }

  // When extracting multiple vector elements, try to store them
  // in one vector store rather than a sequence of scalar stores.
  if (IsExtractVecSrc) {
    unsigned NumStoresToMerge = 0;
    bool IsVec = MemVT.isVector();
    for (unsigned i = 0; i < LastConsecutiveStore + 1; ++i) {
      StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
      unsigned StoreValOpcode = St->getValue().getOpcode();
      // This restriction could be loosened.
      // Bail out if any stored values are not elements extracted from a vector.
      // It should be possible to handle mixed sources, but load sources need
      // more careful handling (see the block of code below that handles
      // consecutive loads).
      if (StoreValOpcode != ISD::EXTRACT_VECTOR_ELT &&
          StoreValOpcode != ISD::EXTRACT_SUBVECTOR)
        return false;

      // Find a legal type for the vector store.
      unsigned Elts = i + 1;
      if (IsVec) {
        // When merging vector stores, get the total number of elements.
        Elts *= MemVT.getVectorNumElements();
      }
      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT.getScalarType(), Elts);
      bool IsFast;
      if (TLI.isTypeLegal(Ty) &&
          TLI.allowsMemoryAccess(Context, DL, Ty, FirstStoreAS,
                                 FirstStoreAlign, &IsFast) && IsFast)
        NumStoresToMerge = i + 1;
    }

    return MergeStoresOfConstantsOrVecElts(StoreNodes, MemVT, NumStoresToMerge,
                                           false, true);
  }

  // Below we handle the case of multiple consecutive stores that
  // come from multiple consecutive loads. We merge them into a single
  // wide load and a single wide store.

  // Look for load nodes which are used by the stored values.
  SmallVector<MemOpLink, 8> LoadNodes;

  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zext, volatile, indexed, and they must be consecutive.
  BaseIndexOffset LdBasePtr;
  for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
    StoreSDNode *St  = cast<StoreSDNode>(StoreNodes[i].MemNode);
    LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue());
    if (!Ld) break;

    // Loads must only have one use.
    if (!Ld->hasNUsesOfValue(1, 0))
      break;

    // The memory operands must not be volatile.
    if (Ld->isVolatile() || Ld->isIndexed())
      break;

    // We do not accept ext loads.
    if (Ld->getExtensionType() != ISD::NON_EXTLOAD)
      break;

    // The stored memory type must be the same.
    if (Ld->getMemoryVT() != MemVT)
      break;

    BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr(), DAG);
    // If this is not the first ptr that we check.
    if (LdBasePtr.Base.getNode()) {
      // The base ptr must be the same.
      if (!LdPtr.equalBaseIndex(LdBasePtr))
        break;
    } else {
      // Check that all other base pointers are the same as this one.
      LdBasePtr = LdPtr;
    }

    // We found a potential memory operand to merge.
    LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0));
  }

  if (LoadNodes.size() < 2)
    return false;

  // If we have load/store pair instructions and we only have two values,
  // don't bother.
  unsigned RequiredAlignment;
  if (LoadNodes.size() == 2 && TLI.hasPairedLoad(MemVT, RequiredAlignment) &&
      St->getAlignment() >= RequiredAlignment)
    return false;

  LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
  unsigned FirstLoadAS = FirstLoad->getAddressSpace();
  unsigned FirstLoadAlign = FirstLoad->getAlignment();

  // Scan the memory operations on the chain and find the first non-consecutive
  // load memory address. These variables hold the index in the store node
  // array.
  unsigned LastConsecutiveLoad = 0;
  // This variable refers to the size and not index in the array.
  unsigned LastLegalVectorType = 0;
  unsigned LastLegalIntegerType = 0;
  StartAddress = LoadNodes[0].OffsetFromBase;
  SDValue FirstChain = FirstLoad->getChain();
  for (unsigned i = 1; i < LoadNodes.size(); ++i) {
    // All loads must share the same chain.
    if (LoadNodes[i].MemNode->getChain() != FirstChain)
      break;

    int64_t CurrAddress = LoadNodes[i].OffsetFromBase;
    if (CurrAddress - StartAddress != (ElementSizeBytes * i))
      break;
    LastConsecutiveLoad = i;
    // Find a legal type for the vector store. Both the load side and the
    // store side must allow the access.
    EVT StoreTy = EVT::getVectorVT(Context, MemVT, i+1);
    bool IsFastSt, IsFastLd;
    if (TLI.isTypeLegal(StoreTy) &&
        TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                               FirstStoreAlign, &IsFastSt) && IsFastSt &&
        TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                               FirstLoadAlign, &IsFastLd) && IsFastLd) {
      LastLegalVectorType = i + 1;
    }

    // Find a legal type for the integer store.
    unsigned SizeInBits = (i+1) * ElementSizeBytes * 8;
    StoreTy = EVT::getIntegerVT(Context, SizeInBits);
    if (TLI.isTypeLegal(StoreTy) &&
        TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstStoreAS,
                               FirstStoreAlign, &IsFastSt) && IsFastSt &&
        TLI.allowsMemoryAccess(Context, DL, StoreTy, FirstLoadAS,
                               FirstLoadAlign, &IsFastLd) && IsFastLd)
      LastLegalIntegerType = i + 1;
    // Or check whether a truncstore and extload is legal.
    else if (TLI.getTypeAction(Context, StoreTy) ==
             TargetLowering::TypePromoteInteger) {
      EVT LegalizedStoredValueTy =
        TLI.getTypeToTransformTo(Context, StoreTy);
      if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) &&
          TLI.isLoadExtLegal(ISD::ZEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
          TLI.isLoadExtLegal(ISD::SEXTLOAD, LegalizedStoredValueTy, StoreTy) &&
          TLI.isLoadExtLegal(ISD::EXTLOAD, LegalizedStoredValueTy, StoreTy) &&
          TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
                                 FirstStoreAS, FirstStoreAlign, &IsFastSt) &&
          IsFastSt &&
          TLI.allowsMemoryAccess(Context, DL, LegalizedStoredValueTy,
                                 FirstLoadAS, FirstLoadAlign, &IsFastLd) &&
          IsFastLd)
        LastLegalIntegerType = i+1;
    }
  }

  // Only use vector types if the vector type is larger than the integer type.
  // If they are the same, use integers.
  bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors;
  unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType);

  // We add +1 here because the LastXXX variables refer to location while
  // the NumElem refers to array/index size.
  unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1;
  NumElem = std::min(LastLegalType, NumElem);

  if (NumElem < 2)
    return false;

  // Collect the chains from all merged stores.
  SmallVector<SDValue, 8> MergeStoreChains;
  MergeStoreChains.push_back(StoreNodes[0].MemNode->getChain());

  // The latest Node in the DAG.
  unsigned LatestNodeUsed = 0;
  for (unsigned i=1; i<NumElem; ++i) {
    // Find a chain for the new wide-store operand. Notice that some
    // of the store nodes that we found may not be selected for inclusion
    // in the wide store. The chain we use needs to be the chain of the
    // latest store node which is *used* and replaced by the wide store.
    if (StoreNodes[i].SequenceNum < StoreNodes[LatestNodeUsed].SequenceNum)
      LatestNodeUsed = i;

    MergeStoreChains.push_back(StoreNodes[i].MemNode->getChain());
  }

  LSBaseSDNode *LatestOp = StoreNodes[LatestNodeUsed].MemNode;

  // Find if it is better to use vectors or integers to load and store
  // to memory.
  EVT JointMemOpVT;
  if (UseVectorTy) {
    JointMemOpVT = EVT::getVectorVT(Context, MemVT, NumElem);
  } else {
    unsigned SizeInBits = NumElem * ElementSizeBytes * 8;
    JointMemOpVT = EVT::getIntegerVT(Context, SizeInBits);
  }

  SDLoc LoadDL(LoadNodes[0].MemNode);
  SDLoc StoreDL(StoreNodes[0].MemNode);

  // The merged loads are required to have the same incoming chain, so
  // using the first's chain is acceptable.
  SDValue NewLoad = DAG.getLoad(
      JointMemOpVT, LoadDL, FirstLoad->getChain(), FirstLoad->getBasePtr(),
      FirstLoad->getPointerInfo(), false, false, false, FirstLoadAlign);

  SDValue NewStoreChain =
    DAG.getNode(ISD::TokenFactor, StoreDL, MVT::Other, MergeStoreChains);

  SDValue NewStore = DAG.getStore(
    NewStoreChain, StoreDL, NewLoad, FirstInChain->getBasePtr(),
      FirstInChain->getPointerInfo(), false, false, FirstStoreAlign);

  // Transfer chain users from old loads to the new load.
  for (unsigned i = 0; i < NumElem; ++i) {
    LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
    DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1),
                                  SDValue(NewLoad.getNode(), 1));
  }

  if (UseAA) {
    // Replace all the stores with the new store.
    for (unsigned i = 0; i < NumElem; ++i)
      CombineTo(StoreNodes[i].MemNode, NewStore);
  } else {
    // Replace the last store with the new store.
    CombineTo(LatestOp, NewStore);
    // Erase all other stores.
    for (unsigned i = 0; i < NumElem; ++i) {
      // Remove all Store nodes.
      if (StoreNodes[i].MemNode == LatestOp)
        continue;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
      deleteAndRecombine(St);
    }
  }

  return true;
}
   11904 
   11905 SDValue DAGCombiner::replaceStoreChain(StoreSDNode *ST, SDValue BetterChain) {
   11906   SDLoc SL(ST);
   11907   SDValue ReplStore;
   11908 
   11909   // Replace the chain to avoid dependency.
   11910   if (ST->isTruncatingStore()) {
   11911     ReplStore = DAG.getTruncStore(BetterChain, SL, ST->getValue(),
   11912                                   ST->getBasePtr(), ST->getMemoryVT(),
   11913                                   ST->getMemOperand());
   11914   } else {
   11915     ReplStore = DAG.getStore(BetterChain, SL, ST->getValue(), ST->getBasePtr(),
   11916                              ST->getMemOperand());
   11917   }
   11918 
   11919   // Create token to keep both nodes around.
   11920   SDValue Token = DAG.getNode(ISD::TokenFactor, SL,
   11921                               MVT::Other, ST->getChain(), ReplStore);
   11922 
   11923   // Make sure the new and old chains are cleaned up.
   11924   AddToWorklist(Token.getNode());
   11925 
   11926   // Don't add users to work list.
   11927   return CombineTo(ST, Token, false);
   11928 }
   11929 
   11930 SDValue DAGCombiner::replaceStoreOfFPConstant(StoreSDNode *ST) {
   11931   SDValue Value = ST->getValue();
   11932   if (Value.getOpcode() == ISD::TargetConstantFP)
   11933     return SDValue();
   11934 
   11935   SDLoc DL(ST);
   11936 
   11937   SDValue Chain = ST->getChain();
   11938   SDValue Ptr = ST->getBasePtr();
   11939 
   11940   const ConstantFPSDNode *CFP = cast<ConstantFPSDNode>(Value);
   11941 
   11942   // NOTE: If the original store is volatile, this transform must not increase
   11943   // the number of stores.  For example, on x86-32 an f64 can be stored in one
   11944   // processor operation but an i64 (which is not legal) requires two.  So the
   11945   // transform should not be done in this case.
   11946 
   11947   SDValue Tmp;
   11948   switch (CFP->getSimpleValueType(0).SimpleTy) {
   11949   default:
   11950     llvm_unreachable("Unknown FP type");
   11951   case MVT::f16:    // We don't do this for these yet.
   11952   case MVT::f80:
   11953   case MVT::f128:
   11954   case MVT::ppcf128:
   11955     return SDValue();
   11956   case MVT::f32:
   11957     if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
   11958         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   11959       ;
   11960       Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
   11961                             bitcastToAPInt().getZExtValue(), SDLoc(CFP),
   11962                             MVT::i32);
   11963       return DAG.getStore(Chain, DL, Tmp, Ptr, ST->getMemOperand());
   11964     }
   11965 
   11966     return SDValue();
   11967   case MVT::f64:
   11968     if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
   11969          !ST->isVolatile()) ||
   11970         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
   11971       ;
   11972       Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
   11973                             getZExtValue(), SDLoc(CFP), MVT::i64);
   11974       return DAG.getStore(Chain, DL, Tmp,
   11975                           Ptr, ST->getMemOperand());
   11976     }
   11977 
   11978     if (!ST->isVolatile() &&
   11979         TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
   11980       // Many FP stores are not made apparent until after legalize, e.g. for
   11981       // argument passing.  Since this is so common, custom legalize the
   11982       // 64-bit integer store into two 32-bit stores.
   11983       uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
   11984       SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, SDLoc(CFP), MVT::i32);
   11985       SDValue Hi = DAG.getConstant(Val >> 32, SDLoc(CFP), MVT::i32);
   11986       if (DAG.getDataLayout().isBigEndian())
   11987         std::swap(Lo, Hi);
   11988 
   11989       unsigned Alignment = ST->getAlignment();
   11990       bool isVolatile = ST->isVolatile();
   11991       bool isNonTemporal = ST->isNonTemporal();
   11992       AAMDNodes AAInfo = ST->getAAInfo();
   11993 
   11994       SDValue St0 = DAG.getStore(Chain, DL, Lo,
   11995                                  Ptr, ST->getPointerInfo(),
   11996                                  isVolatile, isNonTemporal,
   11997                                  ST->getAlignment(), AAInfo);
   11998       Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
   11999                         DAG.getConstant(4, DL, Ptr.getValueType()));
   12000       Alignment = MinAlign(Alignment, 4U);
   12001       SDValue St1 = DAG.getStore(Chain, DL, Hi,
   12002                                  Ptr, ST->getPointerInfo().getWithOffset(4),
   12003                                  isVolatile, isNonTemporal,
   12004                                  Alignment, AAInfo);
   12005       return DAG.getNode(ISD::TokenFactor, DL, MVT::Other,
   12006                          St0, St1);
   12007     }
   12008 
   12009     return SDValue();
   12010   }
   12011 }
   12012 
/// Combine visitor for STORE nodes. Applies a fixed sequence of folds (the
/// order is significant; see the inline comments) and returns the replacement
/// value, or SDValue(N, 0) / an empty SDValue per DAGCombiner conventions.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    EVT SVT = Value.getOperand(0).getValueType();
    if (((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)) &&
        TLI.isStoreBitCastBeneficial(Value.getValueType(), SVT)) {
      unsigned OrigAlign = ST->getAlignment();
      bool Fast = false;
      // Only fold when the target supports a fast access of the source type
      // at the store's original alignment.
      if (TLI.allowsMemoryAccess(*DAG.getContext(), DAG.getDataLayout(), SVT,
                                 ST->getAddressSpace(), OrigAlign, &Fast) &&
          Fast) {
        return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
                            Ptr, ST->getPointerInfo(), ST->isVolatile(),
                            ST->isNonTemporal(), OrigAlign,
                            ST->getAAInfo());
      }
    }
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.isUndef() && ST->isUnindexed())
    return Chain;

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment()) {
        SDValue NewStore =
               DAG.getTruncStore(Chain, SDLoc(N), Value,
                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                 ST->isVolatile(), ST->isNonTemporal(), Align,
                                 ST->getAAInfo());
        // getTruncStore may CSE to an existing node; only replace if new.
        if (NewStore.getNode() != N)
          return CombineTo(ST, NewStore, true);
      }
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  if (SDValue NewST = TransformFPLoadStorePair(N))
    return NewST;

  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                  : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  // Debug-only: the -combiner-aa-only-func option restricts combiner AA to a
  // single named function.
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && ST->isUnindexed()) {
    // FIXME: We should do this even without AA enabled. AA will just allow
    // FindBetterChain to work in more situations. The problem with this is that
    // any combine that expects memory operations to be on consecutive chains
    // first needs to be updated to look for users of the same chain.

    // Walk up chain skipping non-aliasing memory nodes, on this store and any
    // adjacent stores.
    if (findBetterNeighborChains(ST)) {
      // replaceStoreChain uses CombineTo, which handled all of the worklist
      // manipulation. Return the original node to not do anything else.
      return SDValue(ST, 0);
    }
    Chain = ST->getChain();
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorklist(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                        APInt::getLowBitsSet(
                          Value.getValueType().getScalarType().getSizeInBits(),
                          ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is a store followed by a store with the same value to the same
  // location, then the store is dead/noop.
  if (StoreSDNode *ST1 = dyn_cast<StoreSDNode>(Chain)) {
    if (ST1->getBasePtr() == Ptr && ST->getMemoryVT() == ST1->getMemoryVT() &&
        ST1->getValue() == Value && ST->isUnindexed() && !ST->isVolatile() &&
        ST1->isUnindexed() && !ST1->isVolatile()) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  //
  // Make sure to do this only after attempting to merge stores in order to
  //  avoid changing the types of some subset of stores due to visit order,
  //  preventing their merging.
  if (isa<ConstantFPSDNode>(Value)) {
    if (SDValue NewSt = replaceStoreOfFPConstant(ST))
      return NewSt;
  }

  return ReduceLoadOpStoreWidth(N);
}
   12180 
   12181 SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   12182   SDValue InVec = N->getOperand(0);
   12183   SDValue InVal = N->getOperand(1);
   12184   SDValue EltNo = N->getOperand(2);
   12185   SDLoc dl(N);
   12186 
   12187   // If the inserted element is an UNDEF, just use the input vector.
   12188   if (InVal.isUndef())
   12189     return InVec;
   12190 
   12191   EVT VT = InVec.getValueType();
   12192 
   12193   // If we can't generate a legal BUILD_VECTOR, exit
   12194   if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
   12195     return SDValue();
   12196 
   12197   // Check that we know which element is being inserted
   12198   if (!isa<ConstantSDNode>(EltNo))
   12199     return SDValue();
   12200   unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
   12201 
   12202   // Canonicalize insert_vector_elt dag nodes.
   12203   // Example:
   12204   // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
   12205   // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
   12206   //
   12207   // Do this only if the child insert_vector node has one use; also
   12208   // do this only if indices are both constants and Idx1 < Idx0.
   12209   if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
   12210       && isa<ConstantSDNode>(InVec.getOperand(2))) {
   12211     unsigned OtherElt =
   12212       cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
   12213     if (Elt < OtherElt) {
   12214       // Swap nodes.
   12215       SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
   12216                                   InVec.getOperand(0), InVal, EltNo);
   12217       AddToWorklist(NewOp.getNode());
   12218       return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
   12219                          VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
   12220     }
   12221   }
   12222 
   12223   // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
   12224   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
   12225   // vector elements.
   12226   SmallVector<SDValue, 8> Ops;
   12227   // Do not combine these two vectors if the output vector will not replace
   12228   // the input vector.
   12229   if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
   12230     Ops.append(InVec.getNode()->op_begin(),
   12231                InVec.getNode()->op_end());
   12232   } else if (InVec.isUndef()) {
   12233     unsigned NElts = VT.getVectorNumElements();
   12234     Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
   12235   } else {
   12236     return SDValue();
   12237   }
   12238 
   12239   // Insert the element
   12240   if (Elt < Ops.size()) {
   12241     // All the operands of BUILD_VECTOR must have the same type;
   12242     // we enforce that here.
   12243     EVT OpVT = Ops[0].getValueType();
   12244     if (InVal.getValueType() != OpVT)
   12245       InVal = OpVT.bitsGT(InVal.getValueType()) ?
   12246                 DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
   12247                 DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
   12248     Ops[Elt] = InVal;
   12249   }
   12250 
   12251   // Return the new vector
   12252   return DAG.getBuildVector(VT, dl, Ops);
   12253 }
   12254 
/// Replace an extract_vector_elt of a non-volatile vector load with a scalar
/// load of just the requested element, then RAUW both the extract's value and
/// the original load's chain with the new load.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  assert(!OriginalLoad->isVolatile());

  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = DAG.getDataLayout().getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Give up if the element load would need stricter alignment than the
  // original provides, or if scalar loads of this type are unsupported.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the element's address: base pointer + index * element size.
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  SDLoc DL(EVE);
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    // Constant index: fold the byte offset and keep precise pointer info.
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    Offset = DAG.getConstant(PtrOff, DL, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: materialize index * store-size in pointer width; the
    // pointer info can no longer carry an exact offset.
    Offset = DAG.getZExtOrTrunc(EltNo, DL, PtrType);
    Offset = DAG.getNode(
        ISD::MUL, DL, PtrType, Offset,
        DAG.getConstant(VecEltVT.getStoreSize(), DL, PtrType));
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead (zext if the target supports it, else anyext).
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, ResultVT,
                                                  VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(
        ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(), NewPtr, MPI,
        VecEltVT, OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
  } else {
    // Same-size or narrower result: plain element load, then truncate or
    // bitcast to the extract's result type as needed.
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getAAInfo());
    Chain = Load.getValue(1);
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getBitcast(ResultVT, Load);
  }
  WorklistRemover DeadNodes(*this);
  // Replace both the extract's result and the old load's chain in one call.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorklist(Load.getNode());
  AddUsersToWorklist(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorklist(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}
   12333 
/// Combine EXTRACT_VECTOR_ELT: fold extracts of scalar_to_vector,
/// build_vector, bitcast, insert_vector_elt and vector_shuffle inputs, and
/// narrow extracts of vector loads into scalar loads.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  ConstantSDNode *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo);

  // extract_vector_elt (build_vector x, y), 1 -> y
  // Requires either a single-use build_vector, or a target that prefers
  // keeping build_vector sources alive.
  if (ConstEltNo &&
      InVec.getOpcode() == ISD::BUILD_VECTOR &&
      TLI.isTypeLegal(VT) &&
      (InVec.hasOneUse() ||
       TLI.aggressivelyPreferBuildVectorSources(VT))) {
    SDValue Elt = InVec.getOperand(ConstEltNo->getZExtValue());
    EVT InEltVT = Elt.getValueType();

    // Sometimes build_vector's scalar input types do not match result type.
    if (NVT == InEltVT)
      return Elt;

    // TODO: It may be useful to truncate if free if the build_vector implicitly
    // converts.
  }

  // extract_vector_elt (v2i32 (bitcast i64:x)), 0 -> i32 (trunc i64:x)
  // Only for extracting lane 0 of an integer result.
  if (ConstEltNo && InVec.getOpcode() == ISD::BITCAST && InVec.hasOneUse() &&
      ConstEltNo->isNullValue() && VT.isInteger()) {
    SDValue BCSrc = InVec.getOperand(0);
    if (BCSrc.getValueType().isScalarInteger())
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, BCSrc);
  }

  // extract_vector_elt (insert_vector_elt vec, val, idx), idx) -> val
  //
  // This only really matters if the index is non-constant since other combines
  // on the constant elements already work.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT &&
      EltNo == InVec.getOperand(2)) {
    SDValue Elt = InVec.getOperand(1);
    // Extend/truncate to the extract's result type for integer elements.
    return VT.isInteger() ? DAG.getAnyExtOrTrunc(Elt, SDLoc(N), NVT) : Elt;
  }

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (ConstEltNo && InVec.getOpcode() == ISD::VECTOR_SHUFFLE) {
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(ConstEltNo->getZExtValue());

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
    SDValue SVInVec;
    if (OrigElt < NumElem) {
      SVInVec = InVec->getOperand(0);
    } else {
      SVInVec = InVec->getOperand(1);
      OrigElt -= NumElem;
    }

    // If the chosen half is itself a build_vector, grab the scalar directly.
    if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) {
      SDValue InOp = SVInVec.getOperand(OrigElt);
      if (InOp.getValueType() != NVT) {
        assert(InOp.getValueType().isInteger() && NVT.isInteger());
        InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT);
      }

      return InOp;
    }

    // FIXME: We should handle recursing on other vector shuffles and
    // scalar_to_vector here as well.

    if (!LegalOperations) {
      EVT IndexTy = TLI.getVectorIdxTy(DAG.getDataLayout());
      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, SVInVec,
                         DAG.getConstant(OrigElt, SDLoc(SVOp), IndexTy));
    }
  }

  // From here on we try to turn the extract into a narrowed load.
  // BCNumEltsChanged records whether a bitcast below changed the element
  // count, which invalidates shuffle-mask inspection later.
  bool BCNumEltsChanged = false;
  EVT ExtVT = VT.getVectorElementType();
  EVT LVT = ExtVT;

  // If the result of load has to be truncated, then it's not necessarily
  // profitable.
  if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT))
    return SDValue();

  // Look through a bitcast to the underlying vector, tracking the new
  // element type.
  if (InVec.getOpcode() == ISD::BITCAST) {
    // Don't duplicate a load with other uses.
    if (!InVec.hasOneUse())
      return SDValue();

    EVT BCVT = InVec.getOperand(0).getValueType();
    if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType()))
      return SDValue();
    if (VT.getVectorNumElements() != BCVT.getVectorNumElements())
      BCNumEltsChanged = true;
    InVec = InVec.getOperand(0);
    ExtVT = BCVT.getVectorElementType();
  }

  // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size)
  // Variable-index case; the hasPredecessor check avoids creating a cycle
  // between the index computation and the load.
  if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() &&
      ISD::isNormalLoad(InVec.getNode()) &&
      !N->getOperand(1)->hasPredecessor(InVec.getNode())) {
    SDValue Index = N->getOperand(1);
    if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) {
      if (!OrigLoad->isVolatile()) {
        return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index,
                                                             OrigLoad);
      }
    }
  }

  // Perform only after legalization to ensure build_vector / vector_shuffle
  // optimizations have already been done.
  if (!LegalOperations) return SDValue();

  // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size)
  // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr)

  if (ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

    // Find the load feeding the extract, looking through scalar_to_vector
    // and vector_shuffle wrappers.
    LoadSDNode *LN0 = nullptr;
    const ShuffleVectorSDNode *SVN = nullptr;
    if (ISD::isNormalLoad(InVec.getNode())) {
      LN0 = cast<LoadSDNode>(InVec);
    } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR &&
               InVec.getOperand(0).getValueType() == ExtVT &&
               ISD::isNormalLoad(InVec.getOperand(0).getNode())) {
      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      LN0 = cast<LoadSDNode>(InVec.getOperand(0));
    } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) {
      // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1)
      // =>
      // (load $addr+1*size)

      // Don't duplicate a load with other uses.
      if (!InVec.hasOneUse())
        return SDValue();

      // If the bit convert changed the number of elements, it is unsafe
      // to examine the mask.
      if (BCNumEltsChanged)
        return SDValue();

      // Select the input vector, guarding against out of range extract vector.
      // NOTE(review): the guard uses '>' rather than '>=', so Elt == NumElems
      // still queries the mask — verify whether an out-of-range extract index
      // can reach this point.
      unsigned NumElems = VT.getVectorNumElements();
      int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt);
      InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1);

      // Look through a bitcast around the selected shuffle input.
      if (InVec.getOpcode() == ISD::BITCAST) {
        // Don't duplicate a load with other uses.
        if (!InVec.hasOneUse())
          return SDValue();

        InVec = InVec.getOperand(0);
      }
      if (ISD::isNormalLoad(InVec.getNode())) {
        LN0 = cast<LoadSDNode>(InVec);
        // Rebase the index into the selected half of the shuffle.
        Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems;
        EltNo = DAG.getConstant(Elt, SDLoc(EltNo), EltNo.getValueType());
      }
    }

    // Make sure we found a non-volatile load and the extractelement is
    // the only use.
    if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile())
      return SDValue();

    // If Idx was -1 above, Elt is going to be -1, so just return undef.
    if (Elt == -1)
      return DAG.getUNDEF(LVT);

    return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0);
  }

  return SDValue();
}
   12541 
   12542 // Simplify (build_vec (ext )) to (bitcast (build_vec ))
   12543 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) {
   12544   // We perform this optimization post type-legalization because
   12545   // the type-legalizer often scalarizes integer-promoted vectors.
   12546   // Performing this optimization before may create bit-casts which
   12547   // will be type-legalized to complex code sequences.
   12548   // We perform this optimization only before the operation legalizer because we
   12549   // may introduce illegal operations.
   12550   if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
   12551     return SDValue();
   12552 
   12553   unsigned NumInScalars = N->getNumOperands();
   12554   SDLoc dl(N);
   12555   EVT VT = N->getValueType(0);
   12556 
   12557   // Check to see if this is a BUILD_VECTOR of a bunch of values
   12558   // which come from any_extend or zero_extend nodes. If so, we can create
   12559   // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
   12560   // optimizations. We do not handle sign-extend because we can't fill the sign
   12561   // using shuffles.
   12562   EVT SourceType = MVT::Other;
   12563   bool AllAnyExt = true;
   12564 
   12565   for (unsigned i = 0; i != NumInScalars; ++i) {
   12566     SDValue In = N->getOperand(i);
   12567     // Ignore undef inputs.
   12568     if (In.isUndef()) continue;
   12569 
   12570     bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
   12571     bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;
   12572 
   12573     // Abort if the element is not an extension.
   12574     if (!ZeroExt && !AnyExt) {
   12575       SourceType = MVT::Other;
   12576       break;
   12577     }
   12578 
   12579     // The input is a ZeroExt or AnyExt. Check the original type.
   12580     EVT InTy = In.getOperand(0).getValueType();
   12581 
   12582     // Check that all of the widened source types are the same.
   12583     if (SourceType == MVT::Other)
   12584       // First time.
   12585       SourceType = InTy;
   12586     else if (InTy != SourceType) {
   12587       // Multiple income types. Abort.
   12588       SourceType = MVT::Other;
   12589       break;
   12590     }
   12591 
   12592     // Check if all of the extends are ANY_EXTENDs.
   12593     AllAnyExt &= AnyExt;
   12594   }
   12595 
   12596   // In order to have valid types, all of the inputs must be extended from the
   12597   // same source type and all of the inputs must be any or zero extend.
   12598   // Scalar sizes must be a power of two.
   12599   EVT OutScalarTy = VT.getScalarType();
   12600   bool ValidTypes = SourceType != MVT::Other &&
   12601                  isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
   12602                  isPowerOf2_32(SourceType.getSizeInBits());
   12603 
   12604   // Create a new simpler BUILD_VECTOR sequence which other optimizations can
   12605   // turn into a single shuffle instruction.
   12606   if (!ValidTypes)
   12607     return SDValue();
   12608 
   12609   bool isLE = DAG.getDataLayout().isLittleEndian();
   12610   unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
   12611   assert(ElemRatio > 1 && "Invalid element size ratio");
   12612   SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
   12613                                DAG.getConstant(0, SDLoc(N), SourceType);
   12614 
   12615   unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
   12616   SmallVector<SDValue, 8> Ops(NewBVElems, Filler);
   12617 
   12618   // Populate the new build_vector
   12619   for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
   12620     SDValue Cast = N->getOperand(i);
   12621     assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
   12622             Cast.getOpcode() == ISD::ZERO_EXTEND ||
   12623             Cast.isUndef()) && "Invalid cast opcode");
   12624     SDValue In;
   12625     if (Cast.isUndef())
   12626       In = DAG.getUNDEF(SourceType);
   12627     else
   12628       In = Cast->getOperand(0);
   12629     unsigned Index = isLE ? (i * ElemRatio) :
   12630                             (i * ElemRatio + (ElemRatio - 1));
   12631 
   12632     assert(Index < Ops.size() && "Invalid index");
   12633     Ops[Index] = In;
   12634   }
   12635 
   12636   // The type of the new BUILD_VECTOR node.
   12637   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
   12638   assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
   12639          "Invalid vector size");
   12640   // Check if the new vector type is legal.
   12641   if (!isTypeLegal(VecVT)) return SDValue();
   12642 
   12643   // Make the new BUILD_VECTOR.
   12644   SDValue BV = DAG.getBuildVector(VecVT, dl, Ops);
   12645 
   12646   // The new BUILD_VECTOR node has the potential to be further optimized.
   12647   AddToWorklist(BV.getNode());
   12648   // Bitcast to the desired type.
   12649   return DAG.getBitcast(VT, BV);
   12650 }
   12651 
   12652 SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
   12653   EVT VT = N->getValueType(0);
   12654 
   12655   unsigned NumInScalars = N->getNumOperands();
   12656   SDLoc dl(N);
   12657 
   12658   EVT SrcVT = MVT::Other;
   12659   unsigned Opcode = ISD::DELETED_NODE;
   12660   unsigned NumDefs = 0;
   12661 
   12662   for (unsigned i = 0; i != NumInScalars; ++i) {
   12663     SDValue In = N->getOperand(i);
   12664     unsigned Opc = In.getOpcode();
   12665 
   12666     if (Opc == ISD::UNDEF)
   12667       continue;
   12668 
   12669     // If all scalar values are floats and converted from integers.
   12670     if (Opcode == ISD::DELETED_NODE &&
   12671         (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
   12672       Opcode = Opc;
   12673     }
   12674 
   12675     if (Opc != Opcode)
   12676       return SDValue();
   12677 
   12678     EVT InVT = In.getOperand(0).getValueType();
   12679 
   12680     // If all scalar values are typed differently, bail out. It's chosen to
   12681     // simplify BUILD_VECTOR of integer types.
   12682     if (SrcVT == MVT::Other)
   12683       SrcVT = InVT;
   12684     if (SrcVT != InVT)
   12685       return SDValue();
   12686     NumDefs++;
   12687   }
   12688 
   12689   // If the vector has just one element defined, it's not worth to fold it into
   12690   // a vectorized one.
   12691   if (NumDefs < 2)
   12692     return SDValue();
   12693 
   12694   assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
   12695          && "Should only handle conversion from integer to float.");
   12696   assert(SrcVT != MVT::Other && "Cannot determine source type!");
   12697 
   12698   EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);
   12699 
   12700   if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
   12701     return SDValue();
   12702 
   12703   // Just because the floating-point vector type is legal does not necessarily
   12704   // mean that the corresponding integer vector type is.
   12705   if (!isTypeLegal(NVT))
   12706     return SDValue();
   12707 
   12708   SmallVector<SDValue, 8> Opnds;
   12709   for (unsigned i = 0; i != NumInScalars; ++i) {
   12710     SDValue In = N->getOperand(i);
   12711 
   12712     if (In.isUndef())
   12713       Opnds.push_back(DAG.getUNDEF(SrcVT));
   12714     else
   12715       Opnds.push_back(In.getOperand(0));
   12716   }
   12717   SDValue BV = DAG.getBuildVector(NVT, dl, Opnds);
   12718   AddToWorklist(BV.getNode());
   12719 
   12720   return DAG.getNode(Opcode, dl, VT, BV);
   12721 }
   12722 
/// Combine BUILD_VECTOR: fold all-undef vectors, try the ext/convert
/// reductions, and turn a build_vector of extract_vector_elts from at most
/// two source vectors (optionally blended with zeros) into a shuffle.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // (build_vec (ext x)...) -> (bitcast (build_vec x...))
  if (SDValue V = reduceBuildVecExtToExtBuildVec(N))
    return V;

  // (build_vec (int_to_fp x)...) -> (int_to_fp (build_vec x...))
  if (SDValue V = reduceBuildVecConvertToConvertBuildVec(N))
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
  if (!isTypeLegal(VT))
    return SDValue();

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations && !TLI.isOperationLegal(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  // First pass: identify up to two distinct source vectors (VecIn1/VecIn2),
  // and whether any lane is a constant zero (blended via a zero vector).
  SDValue VecIn1, VecIn2;
  bool UsesZeroVector = false;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue Op = N->getOperand(i);
    // Ignore undef inputs.
    if (Op.isUndef()) continue;

    // See if we can combine this build_vector into a blend with a zero vector.
    // Only allowed while the second input slot is still free.
    if (!VecIn2.getNode() && (isNullConstant(Op) || isNullFPConstant(Op))) {
      UsesZeroVector = true;
      continue;
    }

    // If this input is something other than a EXTRACT_VECTOR_ELT with a
    // constant index, bail out.
    if (Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT ||
        !isa<ConstantSDNode>(Op.getOperand(1))) {
      VecIn1 = VecIn2 = SDValue(nullptr, 0);
      break;
    }

    // We allow up to two distinct input vectors.
    SDValue ExtractedFromVec = Op.getOperand(0);
    if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2)
      continue;

    if (!VecIn1.getNode()) {
      VecIn1 = ExtractedFromVec;
    } else if (!VecIn2.getNode() && !UsesZeroVector) {
      // The zero vector occupies the second input slot when present.
      VecIn2 = ExtractedFromVec;
    } else {
      // Too many inputs.
      VecIn1 = VecIn2 = SDValue(nullptr, 0);
      break;
    }
  }

  // If everything is good, we can make a shuffle operation.
  if (VecIn1.getNode()) {
    // Second pass: build the shuffle mask lane by lane.
    unsigned InNumElements = VecIn1.getValueType().getVectorNumElements();
    SmallVector<int, 8> Mask;
    for (unsigned i = 0; i != NumInScalars; ++i) {
      unsigned Opcode = N->getOperand(i).getOpcode();
      if (Opcode == ISD::UNDEF) {
        Mask.push_back(-1);
        continue;
      }

      // Operands can also be zero.
      // Zero lanes select from the zero vector placed in the second input.
      if (Opcode != ISD::EXTRACT_VECTOR_ELT) {
        assert(UsesZeroVector &&
               (Opcode == ISD::Constant || Opcode == ISD::ConstantFP) &&
               "Unexpected node found!");
        Mask.push_back(NumInScalars+i);
        continue;
      }

      // If extracting from the first vector, just use the index directly.
      SDValue Extract = N->getOperand(i);
      SDValue ExtVal = Extract.getOperand(1);
      unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue();
      if (Extract.getOperand(0) == VecIn1) {
        Mask.push_back(ExtIndex);
        continue;
      }

      // Otherwise, use InIdx + InputVecSize
      Mask.push_back(InNumElements + ExtIndex);
    }

    // Avoid introducing illegal shuffles with zero.
    if (UsesZeroVector && !TLI.isVectorClearMaskLegal(Mask, VT))
      return SDValue();

    // We can't generate a shuffle node with mismatched input and output types.
    // Attempt to transform a single input vector to the correct type.
    if ((VT != VecIn1.getValueType())) {
      // If the input vector type has a different base type to the output
      // vector type, bail out.
      EVT VTElemType = VT.getVectorElementType();
      if ((VecIn1.getValueType().getVectorElementType() != VTElemType) ||
          (VecIn2.getNode() &&
           (VecIn2.getValueType().getVectorElementType() != VTElemType)))
        return SDValue();

      // If the input vector is too small, widen it.
      // We only support widening of vectors which are half the size of the
      // output registers. For example XMM->YMM widening on X86 with AVX.
      EVT VecInT = VecIn1.getValueType();
      if (VecInT.getSizeInBits() * 2 == VT.getSizeInBits()) {
        // If we only have one small input, widen it by adding undef values.
        if (!VecIn2.getNode())
          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1,
                               DAG.getUNDEF(VecIn1.getValueType()));
        else if (VecIn1.getValueType() == VecIn2.getValueType()) {
          // If we have two small inputs of the same type, try to concat them.
          VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT, VecIn1, VecIn2);
          VecIn2 = SDValue(nullptr, 0);
        } else
          return SDValue();
      } else if (VecInT.getSizeInBits() == VT.getSizeInBits() * 2) {
        // If the input vector is too large, try to split it.
        // We don't support having two input vectors that are too large.
        // If the zero vector was used, we can not split the vector,
        // since we'd need 3 inputs.
        if (UsesZeroVector || VecIn2.getNode())
          return SDValue();

        if (!TLI.isExtractSubvectorCheap(VT, VT.getVectorNumElements()))
          return SDValue();

        // Try to replace VecIn1 with two extract_subvectors
        // No need to update the masks, they should still be correct.
        VecIn2 = DAG.getNode(
            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
            DAG.getConstant(VT.getVectorNumElements(), dl,
                            TLI.getVectorIdxTy(DAG.getDataLayout())));
        VecIn1 = DAG.getNode(
            ISD::EXTRACT_SUBVECTOR, dl, VT, VecIn1,
            DAG.getConstant(0, dl, TLI.getVectorIdxTy(DAG.getDataLayout())));
      } else
        return SDValue();
    }

    // Materialize the zero vector as the second shuffle input if needed.
    if (UsesZeroVector)
      VecIn2 = VT.isInteger() ? DAG.getConstant(0, dl, VT) :
                                DAG.getConstantFP(0.0, dl, VT);
    else
      // If VecIn2 is unused then change it to undef.
      VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);

    // Check that we were able to transform all incoming values to the same
    // type.
    if (VecIn2.getValueType() != VecIn1.getValueType() ||
        VecIn1.getValueType() != VT)
          return SDValue();

    // Return the new VECTOR_SHUFFLE node.
    SDValue Ops[2];
    Ops[0] = VecIn1;
    Ops[1] = VecIn2;
    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], Mask);
  }

  return SDValue();
}
   12896 
   12897 static SDValue combineConcatVectorOfScalars(SDNode *N, SelectionDAG &DAG) {
   12898   const TargetLowering &TLI = DAG.getTargetLoweringInfo();
   12899   EVT OpVT = N->getOperand(0).getValueType();
   12900 
   12901   // If the operands are legal vectors, leave them alone.
   12902   if (TLI.isTypeLegal(OpVT))
   12903     return SDValue();
   12904 
   12905   SDLoc DL(N);
   12906   EVT VT = N->getValueType(0);
   12907   SmallVector<SDValue, 8> Ops;
   12908 
   12909   EVT SVT = EVT::getIntegerVT(*DAG.getContext(), OpVT.getSizeInBits());
   12910   SDValue ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
   12911 
   12912   // Keep track of what we encounter.
   12913   bool AnyInteger = false;
   12914   bool AnyFP = false;
   12915   for (const SDValue &Op : N->ops()) {
   12916     if (ISD::BITCAST == Op.getOpcode() &&
   12917         !Op.getOperand(0).getValueType().isVector())
   12918       Ops.push_back(Op.getOperand(0));
   12919     else if (ISD::UNDEF == Op.getOpcode())
   12920       Ops.push_back(ScalarUndef);
   12921     else
   12922       return SDValue();
   12923 
   12924     // Note whether we encounter an integer or floating point scalar.
   12925     // If it's neither, bail out, it could be something weird like x86mmx.
   12926     EVT LastOpVT = Ops.back().getValueType();
   12927     if (LastOpVT.isFloatingPoint())
   12928       AnyFP = true;
   12929     else if (LastOpVT.isInteger())
   12930       AnyInteger = true;
   12931     else
   12932       return SDValue();
   12933   }
   12934 
   12935   // If any of the operands is a floating point scalar bitcast to a vector,
   12936   // use floating point types throughout, and bitcast everything.
   12937   // Replace UNDEFs by another scalar UNDEF node, of the final desired type.
   12938   if (AnyFP) {
   12939     SVT = EVT::getFloatingPointVT(OpVT.getSizeInBits());
   12940     ScalarUndef = DAG.getNode(ISD::UNDEF, DL, SVT);
   12941     if (AnyInteger) {
   12942       for (SDValue &Op : Ops) {
   12943         if (Op.getValueType() == SVT)
   12944           continue;
   12945         if (Op.isUndef())
   12946           Op = ScalarUndef;
   12947         else
   12948           Op = DAG.getBitcast(SVT, Op);
   12949       }
   12950     }
   12951   }
   12952 
   12953   EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SVT,
   12954                                VT.getSizeInBits() / SVT.getSizeInBits());
   12955   return DAG.getBitcast(VT, DAG.getBuildVector(VecVT, DL, Ops));
   12956 }
   12957 
// Check to see if this is a CONCAT_VECTORS of a bunch of EXTRACT_SUBVECTOR
// operations. If so, and if the EXTRACT_SUBVECTOR vector inputs come from at
// most two distinct vectors the same size as the result, attempt to turn this
// into a legal shuffle.
// Returns SDValue() if any operand is neither UNDEF nor such an extract, if
// more than two distinct source vectors are referenced, or if the resulting
// mask is not legal for the target.
static SDValue combineConcatVectorOfExtracts(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  EVT OpVT = N->getOperand(0).getValueType();
  int NumElts = VT.getVectorNumElements();
  int NumOpElts = OpVT.getVectorNumElements();

  // SV0/SV1 are the (at most two) distinct source vectors of the shuffle.
  SDValue SV0 = DAG.getUNDEF(VT), SV1 = DAG.getUNDEF(VT);
  SmallVector<int, 8> Mask;

  for (SDValue Op : N->ops()) {
    // Peek through any bitcast.
    while (Op.getOpcode() == ISD::BITCAST)
      Op = Op.getOperand(0);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (Op.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // What vector are we extracting the subvector from and at what index?
    SDValue ExtVec = Op.getOperand(0);

    // We want the EVT of the original extraction to correctly scale the
    // extraction index.
    EVT ExtVT = ExtVec.getValueType();

    // Peek through any bitcast.
    while (ExtVec.getOpcode() == ISD::BITCAST)
      ExtVec = ExtVec.getOperand(0);

    // UNDEF nodes convert to UNDEF shuffle mask values.
    if (ExtVec.isUndef()) {
      Mask.append((unsigned)NumOpElts, -1);
      continue;
    }

    // The subvector index must be a compile-time constant.
    if (!isa<ConstantSDNode>(Op.getOperand(1)))
      return SDValue();
    int ExtIdx = cast<ConstantSDNode>(Op.getOperand(1))->getZExtValue();

    // Ensure that we are extracting a subvector from a vector the same
    // size as the result.
    if (ExtVT.getSizeInBits() != VT.getSizeInBits())
      return SDValue();

    // Scale the subvector index to account for any bitcast.
    // ExtIdx is in units of ExtVT elements; convert it to units of VT
    // elements. If neither element count divides the other, the index
    // can't be represented exactly, so give up.
    int NumExtElts = ExtVT.getVectorNumElements();
    if (0 == (NumExtElts % NumElts))
      ExtIdx /= (NumExtElts / NumElts);
    else if (0 == (NumElts % NumExtElts))
      ExtIdx *= (NumElts / NumExtElts);
    else
      return SDValue();

    // At most we can reference 2 inputs in the final shuffle.
    if (SV0.isUndef() || SV0 == ExtVec) {
      SV0 = ExtVec;
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx);
    } else if (SV1.isUndef() || SV1 == ExtVec) {
      SV1 = ExtVec;
      // Indices into the second input are offset by NumElts by shuffle
      // mask convention.
      for (int i = 0; i != NumOpElts; ++i)
        Mask.push_back(i + ExtIdx + NumElts);
    } else {
      return SDValue();
    }
  }

  // Only emit the shuffle if the target can lower this mask.
  if (!DAG.getTargetLoweringInfo().isShuffleMaskLegal(Mask, VT))
    return SDValue();

  // Bitcast the sources back to VT (they may have been peeked through
  // bitcasts above) and build the shuffle.
  return DAG.getVectorShuffle(VT, SDLoc(N), DAG.getBitcast(VT, SV0),
                              DAG.getBitcast(VT, SV1), Mask);
}
   13040 
/// Visit a CONCAT_VECTORS node and try, in order:
///  - trivial folds (single operand, all-undef operands),
///  - concat_vectors(bitcast(scalar), undef...) -> scalar_to_vector,
///  - merging BUILD_VECTOR/UNDEF operands into one BUILD_VECTOR,
///  - the bitcast-scalar and extract-subvector combines below,
///  - recognizing an identity concat of extract_subvectors from one source.
/// Returns the replacement value, or SDValue() if no fold applies.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where all but the first of the vectors are undef.
  if (std::all_of(std::next(N->op_begin()), N->op_end(), [](const SDValue &Op) {
        return Op.isUndef();
      })) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0)->getValueType(0).isVector()) {
      SDValue Scalar = In->getOperand(0);

      // If the bitcast type isn't legal, it might be a trunc of a legal type;
      // look through the trunc so we can still do the transform:
      //   concat_vectors(trunc(scalar), undef) -> scalar_to_vector(scalar)
      if (Scalar->getOpcode() == ISD::TRUNCATE &&
          !TLI.isTypeLegal(Scalar.getValueType()) &&
          TLI.isTypeLegal(Scalar->getOperand(0).getValueType()))
        Scalar = Scalar->getOperand(0);

      EVT SclTy = Scalar->getValueType(0);

      // Only integer/FP scalars can be placed in a vector this way.
      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Build a vector type of the scalar with the same total width as VT,
      // and only proceed if both that type and the scalar type are legal.
      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
                                 VT.getSizeInBits() / SclTy.getSizeInBits());
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDLoc dl = SDLoc(N);
      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
      return DAG.getBitcast(VT, Res);
    }
  }

  // Fold any combination of BUILD_VECTOR or UNDEF nodes into one BUILD_VECTOR.
  // We have already tested above for an UNDEF only concatenation.
  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  auto IsBuildVectorOrUndef = [](const SDValue &Op) {
    return ISD::UNDEF == Op.getOpcode() || ISD::BUILD_VECTOR == Op.getOpcode();
  };
  if (llvm::all_of(N->ops(), IsBuildVectorOrUndef)) {
    SmallVector<SDValue, 8> Opnds;
    EVT SVT = VT.getScalarType();

    EVT MinVT = SVT;
    if (!SVT.isFloatingPoint()) {
      // If BUILD_VECTOR are from built from integer, they may have different
      // operand types. Get the smallest type and truncate all operands to it.
      bool FoundMinVT = false;
      for (const SDValue &Op : N->ops())
        if (ISD::BUILD_VECTOR == Op.getOpcode()) {
          EVT OpSVT = Op.getOperand(0)->getValueType(0);
          MinVT = (!FoundMinVT || OpSVT.bitsLE(MinVT)) ? OpSVT : MinVT;
          FoundMinVT = true;
        }
      assert(FoundMinVT && "Concat vector type mismatch");
    }

    for (const SDValue &Op : N->ops()) {
      EVT OpVT = Op.getValueType();
      unsigned NumElts = OpVT.getVectorNumElements();

      // The two 'if's are mutually exclusive: an operand is either UNDEF
      // or a BUILD_VECTOR (the all_of above guarantees one of the two).
      if (ISD::UNDEF == Op.getOpcode())
        Opnds.append(NumElts, DAG.getUNDEF(MinVT));

      if (ISD::BUILD_VECTOR == Op.getOpcode()) {
        if (SVT.isFloatingPoint()) {
          assert(SVT == OpVT.getScalarType() && "Concat vector type mismatch");
          Opnds.append(Op->op_begin(), Op->op_begin() + NumElts);
        } else {
          // Integer scalars may be wider than MinVT; truncate each one.
          for (unsigned i = 0; i != NumElts; ++i)
            Opnds.push_back(
                DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinVT, Op.getOperand(i)));
        }
      }
    }

    assert(VT.getVectorNumElements() == Opnds.size() &&
           "Concat vector type mismatch");
    return DAG.getBuildVector(VT, SDLoc(N), Opnds);
  }

  // Fold CONCAT_VECTORS of only bitcast scalars (or undef) to BUILD_VECTOR.
  if (SDValue V = combineConcatVectorOfScalars(N, DAG))
    return V;

  // Fold CONCAT_VECTORS of EXTRACT_SUBVECTOR (or undef) to VECTOR_SHUFFLE.
  if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT))
    if (SDValue V = combineConcatVectorOfExtracts(N, DAG))
      return V;

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
  SDValue SingleSource = SDValue();
  unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements();

  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Op = N->getOperand(i);

    // Undef parts are compatible with any source.
    if (Op.isUndef())
      continue;

    // Check if this is the identity extract:
    if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR)
      return SDValue();

    // Find the single incoming vector for the extract_subvector.
    if (SingleSource.getNode()) {
      if (Op.getOperand(0) != SingleSource)
        return SDValue();
    } else {
      SingleSource = Op.getOperand(0);

      // Check the source type is the same as the type of the result.
      // If not, this concat may extend the vector, so we can not
      // optimize it away.
      if (SingleSource.getValueType() != N->getValueType(0))
        return SDValue();
    }

    // Part i must read elements [i*PartNumElem, (i+1)*PartNumElem) of the
    // source for this to be a no-op concat.
    unsigned IdentityIndex = i * PartNumElem;
    ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1));
    // The extract index must be constant.
    if (!CS)
      return SDValue();

    // Check that we are reading from the identity index.
    if (CS->getZExtValue() != IdentityIndex)
      return SDValue();
  }

  if (SingleSource.getNode())
    return SingleSource;

  return SDValue();
}
   13192 
/// Visit an EXTRACT_SUBVECTOR node and try two folds:
///  - (extract_subvec (concat V1, V2, ...), i) -> Vi, when the concat's
///    pieces have exactly the result type, and
///  - (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) -> V2 when the
///    extract reads back exactly the inserted half, else an extract from V1.
/// Returns the replacement value, or SDValue() if no fold applies.
SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
  EVT NVT = N->getValueType(0);
  SDValue V = N->getOperand(0);

  if (V->getOpcode() == ISD::CONCAT_VECTORS) {
    // Combine:
    //    (extract_subvec (concat V1, V2, ...), i)
    // Into:
    //    Vi if possible
    // Only operand 0 is checked as 'concat' assumes all inputs of the same
    // type.
    if (V->getOperand(0).getValueType() != NVT)
      return SDValue();
    // NOTE(review): this assumes operand 1 is a constant index
    // (getConstantOperandVal asserts otherwise) - presumably guaranteed by
    // how EXTRACT_SUBVECTOR nodes are built here; confirm against callers.
    unsigned Idx = N->getConstantOperandVal(1);
    unsigned NumElems = NVT.getVectorNumElements();
    assert((Idx % NumElems) == 0 &&
           "IDX in concat is not a multiple of the result vector length.");
    return V->getOperand(Idx / NumElems);
  }

  // Skip bitcasting
  if (V->getOpcode() == ISD::BITCAST)
    V = V.getOperand(0);

  if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
    SDLoc dl(N);
    // Handle only simple case where vector being inserted and vector
    // being extracted are of same type, and are half size of larger vectors.
    EVT BigVT = V->getOperand(0).getValueType();
    EVT SmallVT = V->getOperand(1).getValueType();
    if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits())
      return SDValue();

    // Only handle cases where both indexes are constants with the same type.
    ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1));
    ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2));

    if (InsIdx && ExtIdx &&
        InsIdx->getValueType(0).getSizeInBits() <= 64 &&
        ExtIdx->getValueType(0).getSizeInBits() <= 64) {
      // Combine:
      //    (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx)
      // Into:
      //    indices are equal or bit offsets are equal => V1
      //    otherwise => (extract_subvec V1, ExtIdx)
      // The indices are compared as bit offsets because a bitcast may have
      // been skipped above, so NVT and SmallVT can have different element
      // widths even though they have equal total size.
      if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() ==
          ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits())
        return DAG.getBitcast(NVT, V->getOperand(1));
      // Extract directly from the insert's base vector, bitcast back to the
      // pre-skipped type so the node types line up with the original.
      return DAG.getNode(
          ISD::EXTRACT_SUBVECTOR, dl, NVT,
          DAG.getBitcast(N->getOperand(0).getValueType(), V->getOperand(0)),
          N->getOperand(1));
    }
  }

  return SDValue();
}
   13250 
   13251 static SDValue simplifyShuffleOperandRecursively(SmallBitVector &UsedElements,
   13252                                                  SDValue V, SelectionDAG &DAG) {
   13253   SDLoc DL(V);
   13254   EVT VT = V.getValueType();
   13255 
   13256   switch (V.getOpcode()) {
   13257   default:
   13258     return V;
   13259 
   13260   case ISD::CONCAT_VECTORS: {
   13261     EVT OpVT = V->getOperand(0).getValueType();
   13262     int OpSize = OpVT.getVectorNumElements();
   13263     SmallBitVector OpUsedElements(OpSize, false);
   13264     bool FoundSimplification = false;
   13265     SmallVector<SDValue, 4> NewOps;
   13266     NewOps.reserve(V->getNumOperands());
   13267     for (int i = 0, NumOps = V->getNumOperands(); i < NumOps; ++i) {
   13268       SDValue Op = V->getOperand(i);
   13269       bool OpUsed = false;
   13270       for (int j = 0; j < OpSize; ++j)
   13271         if (UsedElements[i * OpSize + j]) {
   13272           OpUsedElements[j] = true;
   13273           OpUsed = true;
   13274         }
   13275       NewOps.push_back(
   13276           OpUsed ? simplifyShuffleOperandRecursively(OpUsedElements, Op, DAG)
   13277                  : DAG.getUNDEF(OpVT));
   13278       FoundSimplification |= Op == NewOps.back();
   13279       OpUsedElements.reset();
   13280     }
   13281     if (FoundSimplification)
   13282       V = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, NewOps);
   13283     return V;
   13284   }
   13285 
   13286   case ISD::INSERT_SUBVECTOR: {
   13287     SDValue BaseV = V->getOperand(0);
   13288     SDValue SubV = V->getOperand(1);
   13289     auto *IdxN = dyn_cast<ConstantSDNode>(V->getOperand(2));
   13290     if (!IdxN)
   13291       return V;
   13292 
   13293     int SubSize = SubV.getValueType().getVectorNumElements();
   13294     int Idx = IdxN->getZExtValue();
   13295     bool SubVectorUsed = false;
   13296     SmallBitVector SubUsedElements(SubSize, false);
   13297     for (int i = 0; i < SubSize; ++i)
   13298       if (UsedElements[i + Idx]) {
   13299         SubVectorUsed = true;
   13300         SubUsedElements[i] = true;
   13301         UsedElements[i + Idx] = false;
   13302       }
   13303 
   13304     // Now recurse on both the base and sub vectors.
   13305     SDValue SimplifiedSubV =
   13306         SubVectorUsed
   13307             ? simplifyShuffleOperandRecursively(SubUsedElements, SubV, DAG)
   13308             : DAG.getUNDEF(SubV.getValueType());
   13309     SDValue SimplifiedBaseV = simplifyShuffleOperandRecursively(UsedElements, BaseV, DAG);
   13310     if (SimplifiedSubV != SubV || SimplifiedBaseV != BaseV)
   13311       V = DAG.getNode(ISD::INSERT_SUBVECTOR, DL, VT,
   13312                       SimplifiedBaseV, SimplifiedSubV, V->getOperand(2));
   13313     return V;
   13314   }
   13315   }
   13316 }
   13317 
   13318 static SDValue simplifyShuffleOperands(ShuffleVectorSDNode *SVN, SDValue N0,
   13319                                        SDValue N1, SelectionDAG &DAG) {
   13320   EVT VT = SVN->getValueType(0);
   13321   int NumElts = VT.getVectorNumElements();
   13322   SmallBitVector N0UsedElements(NumElts, false), N1UsedElements(NumElts, false);
   13323   for (int M : SVN->getMask())
   13324     if (M >= 0 && M < NumElts)
   13325       N0UsedElements[M] = true;
   13326     else if (M >= NumElts)
   13327       N1UsedElements[M - NumElts] = true;
   13328 
   13329   SDValue S0 = simplifyShuffleOperandRecursively(N0UsedElements, N0, DAG);
   13330   SDValue S1 = simplifyShuffleOperandRecursively(N1UsedElements, N1, DAG);
   13331   if (S0 == N0 && S1 == N1)
   13332     return SDValue();
   13333 
   13334   return DAG.getVectorShuffle(VT, SDLoc(SVN), S0, S1, SVN->getMask());
   13335 }
   13336 
// Tries to turn a shuffle of two CONCAT_VECTORS into a single concat,
// or turn a shuffle of a single concat into simpler shuffle then concat.
// The caller (visitVECTOR_SHUFFLE) guarantees that N0 is a CONCAT_VECTORS
// and that N1 is either UNDEF or a CONCAT_VECTORS of the same piece type.
// NOTE(review): the general loop below indexes into N1's operands for mask
// elements >= NumElts; this presumably relies on visitVECTOR_SHUFFLE having
// already rewritten references to an undef N1 as -1 - confirm before
// calling from elsewhere.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Special case: shuffle(concat(A,B)) can be more efficiently represented
  // as concat(shuffle(A,B),UNDEF) if the shuffle doesn't set any of the high
  // half vector elements.
  if (NumElemsPerConcat * 2 == NumElts && N1.isUndef() &&
      std::all_of(SVN->getMask().begin() + NumElemsPerConcat,
                  SVN->getMask().end(), [](int i) { return i == -1; })) {
    N0 = DAG.getVectorShuffle(ConcatVT, SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                              makeArrayRef(SVN->getMask().begin(), NumElemsPerConcat));
    N1 = DAG.getUNDEF(ConcatVT);
    return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0, N1);
  }

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    // Classify this subvector's mask slice: all-undef, no-undef, or mixed.
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // The slice must start on a concat-piece boundary...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ...and be a run of consecutive indices, i.e. an exact piece copy.
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Pick the referenced piece from N0's or N1's operand list.
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}
   13400 
   13401 SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
   13402   EVT VT = N->getValueType(0);
   13403   unsigned NumElts = VT.getVectorNumElements();
   13404 
   13405   SDValue N0 = N->getOperand(0);
   13406   SDValue N1 = N->getOperand(1);
   13407 
   13408   assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");
   13409 
   13410   // Canonicalize shuffle undef, undef -> undef
   13411   if (N0.isUndef() && N1.isUndef())
   13412     return DAG.getUNDEF(VT);
   13413 
   13414   ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);
   13415 
   13416   // Canonicalize shuffle v, v -> v, undef
   13417   if (N0 == N1) {
   13418     SmallVector<int, 8> NewMask;
   13419     for (unsigned i = 0; i != NumElts; ++i) {
   13420       int Idx = SVN->getMaskElt(i);
   13421       if (Idx >= (int)NumElts) Idx -= NumElts;
   13422       NewMask.push_back(Idx);
   13423     }
   13424     return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT), NewMask);
   13425   }
   13426 
   13427   // Canonicalize shuffle undef, v -> v, undef.  Commute the shuffle mask.
   13428   if (N0.isUndef())
   13429     return DAG.getCommutedVectorShuffle(*SVN);
   13430 
   13431   // Remove references to rhs if it is undef
   13432   if (N1.isUndef()) {
   13433     bool Changed = false;
   13434     SmallVector<int, 8> NewMask;
   13435     for (unsigned i = 0; i != NumElts; ++i) {
   13436       int Idx = SVN->getMaskElt(i);
   13437       if (Idx >= (int)NumElts) {
   13438         Idx = -1;
   13439         Changed = true;
   13440       }
   13441       NewMask.push_back(Idx);
   13442     }
   13443     if (Changed)
   13444       return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, NewMask);
   13445   }
   13446 
   13447   // If it is a splat, check if the argument vector is another splat or a
   13448   // build_vector.
   13449   if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
   13450     SDNode *V = N0.getNode();
   13451 
   13452     // If this is a bit convert that changes the element type of the vector but
   13453     // not the number of vector elements, look through it.  Be careful not to
   13454     // look though conversions that change things like v4f32 to v2f64.
   13455     if (V->getOpcode() == ISD::BITCAST) {
   13456       SDValue ConvInput = V->getOperand(0);
   13457       if (ConvInput.getValueType().isVector() &&
   13458           ConvInput.getValueType().getVectorNumElements() == NumElts)
   13459         V = ConvInput.getNode();
   13460     }
   13461 
   13462     if (V->getOpcode() == ISD::BUILD_VECTOR) {
   13463       assert(V->getNumOperands() == NumElts &&
   13464              "BUILD_VECTOR has wrong number of operands");
   13465       SDValue Base;
   13466       bool AllSame = true;
   13467       for (unsigned i = 0; i != NumElts; ++i) {
   13468         if (!V->getOperand(i).isUndef()) {
   13469           Base = V->getOperand(i);
   13470           break;
   13471         }
   13472       }
   13473       // Splat of <u, u, u, u>, return <u, u, u, u>
   13474       if (!Base.getNode())
   13475         return N0;
   13476       for (unsigned i = 0; i != NumElts; ++i) {
   13477         if (V->getOperand(i) != Base) {
   13478           AllSame = false;
   13479           break;
   13480         }
   13481       }
   13482       // Splat of <x, x, x, x>, return <x, x, x, x>
   13483       if (AllSame)
   13484         return N0;
   13485 
   13486       // Canonicalize any other splat as a build_vector.
   13487       const SDValue &Splatted = V->getOperand(SVN->getSplatIndex());
   13488       SmallVector<SDValue, 8> Ops(NumElts, Splatted);
   13489       SDValue NewBV = DAG.getBuildVector(V->getValueType(0), SDLoc(N), Ops);
   13490 
   13491       // We may have jumped through bitcasts, so the type of the
   13492       // BUILD_VECTOR may not match the type of the shuffle.
   13493       if (V->getValueType(0) != VT)
   13494         NewBV = DAG.getBitcast(VT, NewBV);
   13495       return NewBV;
   13496     }
   13497   }
   13498 
   13499   // There are various patterns used to build up a vector from smaller vectors,
   13500   // subvectors, or elements. Scan chains of these and replace unused insertions
   13501   // or components with undef.
   13502   if (SDValue S = simplifyShuffleOperands(SVN, N0, N1, DAG))
   13503     return S;
   13504 
   13505   if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
   13506       Level < AfterLegalizeVectorOps &&
   13507       (N1.isUndef() ||
   13508       (N1.getOpcode() == ISD::CONCAT_VECTORS &&
   13509        N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
   13510     if (SDValue V = partitionShuffleOfConcats(N, DAG))
   13511       return V;
   13512   }
   13513 
   13514   // Attempt to combine a shuffle of 2 inputs of 'scalar sources' -
   13515   // BUILD_VECTOR or SCALAR_TO_VECTOR into a single BUILD_VECTOR.
   13516   if (Level < AfterLegalizeVectorOps && TLI.isTypeLegal(VT)) {
   13517     SmallVector<SDValue, 8> Ops;
   13518     for (int M : SVN->getMask()) {
   13519       SDValue Op = DAG.getUNDEF(VT.getScalarType());
   13520       if (M >= 0) {
   13521         int Idx = M % NumElts;
   13522         SDValue &S = (M < (int)NumElts ? N0 : N1);
   13523         if (S.getOpcode() == ISD::BUILD_VECTOR && S.hasOneUse()) {
   13524           Op = S.getOperand(Idx);
   13525         } else if (S.getOpcode() == ISD::SCALAR_TO_VECTOR && S.hasOneUse()) {
   13526           if (Idx == 0)
   13527             Op = S.getOperand(0);
   13528         } else {
   13529           // Operand can't be combined - bail out.
   13530           break;
   13531         }
   13532       }
   13533       Ops.push_back(Op);
   13534     }
   13535     if (Ops.size() == VT.getVectorNumElements()) {
   13536       // BUILD_VECTOR requires all inputs to be of the same type, find the
   13537       // maximum type and extend them all.
   13538       EVT SVT = VT.getScalarType();
   13539       if (SVT.isInteger())
   13540         for (SDValue &Op : Ops)
   13541           SVT = (SVT.bitsLT(Op.getValueType()) ? Op.getValueType() : SVT);
   13542       if (SVT != VT.getScalarType())
   13543         for (SDValue &Op : Ops)
   13544           Op = TLI.isZExtFree(Op.getValueType(), SVT)
   13545                    ? DAG.getZExtOrTrunc(Op, SDLoc(N), SVT)
   13546                    : DAG.getSExtOrTrunc(Op, SDLoc(N), SVT);
   13547       return DAG.getBuildVector(VT, SDLoc(N), Ops);
   13548     }
   13549   }
   13550 
   13551   // If this shuffle only has a single input that is a bitcasted shuffle,
   13552   // attempt to merge the 2 shuffles and suitably bitcast the inputs/output
   13553   // back to their original types.
   13554   if (N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() &&
   13555       N1.isUndef() && Level < AfterLegalizeVectorOps &&
   13556       TLI.isTypeLegal(VT)) {
   13557 
   13558     // Peek through the bitcast only if there is one user.
   13559     SDValue BC0 = N0;
   13560     while (BC0.getOpcode() == ISD::BITCAST) {
   13561       if (!BC0.hasOneUse())
   13562         break;
   13563       BC0 = BC0.getOperand(0);
   13564     }
   13565 
   13566     auto ScaleShuffleMask = [](ArrayRef<int> Mask, int Scale) {
   13567       if (Scale == 1)
   13568         return SmallVector<int, 8>(Mask.begin(), Mask.end());
   13569 
   13570       SmallVector<int, 8> NewMask;
   13571       for (int M : Mask)
   13572         for (int s = 0; s != Scale; ++s)
   13573           NewMask.push_back(M < 0 ? -1 : Scale * M + s);
   13574       return NewMask;
   13575     };
   13576 
   13577     if (BC0.getOpcode() == ISD::VECTOR_SHUFFLE && BC0.hasOneUse()) {
   13578       EVT SVT = VT.getScalarType();
   13579       EVT InnerVT = BC0->getValueType(0);
   13580       EVT InnerSVT = InnerVT.getScalarType();
   13581 
   13582       // Determine which shuffle works with the smaller scalar type.
   13583       EVT ScaleVT = SVT.bitsLT(InnerSVT) ? VT : InnerVT;
   13584       EVT ScaleSVT = ScaleVT.getScalarType();
   13585 
   13586       if (TLI.isTypeLegal(ScaleVT) &&
   13587           0 == (InnerSVT.getSizeInBits() % ScaleSVT.getSizeInBits()) &&
   13588           0 == (SVT.getSizeInBits() % ScaleSVT.getSizeInBits())) {
   13589 
   13590         int InnerScale = InnerSVT.getSizeInBits() / ScaleSVT.getSizeInBits();
   13591         int OuterScale = SVT.getSizeInBits() / ScaleSVT.getSizeInBits();
   13592 
   13593         // Scale the shuffle masks to the smaller scalar type.
   13594         ShuffleVectorSDNode *InnerSVN = cast<ShuffleVectorSDNode>(BC0);
   13595         SmallVector<int, 8> InnerMask =
   13596             ScaleShuffleMask(InnerSVN->getMask(), InnerScale);
   13597         SmallVector<int, 8> OuterMask =
   13598             ScaleShuffleMask(SVN->getMask(), OuterScale);
   13599 
   13600         // Merge the shuffle masks.
   13601         SmallVector<int, 8> NewMask;
   13602         for (int M : OuterMask)
   13603           NewMask.push_back(M < 0 ? -1 : InnerMask[M]);
   13604 
   13605         // Test for shuffle mask legality over both commutations.
   13606         SDValue SV0 = BC0->getOperand(0);
   13607         SDValue SV1 = BC0->getOperand(1);
   13608         bool LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
   13609         if (!LegalMask) {
   13610           std::swap(SV0, SV1);
   13611           ShuffleVectorSDNode::commuteMask(NewMask);
   13612           LegalMask = TLI.isShuffleMaskLegal(NewMask, ScaleVT);
   13613         }
   13614 
   13615         if (LegalMask) {
   13616           SV0 = DAG.getBitcast(ScaleVT, SV0);
   13617           SV1 = DAG.getBitcast(ScaleVT, SV1);
   13618           return DAG.getBitcast(
   13619               VT, DAG.getVectorShuffle(ScaleVT, SDLoc(N), SV0, SV1, NewMask));
   13620         }
   13621       }
   13622     }
   13623   }
   13624 
   13625   // Canonicalize shuffles according to rules:
   13626   //  shuffle(A, shuffle(A, B)) -> shuffle(shuffle(A,B), A)
   13627   //  shuffle(B, shuffle(A, B)) -> shuffle(shuffle(A,B), B)
   13628   //  shuffle(B, shuffle(A, Undef)) -> shuffle(shuffle(A, Undef), B)
   13629   if (N1.getOpcode() == ISD::VECTOR_SHUFFLE &&
   13630       N0.getOpcode() != ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
   13631       TLI.isTypeLegal(VT)) {
   13632     // The incoming shuffle must be of the same type as the result of the
   13633     // current shuffle.
   13634     assert(N1->getOperand(0).getValueType() == VT &&
   13635            "Shuffle types don't match");
   13636 
   13637     SDValue SV0 = N1->getOperand(0);
   13638     SDValue SV1 = N1->getOperand(1);
   13639     bool HasSameOp0 = N0 == SV0;
   13640     bool IsSV1Undef = SV1.isUndef();
   13641     if (HasSameOp0 || IsSV1Undef || N0 == SV1)
   13642       // Commute the operands of this shuffle so that next rule
   13643       // will trigger.
   13644       return DAG.getCommutedVectorShuffle(*SVN);
   13645   }
   13646 
   13647   // Try to fold according to rules:
   13648   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
   13649   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
   13650   //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
   13651   // Don't try to fold shuffles with illegal type.
   13652   // Only fold if this shuffle is the only user of the other shuffle.
   13653   if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && N->isOnlyUserOf(N0.getNode()) &&
   13654       Level < AfterLegalizeDAG && TLI.isTypeLegal(VT)) {
   13655     ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);
   13656 
   13657     // The incoming shuffle must be of the same type as the result of the
   13658     // current shuffle.
   13659     assert(OtherSV->getOperand(0).getValueType() == VT &&
   13660            "Shuffle types don't match");
   13661 
   13662     SDValue SV0, SV1;
   13663     SmallVector<int, 4> Mask;
   13664     // Compute the combined shuffle mask for a shuffle with SV0 as the first
   13665     // operand, and SV1 as the second operand.
   13666     for (unsigned i = 0; i != NumElts; ++i) {
   13667       int Idx = SVN->getMaskElt(i);
   13668       if (Idx < 0) {
   13669         // Propagate Undef.
   13670         Mask.push_back(Idx);
   13671         continue;
   13672       }
   13673 
   13674       SDValue CurrentVec;
   13675       if (Idx < (int)NumElts) {
   13676         // This shuffle index refers to the inner shuffle N0. Lookup the inner
   13677         // shuffle mask to identify which vector is actually referenced.
   13678         Idx = OtherSV->getMaskElt(Idx);
   13679         if (Idx < 0) {
   13680           // Propagate Undef.
   13681           Mask.push_back(Idx);
   13682           continue;
   13683         }
   13684 
   13685         CurrentVec = (Idx < (int) NumElts) ? OtherSV->getOperand(0)
   13686                                            : OtherSV->getOperand(1);
   13687       } else {
   13688         // This shuffle index references an element within N1.
   13689         CurrentVec = N1;
   13690       }
   13691 
   13692       // Simple case where 'CurrentVec' is UNDEF.
   13693       if (CurrentVec.isUndef()) {
   13694         Mask.push_back(-1);
   13695         continue;
   13696       }
   13697 
   13698       // Canonicalize the shuffle index. We don't know yet if CurrentVec
   13699       // will be the first or second operand of the combined shuffle.
   13700       Idx = Idx % NumElts;
   13701       if (!SV0.getNode() || SV0 == CurrentVec) {
   13702         // Ok. CurrentVec is the left hand side.
   13703         // Update the mask accordingly.
   13704         SV0 = CurrentVec;
   13705         Mask.push_back(Idx);
   13706         continue;
   13707       }
   13708 
   13709       // Bail out if we cannot convert the shuffle pair into a single shuffle.
   13710       if (SV1.getNode() && SV1 != CurrentVec)
   13711         return SDValue();
   13712 
   13713       // Ok. CurrentVec is the right hand side.
   13714       // Update the mask accordingly.
   13715       SV1 = CurrentVec;
   13716       Mask.push_back(Idx + NumElts);
   13717     }
   13718 
   13719     // Check if all indices in Mask are Undef. In case, propagate Undef.
   13720     bool isUndefMask = true;
   13721     for (unsigned i = 0; i != NumElts && isUndefMask; ++i)
   13722       isUndefMask &= Mask[i] < 0;
   13723 
   13724     if (isUndefMask)
   13725       return DAG.getUNDEF(VT);
   13726 
   13727     if (!SV0.getNode())
   13728       SV0 = DAG.getUNDEF(VT);
   13729     if (!SV1.getNode())
   13730       SV1 = DAG.getUNDEF(VT);
   13731 
   13732     // Avoid introducing shuffles with illegal mask.
   13733     if (!TLI.isShuffleMaskLegal(Mask, VT)) {
   13734       ShuffleVectorSDNode::commuteMask(Mask);
   13735 
   13736       if (!TLI.isShuffleMaskLegal(Mask, VT))
   13737         return SDValue();
   13738 
   13739       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, A, M2)
   13740       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, A, M2)
   13741       //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(C, B, M2)
   13742       std::swap(SV0, SV1);
   13743     }
   13744 
   13745     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, B, M2)
   13746     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(A, C, M2)
   13747     //   shuffle(shuffle(A, B, M0), C, M1) -> shuffle(B, C, M2)
   13748     return DAG.getVectorShuffle(VT, SDLoc(N), SV0, SV1, Mask);
   13749   }
   13750 
   13751   return SDValue();
   13752 }
   13753 
   13754 SDValue DAGCombiner::visitSCALAR_TO_VECTOR(SDNode *N) {
   13755   SDValue InVal = N->getOperand(0);
   13756   EVT VT = N->getValueType(0);
   13757 
   13758   // Replace a SCALAR_TO_VECTOR(EXTRACT_VECTOR_ELT(V,C0)) pattern
   13759   // with a VECTOR_SHUFFLE.
   13760   if (InVal.getOpcode() == ISD::EXTRACT_VECTOR_ELT) {
   13761     SDValue InVec = InVal->getOperand(0);
   13762     SDValue EltNo = InVal->getOperand(1);
   13763 
   13764     // FIXME: We could support implicit truncation if the shuffle can be
   13765     // scaled to a smaller vector scalar type.
   13766     ConstantSDNode *C0 = dyn_cast<ConstantSDNode>(EltNo);
   13767     if (C0 && VT == InVec.getValueType() &&
   13768         VT.getScalarType() == InVal.getValueType()) {
   13769       SmallVector<int, 8> NewMask(VT.getVectorNumElements(), -1);
   13770       int Elt = C0->getZExtValue();
   13771       NewMask[0] = Elt;
   13772 
   13773       if (TLI.isShuffleMaskLegal(NewMask, VT))
   13774         return DAG.getVectorShuffle(VT, SDLoc(N), InVec, DAG.getUNDEF(VT),
   13775                                     NewMask);
   13776     }
   13777   }
   13778 
   13779   return SDValue();
   13780 }
   13781 
   13782 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) {
   13783   SDValue N0 = N->getOperand(0);
   13784   SDValue N1 = N->getOperand(1);
   13785   SDValue N2 = N->getOperand(2);
   13786 
   13787   if (N0.getValueType() != N1.getValueType())
   13788     return SDValue();
   13789 
   13790   // If the input vector is a concatenation, and the insert replaces
   13791   // one of the halves, we can optimize into a single concat_vectors.
   13792   if (N0.getOpcode() == ISD::CONCAT_VECTORS && N0->getNumOperands() == 2 &&
   13793       N2.getOpcode() == ISD::Constant) {
   13794     APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue();
   13795     EVT VT = N->getValueType(0);
   13796 
   13797     // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) ->
   13798     // (concat_vectors Z, Y)
   13799     if (InsIdx == 0)
   13800       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N1,
   13801                          N0.getOperand(1));
   13802 
   13803     // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) ->
   13804     // (concat_vectors X, Z)
   13805     if (InsIdx == VT.getVectorNumElements() / 2)
   13806       return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, N0.getOperand(0),
   13807                          N1);
   13808   }
   13809 
   13810   return SDValue();
   13811 }
   13812 
   13813 SDValue DAGCombiner::visitFP_TO_FP16(SDNode *N) {
   13814   SDValue N0 = N->getOperand(0);
   13815 
   13816   // fold (fp_to_fp16 (fp16_to_fp op)) -> op
   13817   if (N0->getOpcode() == ISD::FP16_TO_FP)
   13818     return N0->getOperand(0);
   13819 
   13820   return SDValue();
   13821 }
   13822 
   13823 SDValue DAGCombiner::visitFP16_TO_FP(SDNode *N) {
   13824   SDValue N0 = N->getOperand(0);
   13825 
   13826   // fold fp16_to_fp(op & 0xffff) -> fp16_to_fp(op)
   13827   if (N0->getOpcode() == ISD::AND) {
   13828     ConstantSDNode *AndConst = getAsNonOpaqueConstant(N0.getOperand(1));
   13829     if (AndConst && AndConst->getAPIntValue() == 0xffff) {
   13830       return DAG.getNode(ISD::FP16_TO_FP, SDLoc(N), N->getValueType(0),
   13831                          N0.getOperand(0));
   13832     }
   13833   }
   13834 
   13835   return SDValue();
   13836 }
   13837 
/// Returns a vector_shuffle if it able to transform an AND to a vector_shuffle
/// with the destination vector and a zero vector.
/// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==>
///      vector_shuffle V, Zero, <0, 4, 2, 4>
/// Each constant-mask element must be all-ones (keep the lane) or all-zeros
/// (clear the lane); otherwise no shuffle is produced. Elements may also be
/// examined at finer granularity (down to byte level) via the split loop
/// below, so e.g. a 0x0000ffff i32 mask can become a 2-of-4 i16 clear mask.
SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
  EVT VT = N->getValueType(0);
  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDLoc dl(N);

  // Make sure we're not running after operation legalization where it
  // may have custom lowered the vector shuffles.
  if (LegalOperations)
    return SDValue();

  if (N->getOpcode() != ISD::AND)
    return SDValue();

  // Look through a bitcast on the mask operand so we see the raw
  // BUILD_VECTOR constants.
  if (RHS.getOpcode() == ISD::BITCAST)
    RHS = RHS.getOperand(0);

  if (RHS.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  EVT RVT = RHS.getValueType();
  unsigned NumElts = RHS.getNumOperands();

  // Attempt to create a valid clear mask, splitting the mask into
  // sub elements and checking to see if each is
  // all zeros or all ones - suitable for shuffle masking.
  // Shuffle index convention: index i selects sub-element i of LHS (keep),
  // index i + NumSubElts selects from the zero vector (clear), -1 is undef.
  auto BuildClearMask = [&](int Split) {
    int NumSubElts = NumElts * Split;
    int NumSubBits = RVT.getScalarSizeInBits() / Split;

    SmallVector<int, 8> Indices;
    for (int i = 0; i != NumSubElts; ++i) {
      // Map sub-element i back to its containing BUILD_VECTOR element and
      // its position within that element.
      int EltIdx = i / Split;
      int SubIdx = i % Split;
      SDValue Elt = RHS.getOperand(EltIdx);
      if (Elt.isUndef()) {
        Indices.push_back(-1);
        continue;
      }

      // Only integer and FP constants are handled; anything else defeats
      // the transform.
      APInt Bits;
      if (isa<ConstantSDNode>(Elt))
        Bits = cast<ConstantSDNode>(Elt)->getAPIntValue();
      else if (isa<ConstantFPSDNode>(Elt))
        Bits = cast<ConstantFPSDNode>(Elt)->getValueAPF().bitcastToAPInt();
      else
        return SDValue();

      // Extract the sub element from the constant bit mask.
      // On big-endian targets sub-element 0 occupies the most significant
      // bits, so the shift amount runs in the opposite direction.
      if (DAG.getDataLayout().isBigEndian()) {
        Bits = Bits.lshr((Split - SubIdx - 1) * NumSubBits);
      } else {
        Bits = Bits.lshr(SubIdx * NumSubBits);
      }

      if (Split > 1)
        Bits = Bits.trunc(NumSubBits);

      if (Bits.isAllOnesValue())
        Indices.push_back(i);
      else if (Bits == 0)
        Indices.push_back(i + NumSubElts);
      else
        return SDValue();
    }

    // Let's see if the target supports this vector_shuffle.
    EVT ClearSVT = EVT::getIntegerVT(*DAG.getContext(), NumSubBits);
    EVT ClearVT = EVT::getVectorVT(*DAG.getContext(), ClearSVT, NumSubElts);
    if (!TLI.isVectorClearMaskLegal(Indices, ClearVT))
      return SDValue();

    // Build shuffle(bitcast(LHS), zero) in the split type and cast the
    // result back to the original type.
    SDValue Zero = DAG.getConstant(0, dl, ClearVT);
    return DAG.getBitcast(VT, DAG.getVectorShuffle(ClearVT, dl,
                                                   DAG.getBitcast(ClearVT, LHS),
                                                   Zero, Indices));
  };

  // Determine maximum split level (byte level masking).
  int MaxSplit = 1;
  if (RVT.getScalarSizeInBits() % 8 == 0)
    MaxSplit = RVT.getScalarSizeInBits() / 8;

  // Try the coarsest granularity first; stop at the first split level the
  // target accepts.
  for (int Split = 1; Split <= MaxSplit; ++Split)
    if (RVT.getScalarSizeInBits() % Split == 0)
      if (SDValue S = BuildClearMask(Split))
        return S;

  return SDValue();
}
   13932 
   13933 /// Visit a binary vector operation, like ADD.
   13934 SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
   13935   assert(N->getValueType(0).isVector() &&
   13936          "SimplifyVBinOp only works on vectors!");
   13937 
   13938   SDValue LHS = N->getOperand(0);
   13939   SDValue RHS = N->getOperand(1);
   13940   SDValue Ops[] = {LHS, RHS};
   13941 
   13942   // See if we can constant fold the vector operation.
   13943   if (SDValue Fold = DAG.FoldConstantVectorArithmetic(
   13944           N->getOpcode(), SDLoc(LHS), LHS.getValueType(), Ops, N->getFlags()))
   13945     return Fold;
   13946 
   13947   // Try to convert a constant mask AND into a shuffle clear mask.
   13948   if (SDValue Shuffle = XformToShuffleWithZero(N))
   13949     return Shuffle;
   13950 
   13951   // Type legalization might introduce new shuffles in the DAG.
   13952   // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
   13953   //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
   13954   if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
   13955       isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
   13956       LHS.getOperand(1).isUndef() &&
   13957       RHS.getOperand(1).isUndef()) {
   13958     ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
   13959     ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);
   13960 
   13961     if (SVN0->getMask().equals(SVN1->getMask())) {
   13962       EVT VT = N->getValueType(0);
   13963       SDValue UndefVector = LHS.getOperand(1);
   13964       SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
   13965                                      LHS.getOperand(0), RHS.getOperand(0),
   13966                                      N->getFlags());
   13967       AddUsersToWorklist(N);
   13968       return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
   13969                                   SVN0->getMask());
   13970     }
   13971   }
   13972 
   13973   return SDValue();
   13974 }
   13975 
   13976 SDValue DAGCombiner::SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1,
   13977                                     SDValue N2) {
   13978   assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");
   13979 
   13980   SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
   13981                                  cast<CondCodeSDNode>(N0.getOperand(2))->get());
   13982 
   13983   // If we got a simplified select_cc node back from SimplifySelectCC, then
   13984   // break it down into a new SETCC node, and a new SELECT node, and then return
   13985   // the SELECT node, since we were called with a SELECT node.
   13986   if (SCC.getNode()) {
   13987     // Check to see if we got a select_cc back (to turn into setcc/select).
   13988     // Otherwise, just return whatever node we got back, like fabs.
   13989     if (SCC.getOpcode() == ISD::SELECT_CC) {
   13990       SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
   13991                                   N0.getValueType(),
   13992                                   SCC.getOperand(0), SCC.getOperand(1),
   13993                                   SCC.getOperand(4));
   13994       AddToWorklist(SETCC.getNode());
   13995       return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), SETCC,
   13996                            SCC.getOperand(2), SCC.getOperand(3));
   13997     }
   13998 
   13999     return SCC;
   14000   }
   14001   return SDValue();
   14002 }
   14003 
   14004 /// Given a SELECT or a SELECT_CC node, where LHS and RHS are the two values
   14005 /// being selected between, see if we can simplify the select.  Callers of this
   14006 /// should assume that TheSelect is deleted if this returns true.  As such, they
   14007 /// should return the appropriate thing (e.g. the node) back to the top-level of
   14008 /// the DAG combiner loop to avoid it being looked at.
   14009 bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
   14010                                     SDValue RHS) {
   14011 
   14012   // fold (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
   14013   // The select + setcc is redundant, because fsqrt returns NaN for X < 0.
   14014   if (const ConstantFPSDNode *NaN = isConstOrConstSplatFP(LHS)) {
   14015     if (NaN->isNaN() && RHS.getOpcode() == ISD::FSQRT) {
   14016       // We have: (select (setcc ?, ?, ?), NaN, (fsqrt ?))
   14017       SDValue Sqrt = RHS;
   14018       ISD::CondCode CC;
   14019       SDValue CmpLHS;
   14020       const ConstantFPSDNode *Zero = nullptr;
   14021 
   14022       if (TheSelect->getOpcode() == ISD::SELECT_CC) {
   14023         CC = dyn_cast<CondCodeSDNode>(TheSelect->getOperand(4))->get();
   14024         CmpLHS = TheSelect->getOperand(0);
   14025         Zero = isConstOrConstSplatFP(TheSelect->getOperand(1));
   14026       } else {
   14027         // SELECT or VSELECT
   14028         SDValue Cmp = TheSelect->getOperand(0);
   14029         if (Cmp.getOpcode() == ISD::SETCC) {
   14030           CC = dyn_cast<CondCodeSDNode>(Cmp.getOperand(2))->get();
   14031           CmpLHS = Cmp.getOperand(0);
   14032           Zero = isConstOrConstSplatFP(Cmp.getOperand(1));
   14033         }
   14034       }
   14035       if (Zero && Zero->isZero() &&
   14036           Sqrt.getOperand(0) == CmpLHS && (CC == ISD::SETOLT ||
   14037           CC == ISD::SETULT || CC == ISD::SETLT)) {
   14038         // We have: (select (setcc x, [+-]0.0, *lt), NaN, (fsqrt x))
   14039         CombineTo(TheSelect, Sqrt);
   14040         return true;
   14041       }
   14042     }
   14043   }
   14044   // Cannot simplify select with vector condition
   14045   if (TheSelect->getOperand(0).getValueType().isVector()) return false;
   14046 
   14047   // If this is a select from two identical things, try to pull the operation
   14048   // through the select.
   14049   if (LHS.getOpcode() != RHS.getOpcode() ||
   14050       !LHS.hasOneUse() || !RHS.hasOneUse())
   14051     return false;
   14052 
   14053   // If this is a load and the token chain is identical, replace the select
   14054   // of two loads with a load through a select of the address to load from.
   14055   // This triggers in things like "select bool X, 10.0, 123.0" after the FP
   14056   // constants have been dropped into the constant pool.
   14057   if (LHS.getOpcode() == ISD::LOAD) {
   14058     LoadSDNode *LLD = cast<LoadSDNode>(LHS);
   14059     LoadSDNode *RLD = cast<LoadSDNode>(RHS);
   14060 
   14061     // Token chains must be identical.
   14062     if (LHS.getOperand(0) != RHS.getOperand(0) ||
   14063         // Do not let this transformation reduce the number of volatile loads.
   14064         LLD->isVolatile() || RLD->isVolatile() ||
   14065         // FIXME: If either is a pre/post inc/dec load,
   14066         // we'd need to split out the address adjustment.
   14067         LLD->isIndexed() || RLD->isIndexed() ||
   14068         // If this is an EXTLOAD, the VT's must match.
   14069         LLD->getMemoryVT() != RLD->getMemoryVT() ||
   14070         // If this is an EXTLOAD, the kind of extension must match.
   14071         (LLD->getExtensionType() != RLD->getExtensionType() &&
   14072          // The only exception is if one of the extensions is anyext.
   14073          LLD->getExtensionType() != ISD::EXTLOAD &&
   14074          RLD->getExtensionType() != ISD::EXTLOAD) ||
   14075         // FIXME: this discards src value information.  This is
   14076         // over-conservative. It would be beneficial to be able to remember
   14077         // both potential memory locations.  Since we are discarding
   14078         // src value info, don't do the transformation if the memory
   14079         // locations are not in the default address space.
   14080         LLD->getPointerInfo().getAddrSpace() != 0 ||
   14081         RLD->getPointerInfo().getAddrSpace() != 0 ||
   14082         !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
   14083                                       LLD->getBasePtr().getValueType()))
   14084       return false;
   14085 
   14086     // Check that the select condition doesn't reach either load.  If so,
   14087     // folding this will induce a cycle into the DAG.  If not, this is safe to
   14088     // xform, so create a select of the addresses.
   14089     SDValue Addr;
   14090     if (TheSelect->getOpcode() == ISD::SELECT) {
   14091       SDNode *CondNode = TheSelect->getOperand(0).getNode();
   14092       if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
   14093           (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
   14094         return false;
   14095       // The loads must not depend on one another.
   14096       if (LLD->isPredecessorOf(RLD) ||
   14097           RLD->isPredecessorOf(LLD))
   14098         return false;
   14099       Addr = DAG.getSelect(SDLoc(TheSelect),
   14100                            LLD->getBasePtr().getValueType(),
   14101                            TheSelect->getOperand(0), LLD->getBasePtr(),
   14102                            RLD->getBasePtr());
   14103     } else {  // Otherwise SELECT_CC
   14104       SDNode *CondLHS = TheSelect->getOperand(0).getNode();
   14105       SDNode *CondRHS = TheSelect->getOperand(1).getNode();
   14106 
   14107       if ((LLD->hasAnyUseOfValue(1) &&
   14108            (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
   14109           (RLD->hasAnyUseOfValue(1) &&
   14110            (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
   14111         return false;
   14112 
   14113       Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
   14114                          LLD->getBasePtr().getValueType(),
   14115                          TheSelect->getOperand(0),
   14116                          TheSelect->getOperand(1),
   14117                          LLD->getBasePtr(), RLD->getBasePtr(),
   14118                          TheSelect->getOperand(4));
   14119     }
   14120 
   14121     SDValue Load;
   14122     // It is safe to replace the two loads if they have different alignments,
   14123     // but the new load must be the minimum (most restrictive) alignment of the
   14124     // inputs.
   14125     bool isInvariant = LLD->isInvariant() & RLD->isInvariant();
   14126     unsigned Alignment = std::min(LLD->getAlignment(), RLD->getAlignment());
   14127     if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
   14128       Load = DAG.getLoad(TheSelect->getValueType(0),
   14129                          SDLoc(TheSelect),
   14130                          // FIXME: Discards pointer and AA info.
   14131                          LLD->getChain(), Addr, MachinePointerInfo(),
   14132                          LLD->isVolatile(), LLD->isNonTemporal(),
   14133                          isInvariant, Alignment);
   14134     } else {
   14135       Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
   14136                             RLD->getExtensionType() : LLD->getExtensionType(),
   14137                             SDLoc(TheSelect),
   14138                             TheSelect->getValueType(0),
   14139                             // FIXME: Discards pointer and AA info.
   14140                             LLD->getChain(), Addr, MachinePointerInfo(),
   14141                             LLD->getMemoryVT(), LLD->isVolatile(),
   14142                             LLD->isNonTemporal(), isInvariant, Alignment);
   14143     }
   14144 
   14145     // Users of the select now use the result of the load.
   14146     CombineTo(TheSelect, Load);
   14147 
   14148     // Users of the old loads now use the new load's chain.  We know the
   14149     // old-load value is dead now.
   14150     CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
   14151     CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
   14152     return true;
   14153   }
   14154 
   14155   return false;
   14156 }
   14157 
   14158 /// Simplify an expression of the form (N0 cond N1) ? N2 : N3
   14159 /// where 'cond' is the comparison specified by CC.
   14160 SDValue DAGCombiner::SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1,
   14161                                       SDValue N2, SDValue N3, ISD::CondCode CC,
   14162                                       bool NotExtCompare) {
   14163   // (x ? y : y) -> y.
   14164   if (N2 == N3) return N2;
   14165 
   14166   EVT VT = N2.getValueType();
   14167   ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
   14168   ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
   14169 
   14170   // Determine if the condition we're dealing with is constant
   14171   SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
   14172                               N0, N1, CC, DL, false);
   14173   if (SCC.getNode()) AddToWorklist(SCC.getNode());
   14174 
   14175   if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
   14176     // fold select_cc true, x, y -> x
   14177     // fold select_cc false, x, y -> y
   14178     return !SCCC->isNullValue() ? N2 : N3;
   14179   }
   14180 
   14181   // Check to see if we can simplify the select into an fabs node
   14182   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
   14183     // Allow either -0.0 or 0.0
   14184     if (CFP->isZero()) {
   14185       // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
   14186       if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
   14187           N0 == N2 && N3.getOpcode() == ISD::FNEG &&
   14188           N2 == N3.getOperand(0))
   14189         return DAG.getNode(ISD::FABS, DL, VT, N0);
   14190 
   14191       // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
   14192       if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
   14193           N0 == N3 && N2.getOpcode() == ISD::FNEG &&
   14194           N2.getOperand(0) == N3)
   14195         return DAG.getNode(ISD::FABS, DL, VT, N3);
   14196     }
   14197   }
   14198 
   14199   // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)"
   14200   // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0
   14201   // in it.  This is a win when the constant is not otherwise available because
   14202   // it replaces two constant pool loads with one.  We only do this if the FP
   14203   // type is known to be legal, because if it isn't, then we are before legalize
   14204   // types an we want the other legalization to happen first (e.g. to avoid
   14205   // messing with soft float) and if the ConstantFP is not legal, because if
   14206   // it is legal, we may not need to store the FP constant in a constant pool.
   14207   if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2))
   14208     if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) {
   14209       if (TLI.isTypeLegal(N2.getValueType()) &&
   14210           (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) !=
   14211                TargetLowering::Legal &&
   14212            !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) &&
   14213            !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) &&
   14214           // If both constants have multiple uses, then we won't need to do an
   14215           // extra load, they are likely around in registers for other users.
   14216           (TV->hasOneUse() || FV->hasOneUse())) {
   14217         Constant *Elts[] = {
   14218           const_cast<ConstantFP*>(FV->getConstantFPValue()),
   14219           const_cast<ConstantFP*>(TV->getConstantFPValue())
   14220         };
   14221         Type *FPTy = Elts[0]->getType();
   14222         const DataLayout &TD = DAG.getDataLayout();
   14223 
   14224         // Create a ConstantArray of the two constants.
   14225         Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts);
   14226         SDValue CPIdx =
   14227             DAG.getConstantPool(CA, TLI.getPointerTy(DAG.getDataLayout()),
   14228                                 TD.getPrefTypeAlignment(FPTy));
   14229         unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
   14230 
   14231         // Get the offsets to the 0 and 1 element of the array so that we can
   14232         // select between them.
   14233         SDValue Zero = DAG.getIntPtrConstant(0, DL);
   14234         unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType());
   14235         SDValue One = DAG.getIntPtrConstant(EltSize, SDLoc(FV));
   14236 
   14237         SDValue Cond = DAG.getSetCC(DL,
   14238                                     getSetCCResultType(N0.getValueType()),
   14239                                     N0, N1, CC);
   14240         AddToWorklist(Cond.getNode());
   14241         SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
   14242                                           Cond, One, Zero);
   14243         AddToWorklist(CstOffset.getNode());
   14244         CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
   14245                             CstOffset);
   14246         AddToWorklist(CPIdx.getNode());
   14247         return DAG.getLoad(
   14248             TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
   14249             MachinePointerInfo::getConstantPool(DAG.getMachineFunction()),
   14250             false, false, false, Alignment);
   14251       }
   14252     }
   14253 
   14254   // Check to see if we can perform the "gzip trick", transforming
   14255   // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A)
   14256   if (isNullConstant(N3) && CC == ISD::SETLT &&
   14257       (isNullConstant(N1) ||                 // (a < 0) ? b : 0
   14258        (isOneConstant(N1) && N0 == N2))) {   // (a < 1) ? a : 0
   14259     EVT XType = N0.getValueType();
   14260     EVT AType = N2.getValueType();
   14261     if (XType.bitsGE(AType)) {
   14262       // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a
   14263       // single-bit constant.
   14264       if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue() - 1)) == 0)) {
   14265         unsigned ShCtV = N2C->getAPIntValue().logBase2();
   14266         ShCtV = XType.getSizeInBits() - ShCtV - 1;
   14267         SDValue ShCt = DAG.getConstant(ShCtV, SDLoc(N0),
   14268                                        getShiftAmountTy(N0.getValueType()));
   14269         SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
   14270                                     XType, N0, ShCt);
   14271         AddToWorklist(Shift.getNode());
   14272 
   14273         if (XType.bitsGT(AType)) {
   14274           Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   14275           AddToWorklist(Shift.getNode());
   14276         }
   14277 
   14278         return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   14279       }
   14280 
   14281       SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
   14282                                   XType, N0,
   14283                                   DAG.getConstant(XType.getSizeInBits() - 1,
   14284                                                   SDLoc(N0),
   14285                                          getShiftAmountTy(N0.getValueType())));
   14286       AddToWorklist(Shift.getNode());
   14287 
   14288       if (XType.bitsGT(AType)) {
   14289         Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift);
   14290         AddToWorklist(Shift.getNode());
   14291       }
   14292 
   14293       return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
   14294     }
   14295   }
   14296 
   14297   // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A)
  // where y has a single bit set.
   14299   // A plaintext description would be, we can turn the SELECT_CC into an AND
   14300   // when the condition can be materialized as an all-ones register.  Any
   14301   // single bit-test can be materialized as an all-ones register with
   14302   // shift-left and shift-right-arith.
   14303   if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND &&
   14304       N0->getValueType(0) == VT && isNullConstant(N1) && isNullConstant(N2)) {
   14305     SDValue AndLHS = N0->getOperand(0);
   14306     ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1));
   14307     if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) {
   14308       // Shift the tested bit over the sign bit.
   14309       const APInt &AndMask = ConstAndRHS->getAPIntValue();
   14310       SDValue ShlAmt =
   14311         DAG.getConstant(AndMask.countLeadingZeros(), SDLoc(AndLHS),
   14312                         getShiftAmountTy(AndLHS.getValueType()));
   14313       SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
   14314 
   14315       // Now arithmetic right shift it all the way over, so the result is either
   14316       // all-ones, or zero.
   14317       SDValue ShrAmt =
   14318         DAG.getConstant(AndMask.getBitWidth() - 1, SDLoc(Shl),
   14319                         getShiftAmountTy(Shl.getValueType()));
   14320       SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
   14321 
   14322       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
   14323     }
   14324   }
   14325 
   14326   // fold select C, 16, 0 -> shl C, 4
   14327   if (N2C && isNullConstant(N3) && N2C->getAPIntValue().isPowerOf2() &&
   14328       TLI.getBooleanContents(N0.getValueType()) ==
   14329           TargetLowering::ZeroOrOneBooleanContent) {
   14330 
   14331     // If the caller doesn't want us to simplify this into a zext of a compare,
   14332     // don't do it.
   14333     if (NotExtCompare && N2C->isOne())
   14334       return SDValue();
   14335 
   14336     // Get a SetCC of the condition
   14337     // NOTE: Don't create a SETCC if it's not legal on this target.
   14338     if (!LegalOperations ||
   14339         TLI.isOperationLegal(ISD::SETCC, N0.getValueType())) {
   14340       SDValue Temp, SCC;
   14341       // cast from setcc result type to select result type
   14342       if (LegalTypes) {
   14343         SCC  = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()),
   14344                             N0, N1, CC);
   14345         if (N2.getValueType().bitsLT(SCC.getValueType()))
   14346           Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2),
   14347                                         N2.getValueType());
   14348         else
   14349           Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
   14350                              N2.getValueType(), SCC);
   14351       } else {
   14352         SCC  = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC);
   14353         Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2),
   14354                            N2.getValueType(), SCC);
   14355       }
   14356 
   14357       AddToWorklist(SCC.getNode());
   14358       AddToWorklist(Temp.getNode());
   14359 
   14360       if (N2C->isOne())
   14361         return Temp;
   14362 
   14363       // shl setcc result by log2 n2c
   14364       return DAG.getNode(
   14365           ISD::SHL, DL, N2.getValueType(), Temp,
   14366           DAG.getConstant(N2C->getAPIntValue().logBase2(), SDLoc(Temp),
   14367                           getShiftAmountTy(Temp.getValueType())));
   14368     }
   14369   }
   14370 
   14371   // Check to see if this is an integer abs.
   14372   // select_cc setg[te] X,  0,  X, -X ->
   14373   // select_cc setgt    X, -1,  X, -X ->
   14374   // select_cc setl[te] X,  0, -X,  X ->
   14375   // select_cc setlt    X,  1, -X,  X ->
   14376   // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
   14377   if (N1C) {
   14378     ConstantSDNode *SubC = nullptr;
   14379     if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
   14380          (N1C->isAllOnesValue() && CC == ISD::SETGT)) &&
   14381         N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1))
   14382       SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0));
   14383     else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) ||
   14384               (N1C->isOne() && CC == ISD::SETLT)) &&
   14385              N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1))
   14386       SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0));
   14387 
   14388     EVT XType = N0.getValueType();
   14389     if (SubC && SubC->isNullValue() && XType.isInteger()) {
   14390       SDLoc DL(N0);
   14391       SDValue Shift = DAG.getNode(ISD::SRA, DL, XType,
   14392                                   N0,
   14393                                   DAG.getConstant(XType.getSizeInBits() - 1, DL,
   14394                                          getShiftAmountTy(N0.getValueType())));
   14395       SDValue Add = DAG.getNode(ISD::ADD, DL,
   14396                                 XType, N0, Shift);
   14397       AddToWorklist(Shift.getNode());
   14398       AddToWorklist(Add.getNode());
   14399       return DAG.getNode(ISD::XOR, DL, XType, Add, Shift);
   14400     }
   14401   }
   14402 
   14403   // select_cc seteq X, 0, sizeof(X), ctlz(X) -> ctlz(X)
   14404   // select_cc seteq X, 0, sizeof(X), ctlz_zero_undef(X) -> ctlz(X)
   14405   // select_cc seteq X, 0, sizeof(X), cttz(X) -> cttz(X)
   14406   // select_cc seteq X, 0, sizeof(X), cttz_zero_undef(X) -> cttz(X)
   14407   // select_cc setne X, 0, ctlz(X), sizeof(X) -> ctlz(X)
   14408   // select_cc setne X, 0, ctlz_zero_undef(X), sizeof(X) -> ctlz(X)
   14409   // select_cc setne X, 0, cttz(X), sizeof(X) -> cttz(X)
   14410   // select_cc setne X, 0, cttz_zero_undef(X), sizeof(X) -> cttz(X)
   14411   if (N1C && N1C->isNullValue() && (CC == ISD::SETEQ || CC == ISD::SETNE)) {
   14412     SDValue ValueOnZero = N2;
   14413     SDValue Count = N3;
   14414     // If the condition is NE instead of E, swap the operands.
   14415     if (CC == ISD::SETNE)
   14416       std::swap(ValueOnZero, Count);
   14417     // Check if the value on zero is a constant equal to the bits in the type.
   14418     if (auto *ValueOnZeroC = dyn_cast<ConstantSDNode>(ValueOnZero)) {
   14419       if (ValueOnZeroC->getAPIntValue() == VT.getSizeInBits()) {
   14420         // If the other operand is cttz/cttz_zero_undef of N0, and cttz is
   14421         // legal, combine to just cttz.
   14422         if ((Count.getOpcode() == ISD::CTTZ ||
   14423              Count.getOpcode() == ISD::CTTZ_ZERO_UNDEF) &&
   14424             N0 == Count.getOperand(0) &&
   14425             (!LegalOperations || TLI.isOperationLegal(ISD::CTTZ, VT)))
   14426           return DAG.getNode(ISD::CTTZ, DL, VT, N0);
   14427         // If the other operand is ctlz/ctlz_zero_undef of N0, and ctlz is
   14428         // legal, combine to just ctlz.
   14429         if ((Count.getOpcode() == ISD::CTLZ ||
   14430              Count.getOpcode() == ISD::CTLZ_ZERO_UNDEF) &&
   14431             N0 == Count.getOperand(0) &&
   14432             (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, VT)))
   14433           return DAG.getNode(ISD::CTLZ, DL, VT, N0);
   14434       }
   14435     }
   14436   }
   14437 
   14438   return SDValue();
   14439 }
   14440 
   14441 /// This is a stub for TargetLowering::SimplifySetCC.
   14442 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   14443                                    ISD::CondCode Cond, const SDLoc &DL,
   14444                                    bool foldBooleans) {
   14445   TargetLowering::DAGCombinerInfo
   14446     DagCombineInfo(DAG, Level, false, this);
   14447   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
   14448 }
   14449 
   14450 /// Given an ISD::SDIV node expressing a divide by constant, return
   14451 /// a DAG expression to select that will generate the same value by multiplying
   14452 /// by a magic number.
   14453 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
   14454 SDValue DAGCombiner::BuildSDIV(SDNode *N) {
   14455   // when optimising for minimum size, we don't want to expand a div to a mul
   14456   // and a shift.
   14457   if (DAG.getMachineFunction().getFunction()->optForMinSize())
   14458     return SDValue();
   14459 
   14460   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   14461   if (!C)
   14462     return SDValue();
   14463 
   14464   // Avoid division by zero.
   14465   if (C->isNullValue())
   14466     return SDValue();
   14467 
   14468   std::vector<SDNode*> Built;
   14469   SDValue S =
   14470       TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
   14471 
   14472   for (SDNode *N : Built)
   14473     AddToWorklist(N);
   14474   return S;
   14475 }
   14476 
   14477 /// Given an ISD::SDIV node expressing a divide by constant power of 2, return a
   14478 /// DAG expression that will generate the same value by right shifting.
   14479 SDValue DAGCombiner::BuildSDIVPow2(SDNode *N) {
   14480   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   14481   if (!C)
   14482     return SDValue();
   14483 
   14484   // Avoid division by zero.
   14485   if (C->isNullValue())
   14486     return SDValue();
   14487 
   14488   std::vector<SDNode *> Built;
   14489   SDValue S = TLI.BuildSDIVPow2(N, C->getAPIntValue(), DAG, &Built);
   14490 
   14491   for (SDNode *N : Built)
   14492     AddToWorklist(N);
   14493   return S;
   14494 }
   14495 
   14496 /// Given an ISD::UDIV node expressing a divide by constant, return a DAG
   14497 /// expression that will generate the same value by multiplying by a magic
   14498 /// number.
   14499 /// Ref: "Hacker's Delight" or "The PowerPC Compiler Writer's Guide".
   14500 SDValue DAGCombiner::BuildUDIV(SDNode *N) {
   14501   // when optimising for minimum size, we don't want to expand a div to a mul
   14502   // and a shift.
   14503   if (DAG.getMachineFunction().getFunction()->optForMinSize())
   14504     return SDValue();
   14505 
   14506   ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1));
   14507   if (!C)
   14508     return SDValue();
   14509 
   14510   // Avoid division by zero.
   14511   if (C->isNullValue())
   14512     return SDValue();
   14513 
   14514   std::vector<SDNode*> Built;
   14515   SDValue S =
   14516       TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built);
   14517 
   14518   for (SDNode *N : Built)
   14519     AddToWorklist(N);
   14520   return S;
   14521 }
   14522 
   14523 SDValue DAGCombiner::BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags) {
   14524   if (Level >= AfterLegalizeDAG)
   14525     return SDValue();
   14526 
   14527   // Expose the DAG combiner to the target combiner implementations.
   14528   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
   14529 
   14530   unsigned Iterations = 0;
   14531   if (SDValue Est = TLI.getRecipEstimate(Op, DCI, Iterations)) {
   14532     if (Iterations) {
   14533       // Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
   14534       // For the reciprocal, we need to find the zero of the function:
   14535       //   F(X) = A X - 1 [which has a zero at X = 1/A]
   14536       //     =>
   14537       //   X_{i+1} = X_i (2 - A X_i) = X_i + X_i (1 - A X_i) [this second form
   14538       //     does not require additional intermediate precision]
   14539       EVT VT = Op.getValueType();
   14540       SDLoc DL(Op);
   14541       SDValue FPOne = DAG.getConstantFP(1.0, DL, VT);
   14542 
   14543       AddToWorklist(Est.getNode());
   14544 
   14545       // Newton iterations: Est = Est + Est (1 - Arg * Est)
   14546       for (unsigned i = 0; i < Iterations; ++i) {
   14547         SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Op, Est, Flags);
   14548         AddToWorklist(NewEst.getNode());
   14549 
   14550         NewEst = DAG.getNode(ISD::FSUB, DL, VT, FPOne, NewEst, Flags);
   14551         AddToWorklist(NewEst.getNode());
   14552 
   14553         NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
   14554         AddToWorklist(NewEst.getNode());
   14555 
   14556         Est = DAG.getNode(ISD::FADD, DL, VT, Est, NewEst, Flags);
   14557         AddToWorklist(Est.getNode());
   14558       }
   14559     }
   14560     return Est;
   14561   }
   14562 
   14563   return SDValue();
   14564 }
   14565 
   14566 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
   14567 /// For the reciprocal sqrt, we need to find the zero of the function:
   14568 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
   14569 ///     =>
   14570 ///   X_{i+1} = X_i (1.5 - A X_i^2 / 2)
   14571 /// As a result, we precompute A/2 prior to the iteration loop.
   14572 SDValue DAGCombiner::buildSqrtNROneConst(SDValue Arg, SDValue Est,
   14573                                          unsigned Iterations,
   14574                                          SDNodeFlags *Flags, bool Reciprocal) {
   14575   EVT VT = Arg.getValueType();
   14576   SDLoc DL(Arg);
   14577   SDValue ThreeHalves = DAG.getConstantFP(1.5, DL, VT);
   14578 
   14579   // We now need 0.5 * Arg which we can write as (1.5 * Arg - Arg) so that
   14580   // this entire sequence requires only one FP constant.
   14581   SDValue HalfArg = DAG.getNode(ISD::FMUL, DL, VT, ThreeHalves, Arg, Flags);
   14582   AddToWorklist(HalfArg.getNode());
   14583 
   14584   HalfArg = DAG.getNode(ISD::FSUB, DL, VT, HalfArg, Arg, Flags);
   14585   AddToWorklist(HalfArg.getNode());
   14586 
   14587   // Newton iterations: Est = Est * (1.5 - HalfArg * Est * Est)
   14588   for (unsigned i = 0; i < Iterations; ++i) {
   14589     SDValue NewEst = DAG.getNode(ISD::FMUL, DL, VT, Est, Est, Flags);
   14590     AddToWorklist(NewEst.getNode());
   14591 
   14592     NewEst = DAG.getNode(ISD::FMUL, DL, VT, HalfArg, NewEst, Flags);
   14593     AddToWorklist(NewEst.getNode());
   14594 
   14595     NewEst = DAG.getNode(ISD::FSUB, DL, VT, ThreeHalves, NewEst, Flags);
   14596     AddToWorklist(NewEst.getNode());
   14597 
   14598     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, NewEst, Flags);
   14599     AddToWorklist(Est.getNode());
   14600   }
   14601 
   14602   // If non-reciprocal square root is requested, multiply the result by Arg.
   14603   if (!Reciprocal) {
   14604     Est = DAG.getNode(ISD::FMUL, DL, VT, Est, Arg, Flags);
   14605     AddToWorklist(Est.getNode());
   14606   }
   14607 
   14608   return Est;
   14609 }
   14610 
   14611 /// Newton iteration for a function: F(X) is X_{i+1} = X_i - F(X_i)/F'(X_i)
   14612 /// For the reciprocal sqrt, we need to find the zero of the function:
   14613 ///   F(X) = 1/X^2 - A [which has a zero at X = 1/sqrt(A)]
   14614 ///     =>
   14615 ///   X_{i+1} = (-0.5 * X_i) * (A * X_i * X_i + (-3.0))
   14616 SDValue DAGCombiner::buildSqrtNRTwoConst(SDValue Arg, SDValue Est,
   14617                                          unsigned Iterations,
   14618                                          SDNodeFlags *Flags, bool Reciprocal) {
   14619   EVT VT = Arg.getValueType();
   14620   SDLoc DL(Arg);
   14621   SDValue MinusThree = DAG.getConstantFP(-3.0, DL, VT);
   14622   SDValue MinusHalf = DAG.getConstantFP(-0.5, DL, VT);
   14623 
   14624   // This routine must enter the loop below to work correctly
   14625   // when (Reciprocal == false).
   14626   assert(Iterations > 0);
   14627 
   14628   // Newton iterations for reciprocal square root:
   14629   // E = (E * -0.5) * ((A * E) * E + -3.0)
   14630   for (unsigned i = 0; i < Iterations; ++i) {
   14631     SDValue AE = DAG.getNode(ISD::FMUL, DL, VT, Arg, Est, Flags);
   14632     AddToWorklist(AE.getNode());
   14633 
   14634     SDValue AEE = DAG.getNode(ISD::FMUL, DL, VT, AE, Est, Flags);
   14635     AddToWorklist(AEE.getNode());
   14636 
   14637     SDValue RHS = DAG.getNode(ISD::FADD, DL, VT, AEE, MinusThree, Flags);
   14638     AddToWorklist(RHS.getNode());
   14639 
   14640     // When calculating a square root at the last iteration build:
   14641     // S = ((A * E) * -0.5) * ((A * E) * E + -3.0)
   14642     // (notice a common subexpression)
   14643     SDValue LHS;
   14644     if (Reciprocal || (i + 1) < Iterations) {
   14645       // RSQRT: LHS = (E * -0.5)
   14646       LHS = DAG.getNode(ISD::FMUL, DL, VT, Est, MinusHalf, Flags);
   14647     } else {
   14648       // SQRT: LHS = (A * E) * -0.5
   14649       LHS = DAG.getNode(ISD::FMUL, DL, VT, AE, MinusHalf, Flags);
   14650     }
   14651     AddToWorklist(LHS.getNode());
   14652 
   14653     Est = DAG.getNode(ISD::FMUL, DL, VT, LHS, RHS, Flags);
   14654     AddToWorklist(Est.getNode());
   14655   }
   14656 
   14657   return Est;
   14658 }
   14659 
   14660 /// Build code to calculate either rsqrt(Op) or sqrt(Op). In the latter case
   14661 /// Op*rsqrt(Op) is actually computed, so additional postprocessing is needed if
   14662 /// Op can be zero.
   14663 SDValue DAGCombiner::buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags,
   14664                                            bool Reciprocal) {
   14665   if (Level >= AfterLegalizeDAG)
   14666     return SDValue();
   14667 
   14668   // Expose the DAG combiner to the target combiner implementations.
   14669   TargetLowering::DAGCombinerInfo DCI(DAG, Level, false, this);
   14670   unsigned Iterations = 0;
   14671   bool UseOneConstNR = false;
   14672   if (SDValue Est = TLI.getRsqrtEstimate(Op, DCI, Iterations, UseOneConstNR)) {
   14673     AddToWorklist(Est.getNode());
   14674     if (Iterations) {
   14675       Est = UseOneConstNR
   14676                 ? buildSqrtNROneConst(Op, Est, Iterations, Flags, Reciprocal)
   14677                 : buildSqrtNRTwoConst(Op, Est, Iterations, Flags, Reciprocal);
   14678     }
   14679     return Est;
   14680   }
   14681 
   14682   return SDValue();
   14683 }
   14684 
/// Build an estimate of 1/sqrt(Op); see buildSqrtEstimateImpl.
SDValue DAGCombiner::buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
  return buildSqrtEstimateImpl(Op, Flags, /*Reciprocal=*/true);
}
   14688 
   14689 SDValue DAGCombiner::buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags) {
   14690   SDValue Est = buildSqrtEstimateImpl(Op, Flags, false);
   14691   if (!Est)
   14692     return SDValue();
   14693 
   14694   // Unfortunately, Est is now NaN if the input was exactly 0.
   14695   // Select out this case and force the answer to 0.
   14696   EVT VT = Est.getValueType();
   14697   SDLoc DL(Op);
   14698   SDValue Zero = DAG.getConstantFP(0.0, DL, VT);
   14699   EVT CCVT = getSetCCResultType(VT);
   14700   SDValue ZeroCmp = DAG.getSetCC(DL, CCVT, Op, Zero, ISD::SETEQ);
   14701   AddToWorklist(ZeroCmp.getNode());
   14702 
   14703   Est = DAG.getNode(VT.isVector() ? ISD::VSELECT : ISD::SELECT, DL, VT, ZeroCmp,
   14704                     Zero, Est);
   14705   AddToWorklist(Est.getNode());
   14706   return Est;
   14707 }
   14708 
   14709 /// Return true if base is a frame index, which is known not to alias with
   14710 /// anything but itself.  Provides base object and offset as results.
   14711 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
   14712                            const GlobalValue *&GV, const void *&CV) {
   14713   // Assume it is a primitive operation.
   14714   Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr;
   14715 
   14716   // If it's an adding a simple constant then integrate the offset.
   14717   if (Base.getOpcode() == ISD::ADD) {
   14718     if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) {
   14719       Base = Base.getOperand(0);
   14720       Offset += C->getZExtValue();
   14721     }
   14722   }
   14723 
   14724   // Return the underlying GlobalValue, and update the Offset.  Return false
   14725   // for GlobalAddressSDNode since the same GlobalAddress may be represented
   14726   // by multiple nodes with different offsets.
   14727   if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) {
   14728     GV = G->getGlobal();
   14729     Offset += G->getOffset();
   14730     return false;
   14731   }
   14732 
   14733   // Return the underlying Constant value, and update the Offset.  Return false
   14734   // for ConstantSDNodes since the same constant pool entry may be represented
   14735   // by multiple nodes with different offsets.
   14736   if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) {
   14737     CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal()
   14738                                          : (const void *)C->getConstVal();
   14739     Offset += C->getOffset();
   14740     return false;
   14741   }
   14742   // If it's any of the following then it can't alias with anything but itself.
   14743   return isa<FrameIndexSDNode>(Base);
   14744 }
   14745 
/// Return true if there is any possibility that the two addresses overlap.
bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const {
  // If they are the same then they must be aliases.
  if (Op0->getBasePtr() == Op1->getBasePtr()) return true;

  // If they are both volatile then they cannot be reordered.
  if (Op0->isVolatile() && Op1->isVolatile()) return true;

  // If one operation reads from invariant memory, and the other may store, they
  // cannot alias. These should really be checking the equivalent of mayWrite,
  // but it only matters for memory nodes other than load/store.
  if (Op0->isInvariant() && Op1->writeMem())
    return false;

  if (Op1->isInvariant() && Op0->writeMem())
    return false;

  // Gather base node and offset information.  FindBaseOffset peels a constant
  // ADD off each pointer, reports any underlying GlobalValue or constant-pool
  // entry, and returns true only for frame indices (unique bases).
  SDValue Base1, Base2;
  int64_t Offset1, Offset2;
  const GlobalValue *GV1, *GV2;
  const void *CV1, *CV2;
  bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(),
                                      Base1, Offset1, GV1, CV1);
  bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(),
                                      Base2, Offset2, GV2, CV2);

  // If they have the same base address then check to see if the two byte
  // ranges overlap.  (getSizeInBits() >> 3 is the access size in bytes.)
  if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2)))
    return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
             (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);

  // It is possible for different frame indices to alias each other, mostly
  // when tail call optimization reuses return address slots for arguments.
  // To catch this case, look up the actual index of frame indices to compute
  // the real alias relationship.
  if (isFrameIndex1 && isFrameIndex2) {
    MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
    // Rebase both offsets onto the actual frame layout so distinct frame
    // indices that share storage compare correctly.
    Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex());
    Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex());
    return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 ||
             (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1);
  }

  // Otherwise, if we know what the bases are, and they aren't identical, then
  // we know they cannot alias.
  if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2))
    return false;

  // If we know required SrcValue1 and SrcValue2 have relatively large alignment
  // compared to the size and offset of the access, we may be able to prove they
  // do not alias.  This check is conservative for now to catch cases created by
  // splitting vector types: it requires equal alignments, equal access sizes,
  // and alignment strictly greater than the access size.
  if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) &&
      (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) &&
      (Op0->getMemoryVT().getSizeInBits() >> 3 ==
       Op1->getMemoryVT().getSizeInBits() >> 3) &&
      (Op0->getOriginalAlignment() > (Op0->getMemoryVT().getSizeInBits() >> 3))) {
    // Position of each access within its alignment "stripe".
    int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment();
    int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment();

    // There is no overlap between these relatively aligned accesses of similar
    // size, return no alias.
    if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 ||
        (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1)
      return false;
  }

  // Decide whether to consult IR-level alias analysis: an explicit
  // command-line setting (CombinerGlobalAA) wins; otherwise defer to the
  // subtarget's preference.
  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0
                   ? CombinerGlobalAA
                   : DAG.getSubtarget().useAA();
#ifndef NDEBUG
  // Debug aid: restrict AA-based queries to a single named function.
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA &&
      Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) {
    // Use alias analysis information.  Build extents measured from the smaller
    // of the two source-value offsets so each MemoryLocation covers its
    // access's full range relative to the common origin.
    int64_t MinOffset = std::min(Op0->getSrcValueOffset(),
                                 Op1->getSrcValueOffset());
    int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) +
        Op0->getSrcValueOffset() - MinOffset;
    int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) +
        Op1->getSrcValueOffset() - MinOffset;
    AliasResult AAResult =
        AA.alias(MemoryLocation(Op0->getMemOperand()->getValue(), Overlap1,
                                UseTBAA ? Op0->getAAInfo() : AAMDNodes()),
                 MemoryLocation(Op1->getMemOperand()->getValue(), Overlap2,
                                UseTBAA ? Op1->getAAInfo() : AAMDNodes()));
    if (AAResult == NoAlias)
      return false;
  }

  // Otherwise we have to assume they alias.
  return true;
}
   14843 
/// Walk up chain skipping non-aliasing memory nodes,
/// looking for aliasing nodes and adding them to the Aliases vector.
void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
                                   SmallVectorImpl<SDValue> &Aliases) {
  SmallVector<SDValue, 8> Chains;     // List of chains to visit.
  SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.

  // Get alias information for node.  IsLoad is true only for non-volatile
  // loads; a volatile load is treated like a store for this walk.
  bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile();

  // Starting off.
  Chains.push_back(OriginalChain);
  unsigned Depth = 0;

  // Look at each chain and determine if it is an alias.  If so, add it to the
  // aliases list.  If not, then continue up the chain looking for the next
  // candidate.
  while (!Chains.empty()) {
    SDValue Chain = Chains.pop_back_val();

    // For TokenFactor nodes, look at each operand and only continue up the
    // chain until we reach the depth limit.
    //
    // FIXME: The depth check could be made to return the last non-aliasing
    // chain we found before we hit a tokenfactor rather than the original
    // chain.
    if (Depth > TLI.getGatherAllAliasesMaxDepth()) {
      // Give up: conservatively report the original chain as the only alias.
      Aliases.clear();
      Aliases.push_back(OriginalChain);
      return;
    }

    // Don't bother if we've been before.
    if (!Visited.insert(Chain.getNode()).second)
      continue;

    switch (Chain.getOpcode()) {
    case ISD::EntryToken:
      // Entry token is ideal chain operand, but handled in FindBetterChain.
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain (non-volatile load or not).
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
          !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.  Two non-volatile loads never need
      // ordering, so the isAlias query is skipped when both sides are such
      // loads.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        // Too wide to be worth expanding; treat the whole factor as an alias.
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }
}
   14924 
   14925 /// Walk up chain skipping non-aliasing memory nodes, looking for a better chain
   14926 /// (aliasing node.)
   14927 SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
   14928   SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.
   14929 
   14930   // Accumulate all the aliases to this node.
   14931   GatherAllAliases(N, OldChain, Aliases);
   14932 
   14933   // If no operands then chain to entry token.
   14934   if (Aliases.size() == 0)
   14935     return DAG.getEntryNode();
   14936 
   14937   // If a single operand then chain to it.  We don't need to revisit it.
   14938   if (Aliases.size() == 1)
   14939     return Aliases[0];
   14940 
   14941   // Construct a custom tailored token factor.
   14942   return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
   14943 }
   14944 
/// Walk up the chain from \p St collecting consecutive non-volatile,
/// non-indexed stores that share St's base pointer, then try to give each of
/// them (including St itself) a better chain via FindBetterChain.
/// \returns true iff St's own chain operand was replaced.
bool DAGCombiner::findBetterNeighborChains(StoreSDNode *St) {
  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr(), DAG);

  // We must have a base and an offset.
  if (!BasePtr.Base.getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.Base.isUndef())
    return false;

  // St plus every chained store we can prove shares its base pointer.
  SmallVector<StoreSDNode *, 8> ChainedStores;
  ChainedStores.push_back(St);

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 0)->hasOneUse())
      break;

    // Volatile or indexed stores must not be reordered.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr(), DAG);

    // Check that the base pointer is the same as the original one.
    if (!Ptr.equalBaseIndex(BasePtr))
      break;

    // Find the next memory operand in the chain. If the next operand in the
    // chain is a store then move up and continue the scan with the next
    // memory operand. If the next operand is a load save it and use alias
    // information to check if it interferes with anything.
    SDNode *NextInChain = Index->getChain().getNode();
    while (true) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        if (STn->isVolatile() || STn->isIndexed()) {
          // An unreorderable store ends the scan.
          Index = nullptr;
          break;
        }
        ChainedStores.push_back(STn);
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        // Skip over loads here; FindBetterChain's alias analysis decides
        // later whether they actually interfere.
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        // Any other kind of node terminates the walk.
        Index = nullptr;
        break;
      }
    }
  }

  // Tracks whether St itself (not just a neighbor) received a new chain.
  bool MadeChangeToSt = false;
  SmallVector<std::pair<StoreSDNode *, SDValue>, 8> BetterChains;

  // First pass: compute the better chain for every collected store without
  // mutating the DAG, so each query still sees the original chains.
  for (StoreSDNode *ChainedStore : ChainedStores) {
    SDValue Chain = ChainedStore->getChain();
    SDValue BetterChain = FindBetterChain(ChainedStore, Chain);

    if (Chain != BetterChain) {
      if (ChainedStore == St)
        MadeChangeToSt = true;
      BetterChains.push_back(std::make_pair(ChainedStore, BetterChain));
    }
  }

  // Do all replacements after finding the replacements to make to avoid making
  // the chains more complicated by introducing new TokenFactors.
  for (auto Replacement : BetterChains)
    replaceStoreChain(Replacement.first, Replacement.second);

  return MadeChangeToSt;
}
   15026 
   15027 /// This is the entry point for the file.
   15028 void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
   15029                            CodeGenOpt::Level OptLevel) {
   15030   /// This is the main entry point to this class.
   15031   DAGCombiner(*this, AA, OptLevel).Run(Level);
   15032 }
   15033