1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11 // both before and after the DAG is legalized. 12 // 13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14 // primarily intended to handle simplification opportunities that are implicit 15 // in the LLVM IR and exposed by the various codegen lowering phases. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/CodeGen/SelectionDAG.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/SmallBitVector.h" 22 #include "llvm/ADT/SmallPtrSet.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/Analysis/AliasAnalysis.h" 25 #include "llvm/CodeGen/MachineFrameInfo.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/IR/DataLayout.h" 28 #include "llvm/IR/DerivedTypes.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/IR/LLVMContext.h" 31 #include "llvm/Support/CommandLine.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Support/ErrorHandling.h" 34 #include "llvm/Support/MathExtras.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include "llvm/Target/TargetLowering.h" 37 #include "llvm/Target/TargetOptions.h" 38 #include "llvm/Target/TargetRegisterInfo.h" 39 #include "llvm/Target/TargetSubtargetInfo.h" 40 #include <algorithm> 41 using namespace llvm; 42 43 #define DEBUG_TYPE "dagcombine" 44 45 STATISTIC(NodesCombined , "Number of dag nodes combined"); 46 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 47 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 48 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); 49 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); 50 STATISTIC(SlicedLoads, "Number of load sliced"); 51 52 namespace { 53 static cl::opt<bool> 54 CombinerAA("combiner-alias-analysis", cl::Hidden, 55 cl::desc("Enable DAG combiner alias-analysis heuristics")); 56 57 static cl::opt<bool> 58 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, 59 cl::desc("Enable DAG combiner's use of IR alias analysis")); 60 61 static cl::opt<bool> 62 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), 63 cl::desc("Enable DAG combiner's use of TBAA")); 64 65 #ifndef NDEBUG 66 static cl::opt<std::string> 67 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, 68 cl::desc("Only use DAG-combiner alias analysis in this" 69 " function")); 70 #endif 71 72 /// Hidden option to stress test load slicing, i.e., when this option 73 /// is enabled, load slicing bypasses most of its profitability guards. 74 static cl::opt<bool> 75 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, 76 cl::desc("Bypass the profitability model of load " 77 "slicing"), 78 cl::init(false)); 79 80 static cl::opt<bool> 81 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), 82 cl::desc("DAG combiner may split indexing from loads")); 83 84 //------------------------------ DAGCombiner ---------------------------------// 85 86 class DAGCombiner { 87 SelectionDAG &DAG; 88 const TargetLowering &TLI; 89 CombineLevel Level; 90 CodeGenOpt::Level OptLevel; 91 bool LegalOperations; 92 bool LegalTypes; 93 bool ForCodeSize; 94 95 /// \brief Worklist of all of the nodes that need to be simplified. 96 /// 97 /// This must behave as a stack -- new nodes to process are pushed onto the 98 /// back and when processing we pop off of the back. 99 /// 100 /// The worklist will not contain duplicates but may contain null entries 101 /// due to nodes being deleted from the underlying DAG. 102 SmallVector<SDNode *, 64> Worklist; 103 104 /// \brief Mapping from an SDNode to its position on the worklist. 105 /// 106 /// This is used to find and remove nodes from the worklist (by nulling 107 /// them) when they are deleted from the underlying DAG. It relies on 108 /// stable indices of nodes within the worklist. 109 DenseMap<SDNode *, unsigned> WorklistMap; 110 111 /// \brief Set of nodes which have been combined (at least once). 112 /// 113 /// This is used to allow us to reliably add any operands of a DAG node 114 /// which have not yet been combined to the worklist. 115 SmallPtrSet<SDNode *, 64> CombinedNodes; 116 117 // AA - Used for DAG load/store alias analysis. 118 AliasAnalysis &AA; 119 120 /// When an instruction is simplified, add all users of the instruction to 121 /// the work lists because they might get more simplified now. 122 void AddUsersToWorklist(SDNode *N) { 123 for (SDNode *Node : N->uses()) 124 AddToWorklist(Node); 125 } 126 127 /// Call the node-specific routine that folds each particular type of node. 128 SDValue visit(SDNode *N); 129 130 public: 131 /// Add to the worklist making sure its instance is at the back (next to be 132 /// processed.) 133 void AddToWorklist(SDNode *N) { 134 // Skip handle nodes as they can't usefully be combined and confuse the 135 // zero-use deletion strategy. 136 if (N->getOpcode() == ISD::HANDLENODE) 137 return; 138 139 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) 140 Worklist.push_back(N); 141 } 142 143 /// Remove all instances of N from the worklist. 144 void removeFromWorklist(SDNode *N) { 145 CombinedNodes.erase(N); 146 147 auto It = WorklistMap.find(N); 148 if (It == WorklistMap.end()) 149 return; // Not in the worklist. 150 151 // Null out the entry rather than erasing it to avoid a linear operation. 152 Worklist[It->second] = nullptr; 153 WorklistMap.erase(It); 154 } 155 156 void deleteAndRecombine(SDNode *N); 157 bool recursivelyDeleteUnusedNodes(SDNode *N); 158 159 /// Replaces all uses of the results of one DAG node with new values. 160 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 161 bool AddTo = true); 162 163 /// Replaces all uses of the results of one DAG node with new values. 164 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { 165 return CombineTo(N, &Res, 1, AddTo); 166 } 167 168 /// Replaces all uses of the results of one DAG node with new values. 169 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, 170 bool AddTo = true) { 171 SDValue To[] = { Res0, Res1 }; 172 return CombineTo(N, To, 2, AddTo); 173 } 174 175 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); 176 177 private: 178 179 /// Check the specified integer node value to see if it can be simplified or 180 /// if things it uses can be simplified by bit propagation. 181 /// If so, return true. 182 bool SimplifyDemandedBits(SDValue Op) { 183 unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); 184 APInt Demanded = APInt::getAllOnesValue(BitWidth); 185 return SimplifyDemandedBits(Op, Demanded); 186 } 187 188 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); 189 190 bool CombineToPreIndexedLoadStore(SDNode *N); 191 bool CombineToPostIndexedLoadStore(SDNode *N); 192 SDValue SplitIndexingFromLoad(LoadSDNode *LD); 193 bool SliceUpLoad(SDNode *N); 194 195 /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed 196 /// load. 197 /// 198 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. 199 /// \param InVecVT type of the input vector to EVE with bitcasts resolved. 200 /// \param EltNo index of the vector element to load. 201 /// \param OriginalLoad load that EVE came from to be replaced. 202 /// \returns EVE on success SDValue() on failure. 203 SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( 204 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); 205 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); 206 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); 207 SDValue SExtPromoteOperand(SDValue Op, EVT PVT); 208 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); 209 SDValue PromoteIntBinOp(SDValue Op); 210 SDValue PromoteIntShiftOp(SDValue Op); 211 SDValue PromoteExtend(SDValue Op); 212 bool PromoteLoad(SDValue Op); 213 214 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 215 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 216 ISD::NodeType ExtType); 217 218 /// Call the node-specific routine that knows how to fold each 219 /// particular type of node. If that doesn't do anything, try the 220 /// target-specific DAG combines. 221 SDValue combine(SDNode *N); 222 223 // Visitation implementation - Implement dag node combining for different 224 // node types. The semantics are as follows: 225 // Return Value: 226 // SDValue.getNode() == 0 - No change was made 227 // SDValue.getNode() == N - N was replaced, is dead and has been handled. 228 // otherwise - N should be replaced by the returned Operand. 229 // 230 SDValue visitTokenFactor(SDNode *N); 231 SDValue visitMERGE_VALUES(SDNode *N); 232 SDValue visitADD(SDNode *N); 233 SDValue visitSUB(SDNode *N); 234 SDValue visitADDC(SDNode *N); 235 SDValue visitSUBC(SDNode *N); 236 SDValue visitADDE(SDNode *N); 237 SDValue visitSUBE(SDNode *N); 238 SDValue visitMUL(SDNode *N); 239 SDValue useDivRem(SDNode *N); 240 SDValue visitSDIV(SDNode *N); 241 SDValue visitUDIV(SDNode *N); 242 SDValue visitREM(SDNode *N); 243 SDValue visitMULHU(SDNode *N); 244 SDValue visitMULHS(SDNode *N); 245 SDValue visitSMUL_LOHI(SDNode *N); 246 SDValue visitUMUL_LOHI(SDNode *N); 247 SDValue visitSMULO(SDNode *N); 248 SDValue visitUMULO(SDNode *N); 249 SDValue visitIMINMAX(SDNode *N); 250 SDValue visitAND(SDNode *N); 251 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); 252 SDValue visitOR(SDNode *N); 253 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); 254 SDValue visitXOR(SDNode *N); 255 SDValue SimplifyVBinOp(SDNode *N); 256 SDValue visitSHL(SDNode *N); 257 SDValue visitSRA(SDNode *N); 258 SDValue visitSRL(SDNode *N); 259 SDValue visitRotate(SDNode *N); 260 SDValue visitBSWAP(SDNode *N); 261 SDValue visitCTLZ(SDNode *N); 262 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); 263 SDValue visitCTTZ(SDNode *N); 264 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); 265 SDValue visitCTPOP(SDNode *N); 266 SDValue visitSELECT(SDNode *N); 267 SDValue visitVSELECT(SDNode *N); 268 SDValue visitSELECT_CC(SDNode *N); 269 SDValue visitSETCC(SDNode *N); 270 SDValue visitSETCCE(SDNode *N); 271 SDValue visitSIGN_EXTEND(SDNode *N); 272 SDValue visitZERO_EXTEND(SDNode *N); 273 SDValue visitANY_EXTEND(SDNode *N); 274 SDValue visitSIGN_EXTEND_INREG(SDNode *N); 275 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); 276 SDValue visitTRUNCATE(SDNode *N); 277 SDValue visitBITCAST(SDNode *N); 278 SDValue visitBUILD_PAIR(SDNode *N); 279 SDValue visitFADD(SDNode *N); 280 SDValue visitFSUB(SDNode *N); 281 SDValue visitFMUL(SDNode *N); 282 SDValue visitFMA(SDNode *N); 283 SDValue visitFDIV(SDNode *N); 284 SDValue visitFREM(SDNode *N); 285 SDValue visitFSQRT(SDNode *N); 286 SDValue visitFCOPYSIGN(SDNode *N); 287 SDValue visitSINT_TO_FP(SDNode *N); 288 SDValue visitUINT_TO_FP(SDNode *N); 289 SDValue visitFP_TO_SINT(SDNode *N); 290 SDValue visitFP_TO_UINT(SDNode *N); 291 SDValue visitFP_ROUND(SDNode *N); 292 SDValue visitFP_ROUND_INREG(SDNode *N); 293 SDValue visitFP_EXTEND(SDNode *N); 294 SDValue visitFNEG(SDNode *N); 295 SDValue visitFABS(SDNode *N); 296 SDValue visitFCEIL(SDNode *N); 297 SDValue visitFTRUNC(SDNode *N); 298 SDValue visitFFLOOR(SDNode *N); 299 SDValue visitFMINNUM(SDNode *N); 300 SDValue visitFMAXNUM(SDNode *N); 301 SDValue visitBRCOND(SDNode *N); 302 SDValue visitBR_CC(SDNode *N); 303 SDValue visitLOAD(SDNode *N); 304 305 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); 306 SDValue replaceStoreOfFPConstant(StoreSDNode *ST); 307 308 SDValue visitSTORE(SDNode *N); 309 SDValue visitINSERT_VECTOR_ELT(SDNode *N); 310 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); 311 SDValue visitBUILD_VECTOR(SDNode *N); 312 SDValue visitCONCAT_VECTORS(SDNode *N); 313 SDValue visitEXTRACT_SUBVECTOR(SDNode *N); 314 SDValue visitVECTOR_SHUFFLE(SDNode *N); 315 SDValue visitSCALAR_TO_VECTOR(SDNode *N); 316 SDValue visitINSERT_SUBVECTOR(SDNode *N); 317 SDValue visitMLOAD(SDNode *N); 318 SDValue visitMSTORE(SDNode *N); 319 SDValue visitMGATHER(SDNode *N); 320 SDValue visitMSCATTER(SDNode *N); 321 SDValue visitFP_TO_FP16(SDNode *N); 322 SDValue visitFP16_TO_FP(SDNode *N); 323 324 SDValue visitFADDForFMACombine(SDNode *N); 325 SDValue visitFSUBForFMACombine(SDNode *N); 326 SDValue visitFMULForFMACombine(SDNode *N); 327 328 SDValue XformToShuffleWithZero(SDNode *N); 329 SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); 330 331 SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); 332 333 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); 334 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); 335 SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); 336 SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, 337 SDValue N3, ISD::CondCode CC, 338 bool NotExtCompare = false); 339 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 340 SDLoc DL, bool foldBooleans = true); 341 342 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 343 SDValue &CC) const; 344 bool isOneUseSetCC(SDValue N) const; 345 346 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 347 unsigned HiOp); 348 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); 349 SDValue CombineExtLoad(SDNode *N); 350 SDValue combineRepeatedFPDivisors(SDNode *N); 351 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); 352 SDValue BuildSDIV(SDNode *N); 353 SDValue BuildSDIVPow2(SDNode *N); 354 SDValue BuildUDIV(SDNode *N); 355 SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); 356 SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); 357 SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, 358 SDNodeFlags *Flags); 359 SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, 360 SDNodeFlags *Flags); 361 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 362 bool DemandHighBits = true); 363 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); 364 SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, 365 SDValue InnerPos, SDValue InnerNeg, 366 unsigned PosOpcode, unsigned NegOpcode, 367 SDLoc DL); 368 SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); 369 SDValue ReduceLoadWidth(SDNode *N); 370 SDValue ReduceLoadOpStoreWidth(SDNode *N); 371 SDValue TransformFPLoadStorePair(SDNode *N); 372 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); 373 SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); 374 375 SDValue GetDemandedBits(SDValue V, const APInt &Mask); 376 377 /// Walk up chain skipping non-aliasing memory nodes, 378 /// looking for aliasing nodes and adding them to the Aliases vector. 379 void GatherAllAliases(SDNode *N, SDValue OriginalChain, 380 SmallVectorImpl<SDValue> &Aliases); 381 382 /// Return true if there is any possibility that the two addresses overlap. 383 bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; 384 385 /// Walk up chain skipping non-aliasing memory nodes, looking for a better 386 /// chain (aliasing node.) 387 SDValue FindBetterChain(SDNode *N, SDValue Chain); 388 389 /// Do FindBetterChain for a store and any possibly adjacent stores on 390 /// consecutive chains. 391 bool findBetterNeighborChains(StoreSDNode *St); 392 393 /// Holds a pointer to an LSBaseSDNode as well as information on where it 394 /// is located in a sequence of memory operations connected by a chain. 395 struct MemOpLink { 396 MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): 397 MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } 398 // Ptr to the mem node. 399 LSBaseSDNode *MemNode; 400 // Offset from the base ptr. 401 int64_t OffsetFromBase; 402 // What is the sequence number of this mem node. 403 // Lowest mem operand in the DAG starts at zero. 404 unsigned SequenceNum; 405 }; 406 407 /// This is a helper function for visitMUL to check the profitability 408 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). 409 /// MulNode is the original multiply, AddNode is (add x, c1), 410 /// and ConstNode is c2. 411 bool isMulAddWithConstProfitable(SDNode *MulNode, 412 SDValue &AddNode, 413 SDValue &ConstNode); 414 415 /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a 416 /// constant build_vector of the stored constant values in Stores. 417 SDValue getMergedConstantVectorStore(SelectionDAG &DAG, 418 SDLoc SL, 419 ArrayRef<MemOpLink> Stores, 420 SmallVectorImpl<SDValue> &Chains, 421 EVT Ty) const; 422 423 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns 424 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns 425 /// the type of the loaded value to be extended. LoadedVT returns the type 426 /// of the original loaded value. NarrowLoad returns whether the load would 427 /// need to be narrowed in order to match. 428 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, 429 EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, 430 bool &NarrowLoad); 431 432 /// This is a helper function for MergeConsecutiveStores. When the source 433 /// elements of the consecutive stores are all constants or all extracted 434 /// vector elements, try to merge them into one larger store. 435 /// \return True if a merged store was created. 436 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, 437 EVT MemVT, unsigned NumStores, 438 bool IsConstantSrc, bool UseVector); 439 440 /// This is a helper function for MergeConsecutiveStores. 441 /// Stores that may be merged are placed in StoreNodes. 442 /// Loads that may alias with those stores are placed in AliasLoadNodes. 443 void getStoreMergeAndAliasCandidates( 444 StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, 445 SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); 446 447 /// Merge consecutive store operations into a wide store. 448 /// This optimization uses wide integers or vectors when possible. 449 /// \return True if some memory operations were changed. 450 bool MergeConsecutiveStores(StoreSDNode *N); 451 452 /// \brief Try to transform a truncation where C is a constant: 453 /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) 454 /// 455 /// \p N needs to be a truncation and its first operand an AND. Other 456 /// requirements are checked by the function (e.g. that trunc is 457 /// single-use) and if missed an empty SDValue is returned. 458 SDValue distributeTruncateThroughAnd(SDNode *N); 459 460 public: 461 DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) 462 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), 463 OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { 464 ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); 465 } 466 467 /// Runs the dag combiner on all nodes in the work list 468 void Run(CombineLevel AtLevel); 469 470 SelectionDAG &getDAG() const { return DAG; } 471 472 /// Returns a type large enough to hold any valid shift amount - before type 473 /// legalization these can be huge. 474 EVT getShiftAmountTy(EVT LHSTy) { 475 assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); 476 if (LHSTy.isVector()) 477 return LHSTy; 478 auto &DL = DAG.getDataLayout(); 479 return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy) 480 : TLI.getPointerTy(DL); 481 } 482 483 /// This method returns true if we are running before type legalization or 484 /// if the specified VT is legal. 485 bool isTypeLegal(const EVT &VT) { 486 if (!LegalTypes) return true; 487 return TLI.isTypeLegal(VT); 488 } 489 490 /// Convenience wrapper around TargetLowering::getSetCCResultType 491 EVT getSetCCResultType(EVT VT) const { 492 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 493 } 494 }; 495 } 496 497 498 namespace { 499 /// This class is a DAGUpdateListener that removes any deleted 500 /// nodes from the worklist. 501 class WorklistRemover : public SelectionDAG::DAGUpdateListener { 502 DAGCombiner &DC; 503 public: 504 explicit WorklistRemover(DAGCombiner &dc) 505 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 506 507 void NodeDeleted(SDNode *N, SDNode *E) override { 508 DC.removeFromWorklist(N); 509 } 510 }; 511 } 512 513 //===----------------------------------------------------------------------===// 514 // TargetLowering::DAGCombinerInfo implementation 515 //===----------------------------------------------------------------------===// 516 517 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 518 ((DAGCombiner*)DC)->AddToWorklist(N); 519 } 520 521 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { 522 ((DAGCombiner*)DC)->removeFromWorklist(N); 523 } 524 525 SDValue TargetLowering::DAGCombinerInfo:: 526 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { 527 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 528 } 529 530 SDValue TargetLowering::DAGCombinerInfo:: 531 CombineTo(SDNode *N, SDValue Res, bool AddTo) { 532 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 533 } 534 535 536 SDValue TargetLowering::DAGCombinerInfo:: 537 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 538 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 539 } 540 541 void TargetLowering::DAGCombinerInfo:: 542 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 543 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 544 } 545 546 //===----------------------------------------------------------------------===// 547 // Helper Functions 548 //===----------------------------------------------------------------------===// 549 550 void DAGCombiner::deleteAndRecombine(SDNode *N) { 551 removeFromWorklist(N); 552 553 // If the operands of this node are only used by the node, they will now be 554 // dead. Make sure to re-visit them and recursively delete dead nodes. 555 for (const SDValue &Op : N->ops()) 556 // For an operand generating multiple values, one of the values may 557 // become dead allowing further simplification (e.g. split index 558 // arithmetic from an indexed load). 559 if (Op->hasOneUse() || Op->getNumValues() > 1) 560 AddToWorklist(Op.getNode()); 561 562 DAG.DeleteNode(N); 563 } 564 565 /// Return 1 if we can compute the negated form of the specified expression for 566 /// the same cost as the expression itself, or 2 if we can compute the negated 567 /// form more cheaply than the expression itself. 568 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 569 const TargetLowering &TLI, 570 const TargetOptions *Options, 571 unsigned Depth = 0) { 572 // fneg is removable even if it has multiple uses. 573 if (Op.getOpcode() == ISD::FNEG) return 2; 574 575 // Don't allow anything with multiple uses. 576 if (!Op.hasOneUse()) return 0; 577 578 // Don't recurse exponentially. 579 if (Depth > 6) return 0; 580 581 switch (Op.getOpcode()) { 582 default: return false; 583 case ISD::ConstantFP: 584 // Don't invert constant FP values after legalize. The negated constant 585 // isn't necessarily legal. 586 return LegalOperations ? 0 : 1; 587 case ISD::FADD: 588 // FIXME: determine better conditions for this xform. 589 if (!Options->UnsafeFPMath) return 0; 590 591 // After operation legalization, it might not be legal to create new FSUBs. 592 if (LegalOperations && 593 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 594 return 0; 595 596 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 597 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 598 Options, Depth + 1)) 599 return V; 600 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 601 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 602 Depth + 1); 603 case ISD::FSUB: 604 // We can't turn -(A-B) into B-A when we honor signed zeros. 605 if (!Options->UnsafeFPMath) return 0; 606 607 // fold (fneg (fsub A, B)) -> (fsub B, A) 608 return 1; 609 610 case ISD::FMUL: 611 case ISD::FDIV: 612 if (Options->HonorSignDependentRoundingFPMath()) return 0; 613 614 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 615 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 616 Options, Depth + 1)) 617 return V; 618 619 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 620 Depth + 1); 621 622 case ISD::FP_EXTEND: 623 case ISD::FP_ROUND: 624 case ISD::FSIN: 625 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 626 Depth + 1); 627 } 628 } 629 630 /// If isNegatibleForFree returns true, return the newly negated expression. 631 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 632 bool LegalOperations, unsigned Depth = 0) { 633 const TargetOptions &Options = DAG.getTarget().Options; 634 // fneg is removable even if it has multiple uses. 635 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 636 637 // Don't allow anything with multiple uses. 638 assert(Op.hasOneUse() && "Unknown reuse!"); 639 640 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 641 642 const SDNodeFlags *Flags = Op.getNode()->getFlags(); 643 644 switch (Op.getOpcode()) { 645 default: llvm_unreachable("Unknown code"); 646 case ISD::ConstantFP: { 647 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 648 V.changeSign(); 649 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); 650 } 651 case ISD::FADD: 652 // FIXME: determine better conditions for this xform. 653 assert(Options.UnsafeFPMath); 654 655 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 656 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 657 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 658 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 659 GetNegatedExpression(Op.getOperand(0), DAG, 660 LegalOperations, Depth+1), 661 Op.getOperand(1), Flags); 662 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 663 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 664 GetNegatedExpression(Op.getOperand(1), DAG, 665 LegalOperations, Depth+1), 666 Op.getOperand(0), Flags); 667 case ISD::FSUB: 668 // We can't turn -(A-B) into B-A when we honor signed zeros. 669 assert(Options.UnsafeFPMath); 670 671 // fold (fneg (fsub 0, B)) -> B 672 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 673 if (N0CFP->isZero()) 674 return Op.getOperand(1); 675 676 // fold (fneg (fsub A, B)) -> (fsub B, A) 677 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 678 Op.getOperand(1), Op.getOperand(0), Flags); 679 680 case ISD::FMUL: 681 case ISD::FDIV: 682 assert(!Options.HonorSignDependentRoundingFPMath()); 683 684 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 685 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 686 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 687 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 688 GetNegatedExpression(Op.getOperand(0), DAG, 689 LegalOperations, Depth+1), 690 Op.getOperand(1), Flags); 691 692 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 693 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 694 Op.getOperand(0), 695 GetNegatedExpression(Op.getOperand(1), DAG, 696 LegalOperations, Depth+1), Flags); 697 698 case ISD::FP_EXTEND: 699 case ISD::FSIN: 700 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 701 GetNegatedExpression(Op.getOperand(0), DAG, 702 LegalOperations, Depth+1)); 703 case ISD::FP_ROUND: 704 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 705 GetNegatedExpression(Op.getOperand(0), DAG, 706 LegalOperations, Depth+1), 707 Op.getOperand(1)); 708 } 709 } 710 711 // Return true if this node is a setcc, or is a select_cc 712 // that selects between the target values used for true and false, making it 713 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to 714 // the appropriate nodes based on the type of node we are checking. This 715 // simplifies life a bit for the callers. 716 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 717 SDValue &CC) const { 718 if (N.getOpcode() == ISD::SETCC) { 719 LHS = N.getOperand(0); 720 RHS = N.getOperand(1); 721 CC = N.getOperand(2); 722 return true; 723 } 724 725 if (N.getOpcode() != ISD::SELECT_CC || 726 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 727 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 728 return false; 729 730 if (TLI.getBooleanContents(N.getValueType()) == 731 TargetLowering::UndefinedBooleanContent) 732 return false; 733 734 LHS = N.getOperand(0); 735 RHS = N.getOperand(1); 736 CC = N.getOperand(4); 737 return true; 738 } 739 740 /// Return true if this is a SetCC-equivalent operation with only one use. 741 /// If this is true, it allows the users to invert the operation for free when 742 /// it is profitable to do so. 743 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 744 SDValue N0, N1, N2; 745 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 746 return true; 747 return false; 748 } 749 750 /// Returns true if N is a BUILD_VECTOR node whose 751 /// elements are all the same constant or undefined. 752 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { 753 BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); 754 if (!C) 755 return false; 756 757 APInt SplatUndef; 758 unsigned SplatBitSize; 759 bool HasAnyUndefs; 760 EVT EltVT = N->getValueType(0).getVectorElementType(); 761 return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 762 HasAnyUndefs) && 763 EltVT.getSizeInBits() >= SplatBitSize); 764 } 765 766 // \brief Returns the SDNode if it is a constant integer BuildVector 767 // or constant integer. 768 static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { 769 if (isa<ConstantSDNode>(N)) 770 return N.getNode(); 771 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) 772 return N.getNode(); 773 return nullptr; 774 } 775 776 // \brief Returns the SDNode if it is a constant float BuildVector 777 // or constant float. 778 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { 779 if (isa<ConstantFPSDNode>(N)) 780 return N.getNode(); 781 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 782 return N.getNode(); 783 return nullptr; 784 } 785 786 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 787 // int. 788 static ConstantSDNode *isConstOrConstSplat(SDValue N) { 789 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) 790 return CN; 791 792 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 793 BitVector UndefElements; 794 ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); 795 796 // BuildVectors can truncate their operands. Ignore that case here. 797 // FIXME: We blindly ignore splats which include undef which is overly 798 // pessimistic. 799 if (CN && UndefElements.none() && 800 CN->getValueType(0) == N.getValueType().getScalarType()) 801 return CN; 802 } 803 804 return nullptr; 805 } 806 807 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 808 // float. 809 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { 810 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 811 return CN; 812 813 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 814 BitVector UndefElements; 815 ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); 816 817 if (CN && UndefElements.none()) 818 return CN; 819 } 820 821 return nullptr; 822 } 823 824 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, 825 SDValue N0, SDValue N1) { 826 EVT VT = N0.getValueType(); 827 if (N0.getOpcode() == Opc) { 828 if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { 829 if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { 830 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) 831 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) 832 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 833 return SDValue(); 834 } 835 if (N0.hasOneUse()) { 836 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one 837 // use 838 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); 839 if (!OpNode.getNode()) 840 return SDValue(); 841 AddToWorklist(OpNode.getNode()); 842 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 843 } 844 } 845 } 846 847 if (N1.getOpcode() == Opc) { 848 if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { 849 if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { 850 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) 851 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) 852 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 853 return SDValue(); 854 } 855 if (N1.hasOneUse()) { 856 // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one 857 // use 858 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); 859 if (!OpNode.getNode()) 860 return SDValue(); 861 AddToWorklist(OpNode.getNode()); 862 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 863 } 864 } 865 } 866 867 return SDValue(); 868 } 869 870 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 871 bool AddTo) { 872 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 873 ++NodesCombined; 874 DEBUG(dbgs() << "\nReplacing.1 "; 875 N->dump(&DAG); 876 dbgs() << "\nWith: "; 877 To[0].getNode()->dump(&DAG); 878 dbgs() << " and " << NumTo-1 << " other values\n"); 879 for (unsigned i = 0, e = NumTo; i != e; ++i) 880 assert((!To[i].getNode() || 881 N->getValueType(i) == To[i].getValueType()) && 882 "Cannot combine value to value of different type!"); 883 884 WorklistRemover DeadNodes(*this); 885 DAG.ReplaceAllUsesWith(N, To); 886 if (AddTo) { 887 // Push the new nodes and any users onto the worklist 888 for (unsigned i = 0, e = NumTo; i != e; ++i) { 889 if (To[i].getNode()) { 890 AddToWorklist(To[i].getNode()); 891 AddUsersToWorklist(To[i].getNode()); 892 } 893 } 894 } 895 896 // Finally, if the node is now dead, remove it from the graph. The node 897 // may not be dead if the replacement process recursively simplified to 898 // something else needing this node. 899 if (N->use_empty()) 900 deleteAndRecombine(N); 901 return SDValue(N, 0); 902 } 903 904 void DAGCombiner:: 905 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 906 // Replace all uses. If any nodes become isomorphic to other nodes and 907 // are deleted, make sure to remove them from our worklist. 908 WorklistRemover DeadNodes(*this); 909 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); 910 911 // Push the new node and any (possibly new) users onto the worklist. 912 AddToWorklist(TLO.New.getNode()); 913 AddUsersToWorklist(TLO.New.getNode()); 914 915 // Finally, if the node is now dead, remove it from the graph. The node 916 // may not be dead if the replacement process recursively simplified to 917 // something else needing this node. 918 if (TLO.Old.getNode()->use_empty()) 919 deleteAndRecombine(TLO.Old.getNode()); 920 } 921 922 /// Check the specified integer node value to see if it can be simplified or if 923 /// things it uses can be simplified by bit propagation. If so, return true. 924 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 925 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 926 APInt KnownZero, KnownOne; 927 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 928 return false; 929 930 // Revisit the node. 931 AddToWorklist(Op.getNode()); 932 933 // Replace the old value with the new one. 934 ++NodesCombined; 935 DEBUG(dbgs() << "\nReplacing.2 "; 936 TLO.Old.getNode()->dump(&DAG); 937 dbgs() << "\nWith: "; 938 TLO.New.getNode()->dump(&DAG); 939 dbgs() << '\n'); 940 941 CommitTargetLoweringOpt(TLO); 942 return true; 943 } 944 945 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { 946 SDLoc dl(Load); 947 EVT VT = Load->getValueType(0); 948 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); 949 950 DEBUG(dbgs() << "\nReplacing.9 "; 951 Load->dump(&DAG); 952 dbgs() << "\nWith: "; 953 Trunc.getNode()->dump(&DAG); 954 dbgs() << '\n'); 955 WorklistRemover DeadNodes(*this); 956 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); 957 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); 958 deleteAndRecombine(Load); 959 AddToWorklist(Trunc.getNode()); 960 } 961 962 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { 963 Replace = false; 964 SDLoc dl(Op); 965 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 966 EVT MemVT = LD->getMemoryVT(); 967 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 968 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD 969 : ISD::EXTLOAD) 970 : LD->getExtensionType(); 971 Replace = true; 972 return DAG.getExtLoad(ExtType, dl, PVT, 973 LD->getChain(), LD->getBasePtr(), 974 MemVT, LD->getMemOperand()); 975 } 976 977 unsigned Opc = Op.getOpcode(); 978 switch (Opc) { 979 default: break; 980 case ISD::AssertSext: 981 return DAG.getNode(ISD::AssertSext, dl, PVT, 982 SExtPromoteOperand(Op.getOperand(0), PVT), 983 Op.getOperand(1)); 984 case ISD::AssertZext: 985 return DAG.getNode(ISD::AssertZext, dl, PVT, 986 ZExtPromoteOperand(Op.getOperand(0), PVT), 987 Op.getOperand(1)); 988 case ISD::Constant: { 989 unsigned ExtOpc = 990 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 991 return DAG.getNode(ExtOpc, dl, PVT, Op); 992 } 993 } 994 995 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 996 return SDValue(); 997 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 998 } 999 1000 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 1001 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 1002 return SDValue(); 1003 EVT OldVT = Op.getValueType(); 1004 SDLoc dl(Op); 1005 bool Replace = false; 1006 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 1007 if (!NewOp.getNode()) 1008 return SDValue(); 1009 AddToWorklist(NewOp.getNode()); 1010 1011 if (Replace) 1012 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 1013 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 1014 DAG.getValueType(OldVT)); 1015 } 1016 1017 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 1018 EVT OldVT = Op.getValueType(); 1019 SDLoc dl(Op); 1020 bool Replace = false; 1021 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 1022 if (!NewOp.getNode()) 1023 return SDValue(); 1024 AddToWorklist(NewOp.getNode()); 1025 1026 if (Replace) 1027 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 1028 return DAG.getZeroExtendInReg(NewOp, dl, OldVT); 1029 } 1030 1031 /// Promote the specified integer binary operation if the target indicates it is 1032 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to 1033 /// i32 since i16 instructions are longer. 1034 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 1035 if (!LegalOperations) 1036 return SDValue(); 1037 1038 EVT VT = Op.getValueType(); 1039 if (VT.isVector() || !VT.isInteger()) 1040 return SDValue(); 1041 1042 // If operation type is 'undesirable', e.g. i16 on x86, consider 1043 // promoting it. 1044 unsigned Opc = Op.getOpcode(); 1045 if (TLI.isTypeDesirableForOp(Opc, VT)) 1046 return SDValue(); 1047 1048 EVT PVT = VT; 1049 // Consult target whether it is a good idea to promote this operation and 1050 // what's the right type to promote it to. 1051 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1052 assert(PVT != VT && "Don't know what type to promote to!"); 1053 1054 bool Replace0 = false; 1055 SDValue N0 = Op.getOperand(0); 1056 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 1057 if (!NN0.getNode()) 1058 return SDValue(); 1059 1060 bool Replace1 = false; 1061 SDValue N1 = Op.getOperand(1); 1062 SDValue NN1; 1063 if (N0 == N1) 1064 NN1 = NN0; 1065 else { 1066 NN1 = PromoteOperand(N1, PVT, Replace1); 1067 if (!NN1.getNode()) 1068 return SDValue(); 1069 } 1070 1071 AddToWorklist(NN0.getNode()); 1072 if (NN1.getNode()) 1073 AddToWorklist(NN1.getNode()); 1074 1075 if (Replace0) 1076 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 1077 if (Replace1) 1078 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 1079 1080 DEBUG(dbgs() << "\nPromoting "; 1081 Op.getNode()->dump(&DAG)); 1082 SDLoc dl(Op); 1083 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1084 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 1085 } 1086 return SDValue(); 1087 } 1088 1089 /// Promote the specified integer shift operation if the target indicates it is 1090 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to 1091 /// i32 since i16 instructions are longer. 1092 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 1093 if (!LegalOperations) 1094 return SDValue(); 1095 1096 EVT VT = Op.getValueType(); 1097 if (VT.isVector() || !VT.isInteger()) 1098 return SDValue(); 1099 1100 // If operation type is 'undesirable', e.g. i16 on x86, consider 1101 // promoting it. 1102 unsigned Opc = Op.getOpcode(); 1103 if (TLI.isTypeDesirableForOp(Opc, VT)) 1104 return SDValue(); 1105 1106 EVT PVT = VT; 1107 // Consult target whether it is a good idea to promote this operation and 1108 // what's the right type to promote it to. 1109 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1110 assert(PVT != VT && "Don't know what type to promote to!"); 1111 1112 bool Replace = false; 1113 SDValue N0 = Op.getOperand(0); 1114 if (Opc == ISD::SRA) 1115 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 1116 else if (Opc == ISD::SRL) 1117 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 1118 else 1119 N0 = PromoteOperand(N0, PVT, Replace); 1120 if (!N0.getNode()) 1121 return SDValue(); 1122 1123 AddToWorklist(N0.getNode()); 1124 if (Replace) 1125 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 1126 1127 DEBUG(dbgs() << "\nPromoting "; 1128 Op.getNode()->dump(&DAG)); 1129 SDLoc dl(Op); 1130 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1131 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 1132 } 1133 return SDValue(); 1134 } 1135 1136 SDValue DAGCombiner::PromoteExtend(SDValue Op) { 1137 if (!LegalOperations) 1138 return SDValue(); 1139 1140 EVT VT = Op.getValueType(); 1141 if (VT.isVector() || !VT.isInteger()) 1142 return SDValue(); 1143 1144 // If operation type is 'undesirable', e.g. i16 on x86, consider 1145 // promoting it. 1146 unsigned Opc = Op.getOpcode(); 1147 if (TLI.isTypeDesirableForOp(Opc, VT)) 1148 return SDValue(); 1149 1150 EVT PVT = VT; 1151 // Consult target whether it is a good idea to promote this operation and 1152 // what's the right type to promote it to. 1153 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1154 assert(PVT != VT && "Don't know what type to promote to!"); 1155 // fold (aext (aext x)) -> (aext x) 1156 // fold (aext (zext x)) -> (zext x) 1157 // fold (aext (sext x)) -> (sext x) 1158 DEBUG(dbgs() << "\nPromoting "; 1159 Op.getNode()->dump(&DAG)); 1160 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); 1161 } 1162 return SDValue(); 1163 } 1164 1165 bool DAGCombiner::PromoteLoad(SDValue Op) { 1166 if (!LegalOperations) 1167 return false; 1168 1169 EVT VT = Op.getValueType(); 1170 if (VT.isVector() || !VT.isInteger()) 1171 return false; 1172 1173 // If operation type is 'undesirable', e.g. i16 on x86, consider 1174 // promoting it. 1175 unsigned Opc = Op.getOpcode(); 1176 if (TLI.isTypeDesirableForOp(Opc, VT)) 1177 return false; 1178 1179 EVT PVT = VT; 1180 // Consult target whether it is a good idea to promote this operation and 1181 // what's the right type to promote it to. 1182 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1183 assert(PVT != VT && "Don't know what type to promote to!"); 1184 1185 SDLoc dl(Op); 1186 SDNode *N = Op.getNode(); 1187 LoadSDNode *LD = cast<LoadSDNode>(N); 1188 EVT MemVT = LD->getMemoryVT(); 1189 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 1190 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD 1191 : ISD::EXTLOAD) 1192 : LD->getExtensionType(); 1193 SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, 1194 LD->getChain(), LD->getBasePtr(), 1195 MemVT, LD->getMemOperand()); 1196 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); 1197 1198 DEBUG(dbgs() << "\nPromoting "; 1199 N->dump(&DAG); 1200 dbgs() << "\nTo: "; 1201 Result.getNode()->dump(&DAG); 1202 dbgs() << '\n'); 1203 WorklistRemover DeadNodes(*this); 1204 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 1205 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); 1206 deleteAndRecombine(N); 1207 AddToWorklist(Result.getNode()); 1208 return true; 1209 } 1210 return false; 1211 } 1212 1213 /// \brief Recursively delete a node which has no uses and any operands for 1214 /// which it is the only use. 1215 /// 1216 /// Note that this both deletes the nodes and removes them from the worklist. 1217 /// It also adds any nodes who have had a user deleted to the worklist as they 1218 /// may now have only one use and subject to other combines. 1219 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { 1220 if (!N->use_empty()) 1221 return false; 1222 1223 SmallSetVector<SDNode *, 16> Nodes; 1224 Nodes.insert(N); 1225 do { 1226 N = Nodes.pop_back_val(); 1227 if (!N) 1228 continue; 1229 1230 if (N->use_empty()) { 1231 for (const SDValue &ChildN : N->op_values()) 1232 Nodes.insert(ChildN.getNode()); 1233 1234 removeFromWorklist(N); 1235 DAG.DeleteNode(N); 1236 } else { 1237 AddToWorklist(N); 1238 } 1239 } while (!Nodes.empty()); 1240 return true; 1241 } 1242 1243 //===----------------------------------------------------------------------===// 1244 // Main DAG Combiner implementation 1245 //===----------------------------------------------------------------------===// 1246 1247 void DAGCombiner::Run(CombineLevel AtLevel) { 1248 // set the instance variables, so that the various visit routines may use it. 1249 Level = AtLevel; 1250 LegalOperations = Level >= AfterLegalizeVectorOps; 1251 LegalTypes = Level >= AfterLegalizeTypes; 1252 1253 // Add all the dag nodes to the worklist. 1254 for (SDNode &Node : DAG.allnodes()) 1255 AddToWorklist(&Node); 1256 1257 // Create a dummy node (which is not added to allnodes), that adds a reference 1258 // to the root node, preventing it from being deleted, and tracking any 1259 // changes of the root. 1260 HandleSDNode Dummy(DAG.getRoot()); 1261 1262 // while the worklist isn't empty, find a node and 1263 // try and combine it. 1264 while (!WorklistMap.empty()) { 1265 SDNode *N; 1266 // The Worklist holds the SDNodes in order, but it may contain null entries. 1267 do { 1268 N = Worklist.pop_back_val(); 1269 } while (!N); 1270 1271 bool GoodWorklistEntry = WorklistMap.erase(N); 1272 (void)GoodWorklistEntry; 1273 assert(GoodWorklistEntry && 1274 "Found a worklist entry without a corresponding map entry!"); 1275 1276 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1277 // N is deleted from the DAG, since they too may now be dead or may have a 1278 // reduced number of uses, allowing other xforms. 1279 if (recursivelyDeleteUnusedNodes(N)) 1280 continue; 1281 1282 WorklistRemover DeadNodes(*this); 1283 1284 // If this combine is running after legalizing the DAG, re-legalize any 1285 // nodes pulled off the worklist. 1286 if (Level == AfterLegalizeDAG) { 1287 SmallSetVector<SDNode *, 16> UpdatedNodes; 1288 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); 1289 1290 for (SDNode *LN : UpdatedNodes) { 1291 AddToWorklist(LN); 1292 AddUsersToWorklist(LN); 1293 } 1294 if (!NIsValid) 1295 continue; 1296 } 1297 1298 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); 1299 1300 // Add any operands of the new node which have not yet been combined to the 1301 // worklist as well. Because the worklist uniques things already, this 1302 // won't repeatedly process the same operand. 1303 CombinedNodes.insert(N); 1304 for (const SDValue &ChildN : N->op_values()) 1305 if (!CombinedNodes.count(ChildN.getNode())) 1306 AddToWorklist(ChildN.getNode()); 1307 1308 SDValue RV = combine(N); 1309 1310 if (!RV.getNode()) 1311 continue; 1312 1313 ++NodesCombined; 1314 1315 // If we get back the same node we passed in, rather than a new node or 1316 // zero, we know that the node must have defined multiple values and 1317 // CombineTo was used. Since CombineTo takes care of the worklist 1318 // mechanics for us, we have no work to do in this case. 1319 if (RV.getNode() == N) 1320 continue; 1321 1322 assert(N->getOpcode() != ISD::DELETED_NODE && 1323 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1324 "Node was deleted but visit returned new node!"); 1325 1326 DEBUG(dbgs() << " ... into: "; 1327 RV.getNode()->dump(&DAG)); 1328 1329 // Transfer debug value. 1330 DAG.TransferDbgValues(SDValue(N, 0), RV); 1331 if (N->getNumValues() == RV.getNode()->getNumValues()) 1332 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1333 else { 1334 assert(N->getValueType(0) == RV.getValueType() && 1335 N->getNumValues() == 1 && "Type mismatch"); 1336 SDValue OpV = RV; 1337 DAG.ReplaceAllUsesWith(N, &OpV); 1338 } 1339 1340 // Push the new node and any users onto the worklist 1341 AddToWorklist(RV.getNode()); 1342 AddUsersToWorklist(RV.getNode()); 1343 1344 // Finally, if the node is now dead, remove it from the graph. The node 1345 // may not be dead if the replacement process recursively simplified to 1346 // something else needing this node. This will also take care of adding any 1347 // operands which have lost a user to the worklist. 1348 recursivelyDeleteUnusedNodes(N); 1349 } 1350 1351 // If the root changed (e.g. it was a dead load, update the root). 1352 DAG.setRoot(Dummy.getValue()); 1353 DAG.RemoveDeadNodes(); 1354 } 1355 1356 SDValue DAGCombiner::visit(SDNode *N) { 1357 switch (N->getOpcode()) { 1358 default: break; 1359 case ISD::TokenFactor: return visitTokenFactor(N); 1360 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); 1361 case ISD::ADD: return visitADD(N); 1362 case ISD::SUB: return visitSUB(N); 1363 case ISD::ADDC: return visitADDC(N); 1364 case ISD::SUBC: return visitSUBC(N); 1365 case ISD::ADDE: return visitADDE(N); 1366 case ISD::SUBE: return visitSUBE(N); 1367 case ISD::MUL: return visitMUL(N); 1368 case ISD::SDIV: return visitSDIV(N); 1369 case ISD::UDIV: return visitUDIV(N); 1370 case ISD::SREM: 1371 case ISD::UREM: return visitREM(N); 1372 case ISD::MULHU: return visitMULHU(N); 1373 case ISD::MULHS: return visitMULHS(N); 1374 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); 1375 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); 1376 case ISD::SMULO: return visitSMULO(N); 1377 case ISD::UMULO: return visitUMULO(N); 1378 case ISD::SMIN: 1379 case ISD::SMAX: 1380 case ISD::UMIN: 1381 case ISD::UMAX: return visitIMINMAX(N); 1382 case ISD::AND: return visitAND(N); 1383 case ISD::OR: return visitOR(N); 1384 case ISD::XOR: return visitXOR(N); 1385 case ISD::SHL: return visitSHL(N); 1386 case ISD::SRA: return visitSRA(N); 1387 case ISD::SRL: return visitSRL(N); 1388 case ISD::ROTR: 1389 case ISD::ROTL: return visitRotate(N); 1390 case ISD::BSWAP: return visitBSWAP(N); 1391 case ISD::CTLZ: return visitCTLZ(N); 1392 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); 1393 case ISD::CTTZ: return visitCTTZ(N); 1394 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); 1395 case ISD::CTPOP: return visitCTPOP(N); 1396 case ISD::SELECT: return visitSELECT(N); 1397 case ISD::VSELECT: return visitVSELECT(N); 1398 case ISD::SELECT_CC: return visitSELECT_CC(N); 1399 case ISD::SETCC: return visitSETCC(N); 1400 case ISD::SETCCE: return visitSETCCE(N); 1401 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); 1402 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); 1403 case ISD::ANY_EXTEND: return visitANY_EXTEND(N); 1404 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); 1405 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); 1406 case ISD::TRUNCATE: return visitTRUNCATE(N); 1407 case ISD::BITCAST: return visitBITCAST(N); 1408 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); 1409 case ISD::FADD: return visitFADD(N); 1410 case ISD::FSUB: return visitFSUB(N); 1411 case ISD::FMUL: return visitFMUL(N); 1412 case ISD::FMA: return visitFMA(N); 1413 case ISD::FDIV: return visitFDIV(N); 1414 case ISD::FREM: return visitFREM(N); 1415 case ISD::FSQRT: return visitFSQRT(N); 1416 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); 1417 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); 1418 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); 1419 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); 1420 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); 1421 case ISD::FP_ROUND: return visitFP_ROUND(N); 1422 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); 1423 case ISD::FP_EXTEND: return visitFP_EXTEND(N); 1424 case ISD::FNEG: return visitFNEG(N); 1425 case ISD::FABS: return visitFABS(N); 1426 case ISD::FFLOOR: return visitFFLOOR(N); 1427 case ISD::FMINNUM: return visitFMINNUM(N); 1428 case ISD::FMAXNUM: return visitFMAXNUM(N); 1429 case ISD::FCEIL: return visitFCEIL(N); 1430 case ISD::FTRUNC: return visitFTRUNC(N); 1431 case ISD::BRCOND: return visitBRCOND(N); 1432 case ISD::BR_CC: return visitBR_CC(N); 1433 case ISD::LOAD: return visitLOAD(N); 1434 case ISD::STORE: return visitSTORE(N); 1435 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); 1436 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); 1437 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); 1438 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); 1439 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); 1440 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); 1441 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); 1442 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); 1443 case ISD::MGATHER: return visitMGATHER(N); 1444 case ISD::MLOAD: return visitMLOAD(N); 1445 case ISD::MSCATTER: return visitMSCATTER(N); 1446 case ISD::MSTORE: return visitMSTORE(N); 1447 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); 1448 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); 1449 } 1450 return SDValue(); 1451 } 1452 1453 SDValue DAGCombiner::combine(SDNode *N) { 1454 SDValue RV = visit(N); 1455 1456 // If nothing happened, try a target-specific DAG combine. 1457 if (!RV.getNode()) { 1458 assert(N->getOpcode() != ISD::DELETED_NODE && 1459 "Node was deleted but visit returned NULL!"); 1460 1461 if (N->getOpcode() >= ISD::BUILTIN_OP_END || 1462 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { 1463 1464 // Expose the DAG combiner to the target combiner impls. 1465 TargetLowering::DAGCombinerInfo 1466 DagCombineInfo(DAG, Level, false, this); 1467 1468 RV = TLI.PerformDAGCombine(N, DagCombineInfo); 1469 } 1470 } 1471 1472 // If nothing happened still, try promoting the operation. 1473 if (!RV.getNode()) { 1474 switch (N->getOpcode()) { 1475 default: break; 1476 case ISD::ADD: 1477 case ISD::SUB: 1478 case ISD::MUL: 1479 case ISD::AND: 1480 case ISD::OR: 1481 case ISD::XOR: 1482 RV = PromoteIntBinOp(SDValue(N, 0)); 1483 break; 1484 case ISD::SHL: 1485 case ISD::SRA: 1486 case ISD::SRL: 1487 RV = PromoteIntShiftOp(SDValue(N, 0)); 1488 break; 1489 case ISD::SIGN_EXTEND: 1490 case ISD::ZERO_EXTEND: 1491 case ISD::ANY_EXTEND: 1492 RV = PromoteExtend(SDValue(N, 0)); 1493 break; 1494 case ISD::LOAD: 1495 if (PromoteLoad(SDValue(N, 0))) 1496 RV = SDValue(N, 0); 1497 break; 1498 } 1499 } 1500 1501 // If N is a commutative binary node, try commuting it to enable more 1502 // sdisel CSE. 1503 if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && 1504 N->getNumValues() == 1) { 1505 SDValue N0 = N->getOperand(0); 1506 SDValue N1 = N->getOperand(1); 1507 1508 // Constant operands are canonicalized to RHS. 1509 if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { 1510 SDValue Ops[] = {N1, N0}; 1511 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, 1512 N->getFlags()); 1513 if (CSENode) 1514 return SDValue(CSENode, 0); 1515 } 1516 } 1517 1518 return RV; 1519 } 1520 1521 /// Given a node, return its input chain if it has one, otherwise return a null 1522 /// sd operand. 1523 static SDValue getInputChainForNode(SDNode *N) { 1524 if (unsigned NumOps = N->getNumOperands()) { 1525 if (N->getOperand(0).getValueType() == MVT::Other) 1526 return N->getOperand(0); 1527 if (N->getOperand(NumOps-1).getValueType() == MVT::Other) 1528 return N->getOperand(NumOps-1); 1529 for (unsigned i = 1; i < NumOps-1; ++i) 1530 if (N->getOperand(i).getValueType() == MVT::Other) 1531 return N->getOperand(i); 1532 } 1533 return SDValue(); 1534 } 1535 1536 SDValue DAGCombiner::visitTokenFactor(SDNode *N) { 1537 // If N has two operands, where one has an input chain equal to the other, 1538 // the 'other' chain is redundant. 1539 if (N->getNumOperands() == 2) { 1540 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) 1541 return N->getOperand(0); 1542 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) 1543 return N->getOperand(1); 1544 } 1545 1546 SmallVector<SDNode *, 8> TFs; // List of token factors to visit. 1547 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 1548 SmallPtrSet<SDNode*, 16> SeenOps; 1549 bool Changed = false; // If we should replace this token factor. 1550 1551 // Start out with this token factor. 1552 TFs.push_back(N); 1553 1554 // Iterate through token factors. The TFs grows when new token factors are 1555 // encountered. 1556 for (unsigned i = 0; i < TFs.size(); ++i) { 1557 SDNode *TF = TFs[i]; 1558 1559 // Check each of the operands. 1560 for (const SDValue &Op : TF->op_values()) { 1561 1562 switch (Op.getOpcode()) { 1563 case ISD::EntryToken: 1564 // Entry tokens don't need to be added to the list. They are 1565 // redundant. 1566 Changed = true; 1567 break; 1568 1569 case ISD::TokenFactor: 1570 if (Op.hasOneUse() && 1571 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { 1572 // Queue up for processing. 1573 TFs.push_back(Op.getNode()); 1574 // Clean up in case the token factor is removed. 1575 AddToWorklist(Op.getNode()); 1576 Changed = true; 1577 break; 1578 } 1579 // Fall thru 1580 1581 default: 1582 // Only add if it isn't already in the list. 1583 if (SeenOps.insert(Op.getNode()).second) 1584 Ops.push_back(Op); 1585 else 1586 Changed = true; 1587 break; 1588 } 1589 } 1590 } 1591 1592 SDValue Result; 1593 1594 // If we've changed things around then replace token factor. 1595 if (Changed) { 1596 if (Ops.empty()) { 1597 // The entry token is the only possible outcome. 1598 Result = DAG.getEntryNode(); 1599 } else { 1600 // New and improved token factor. 1601 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); 1602 } 1603 1604 // Add users to worklist if AA is enabled, since it may introduce 1605 // a lot of new chained token factors while removing memory deps. 1606 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA 1607 : DAG.getSubtarget().useAA(); 1608 return CombineTo(N, Result, UseAA /*add to worklist*/); 1609 } 1610 1611 return Result; 1612 } 1613 1614 /// MERGE_VALUES can always be eliminated. 1615 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { 1616 WorklistRemover DeadNodes(*this); 1617 // Replacing results may cause a different MERGE_VALUES to suddenly 1618 // be CSE'd with N, and carry its uses with it. Iterate until no 1619 // uses remain, to ensure that the node can be safely deleted. 1620 // First add the users of this node to the work list so that they 1621 // can be tried again once they have new operands. 1622 AddUsersToWorklist(N); 1623 do { 1624 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1625 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); 1626 } while (!N->use_empty()); 1627 deleteAndRecombine(N); 1628 return SDValue(N, 0); // Return N so it doesn't get rechecked! 1629 } 1630 1631 /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a 1632 /// ContantSDNode pointer else nullptr. 1633 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { 1634 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); 1635 return Const != nullptr && !Const->isOpaque() ? Const : nullptr; 1636 } 1637 1638 SDValue DAGCombiner::visitADD(SDNode *N) { 1639 SDValue N0 = N->getOperand(0); 1640 SDValue N1 = N->getOperand(1); 1641 EVT VT = N0.getValueType(); 1642 1643 // fold vector ops 1644 if (VT.isVector()) { 1645 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 1646 return FoldedVOp; 1647 1648 // fold (add x, 0) -> x, vector edition 1649 if (ISD::isBuildVectorAllZeros(N1.getNode())) 1650 return N0; 1651 if (ISD::isBuildVectorAllZeros(N0.getNode())) 1652 return N1; 1653 } 1654 1655 // fold (add x, undef) -> undef 1656 if (N0.getOpcode() == ISD::UNDEF) 1657 return N0; 1658 if (N1.getOpcode() == ISD::UNDEF) 1659 return N1; 1660 // fold (add c1, c2) -> c1+c2 1661 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 1662 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 1663 if (N0C && N1C) 1664 return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); 1665 // canonicalize constant to RHS 1666 if (isConstantIntBuildVectorOrConstantInt(N0) && 1667 !isConstantIntBuildVectorOrConstantInt(N1)) 1668 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); 1669 // fold (add x, 0) -> x 1670 if (isNullConstant(N1)) 1671 return N0; 1672 // fold (add Sym, c) -> Sym+c 1673 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1674 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && 1675 GA->getOpcode() == ISD::GlobalAddress) 1676 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 1677 GA->getOffset() + 1678 (uint64_t)N1C->getSExtValue()); 1679 // fold ((c1-A)+c2) -> (c1+c2)-A 1680 if (N1C && N0.getOpcode() == ISD::SUB) 1681 if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { 1682 SDLoc DL(N); 1683 return DAG.getNode(ISD::SUB, DL, VT, 1684 DAG.getConstant(N1C->getAPIntValue()+ 1685 N0C->getAPIntValue(), DL, VT), 1686 N0.getOperand(1)); 1687 } 1688 // reassociate add 1689 if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) 1690 return RADD; 1691 // fold ((0-A) + B) -> B-A 1692 if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) 1693 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); 1694 // fold (A + (0-B)) -> A-B 1695 if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) 1696 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); 1697 // fold (A+(B-A)) -> B 1698 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) 1699 return N1.getOperand(0); 1700 // fold ((B-A)+A) -> B 1701 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) 1702 return N0.getOperand(0); 1703 // fold (A+(B-(A+C))) to (B-C) 1704 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1705 N0 == N1.getOperand(1).getOperand(0)) 1706 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1707 N1.getOperand(1).getOperand(1)); 1708 // fold (A+(B-(C+A))) to (B-C) 1709 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1710 N0 == N1.getOperand(1).getOperand(1)) 1711 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1712 N1.getOperand(1).getOperand(0)); 1713 // fold (A+((B-A)+or-C)) to (B+or-C) 1714 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && 1715 N1.getOperand(0).getOpcode() == ISD::SUB && 1716 N0 == N1.getOperand(0).getOperand(1)) 1717 return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, 1718 N1.getOperand(0).getOperand(0), N1.getOperand(1)); 1719 1720 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant 1721 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { 1722 SDValue N00 = N0.getOperand(0); 1723 SDValue N01 = N0.getOperand(1); 1724 SDValue N10 = N1.getOperand(0); 1725 SDValue N11 = N1.getOperand(1); 1726 1727 if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) 1728 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1729 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), 1730 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); 1731 } 1732 1733 if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) 1734 return SDValue(N, 0); 1735 1736 // fold (a+b) -> (a|b) iff a and b share no bits. 1737 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && 1738 VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) 1739 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); 1740 1741 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) 1742 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && 1743 isNullConstant(N1.getOperand(0).getOperand(0))) 1744 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, 1745 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1746 N1.getOperand(0).getOperand(1), 1747 N1.getOperand(1))); 1748 if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && 1749 isNullConstant(N0.getOperand(0).getOperand(0))) 1750 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, 1751 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1752 N0.getOperand(0).getOperand(1), 1753 N0.getOperand(1))); 1754 1755 if (N1.getOpcode() == ISD::AND) { 1756 SDValue AndOp0 = N1.getOperand(0); 1757 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); 1758 unsigned DestBits = VT.getScalarType().getSizeInBits(); 1759 1760 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) 1761 // and similar xforms where the inner op is either ~0 or 0. 1762 if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { 1763 SDLoc DL(N); 1764 return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); 1765 } 1766 } 1767 1768 // add (sext i1), X -> sub X, (zext i1) 1769 if (N0.getOpcode() == ISD::SIGN_EXTEND && 1770 N0.getOperand(0).getValueType() == MVT::i1 && 1771 !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { 1772 SDLoc DL(N); 1773 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); 1774 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); 1775 } 1776 1777 // add X, (sextinreg Y i1) -> sub X, (and Y 1) 1778 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { 1779 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); 1780 if (TN->getVT() == MVT::i1) { 1781 SDLoc DL(N); 1782 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), 1783 DAG.getConstant(1, DL, VT)); 1784 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); 1785 } 1786 } 1787 1788 return SDValue(); 1789 } 1790 1791 SDValue DAGCombiner::visitADDC(SDNode *N) { 1792 SDValue N0 = N->getOperand(0); 1793 SDValue N1 = N->getOperand(1); 1794 EVT VT = N0.getValueType(); 1795 1796 // If the flag result is dead, turn this into an ADD. 1797 if (!N->hasAnyUseOfValue(1)) 1798 return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), 1799 DAG.getNode(ISD::CARRY_FALSE, 1800 SDLoc(N), MVT::Glue)); 1801 1802 // canonicalize constant to RHS. 1803 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1804 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1805 if (N0C && !N1C) 1806 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); 1807 1808 // fold (addc x, 0) -> x + no carry out 1809 if (isNullConstant(N1)) 1810 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, 1811 SDLoc(N), MVT::Glue)); 1812 1813 // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. 1814 APInt LHSZero, LHSOne; 1815 APInt RHSZero, RHSOne; 1816 DAG.computeKnownBits(N0, LHSZero, LHSOne); 1817 1818 if (LHSZero.getBoolValue()) { 1819 DAG.computeKnownBits(N1, RHSZero, RHSOne); 1820 1821 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 1822 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 1823 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) 1824 return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), 1825 DAG.getNode(ISD::CARRY_FALSE, 1826 SDLoc(N), MVT::Glue)); 1827 } 1828 1829 return SDValue(); 1830 } 1831 1832 SDValue DAGCombiner::visitADDE(SDNode *N) { 1833 SDValue N0 = N->getOperand(0); 1834 SDValue N1 = N->getOperand(1); 1835 SDValue CarryIn = N->getOperand(2); 1836 1837 // canonicalize constant to RHS 1838 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1839 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1840 if (N0C && !N1C) 1841 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), 1842 N1, N0, CarryIn); 1843 1844 // fold (adde x, y, false) -> (addc x, y) 1845 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 1846 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); 1847 1848 return SDValue(); 1849 } 1850 1851 // Since it may not be valid to emit a fold to zero for vector initializers 1852 // check if we can before folding. 1853 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, 1854 SelectionDAG &DAG, 1855 bool LegalOperations, bool LegalTypes) { 1856 if (!VT.isVector()) 1857 return DAG.getConstant(0, DL, VT); 1858 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) 1859 return DAG.getConstant(0, DL, VT); 1860 return SDValue(); 1861 } 1862 1863 SDValue DAGCombiner::visitSUB(SDNode *N) { 1864 SDValue N0 = N->getOperand(0); 1865 SDValue N1 = N->getOperand(1); 1866 EVT VT = N0.getValueType(); 1867 1868 // fold vector ops 1869 if (VT.isVector()) { 1870 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 1871 return FoldedVOp; 1872 1873 // fold (sub x, 0) -> x, vector edition 1874 if (ISD::isBuildVectorAllZeros(N1.getNode())) 1875 return N0; 1876 } 1877 1878 // fold (sub x, x) -> 0 1879 // FIXME: Refactor this and xor and other similar operations together. 1880 if (N0 == N1) 1881 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); 1882 // fold (sub c1, c2) -> c1-c2 1883 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 1884 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 1885 if (N0C && N1C) 1886 return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C); 1887 // fold (sub x, c) -> (add x, -c) 1888 if (N1C) { 1889 SDLoc DL(N); 1890 return DAG.getNode(ISD::ADD, DL, VT, N0, 1891 DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); 1892 } 1893 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) 1894 if (isAllOnesConstant(N0)) 1895 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); 1896 // fold A-(A-B) -> B 1897 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) 1898 return N1.getOperand(1); 1899 // fold (A+B)-A -> B 1900 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) 1901 return N0.getOperand(1); 1902 // fold (A+B)-B -> A 1903 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) 1904 return N0.getOperand(0); 1905 // fold C2-(A+C1) -> (C2-C1)-A 1906 ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : 1907 dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); 1908 if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { 1909 SDLoc DL(N); 1910 SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), 1911 DL, VT); 1912 return DAG.getNode(ISD::SUB, DL, VT, NewC, 1913 N1.getOperand(0)); 1914 } 1915 // fold ((A+(B+or-C))-B) -> A+or-C 1916 if (N0.getOpcode() == ISD::ADD && 1917 (N0.getOperand(1).getOpcode() == ISD::SUB || 1918 N0.getOperand(1).getOpcode() == ISD::ADD) && 1919 N0.getOperand(1).getOperand(0) == N1) 1920 return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, 1921 N0.getOperand(0), N0.getOperand(1).getOperand(1)); 1922 // fold ((A+(C+B))-B) -> A+C 1923 if (N0.getOpcode() == ISD::ADD && 1924 N0.getOperand(1).getOpcode() == ISD::ADD && 1925 N0.getOperand(1).getOperand(1) == N1) 1926 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 1927 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1928 // fold ((A-(B-C))-C) -> A-B 1929 if (N0.getOpcode() == ISD::SUB && 1930 N0.getOperand(1).getOpcode() == ISD::SUB && 1931 N0.getOperand(1).getOperand(1) == N1) 1932 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1933 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1934 1935 // If either operand of a sub is undef, the result is undef 1936 if (N0.getOpcode() == ISD::UNDEF) 1937 return N0; 1938 if (N1.getOpcode() == ISD::UNDEF) 1939 return N1; 1940 1941 // If the relocation model supports it, consider symbol offsets. 1942 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1943 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { 1944 // fold (sub Sym, c) -> Sym-c 1945 if (N1C && GA->getOpcode() == ISD::GlobalAddress) 1946 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 1947 GA->getOffset() - 1948 (uint64_t)N1C->getSExtValue()); 1949 // fold (sub Sym+c1, Sym+c2) -> c1-c2 1950 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) 1951 if (GA->getGlobal() == GB->getGlobal()) 1952 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), 1953 SDLoc(N), VT); 1954 } 1955 1956 // sub X, (sextinreg Y i1) -> add X, (and Y 1) 1957 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { 1958 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); 1959 if (TN->getVT() == MVT::i1) { 1960 SDLoc DL(N); 1961 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), 1962 DAG.getConstant(1, DL, VT)); 1963 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); 1964 } 1965 } 1966 1967 return SDValue(); 1968 } 1969 1970 SDValue DAGCombiner::visitSUBC(SDNode *N) { 1971 SDValue N0 = N->getOperand(0); 1972 SDValue N1 = N->getOperand(1); 1973 EVT VT = N0.getValueType(); 1974 SDLoc DL(N); 1975 1976 // If the flag result is dead, turn this into an SUB. 1977 if (!N->hasAnyUseOfValue(1)) 1978 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), 1979 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 1980 1981 // fold (subc x, x) -> 0 + no borrow 1982 if (N0 == N1) 1983 return CombineTo(N, DAG.getConstant(0, DL, VT), 1984 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 1985 1986 // fold (subc x, 0) -> x + no borrow 1987 if (isNullConstant(N1)) 1988 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 1989 1990 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow 1991 if (isAllOnesConstant(N0)) 1992 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), 1993 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 1994 1995 return SDValue(); 1996 } 1997 1998 SDValue DAGCombiner::visitSUBE(SDNode *N) { 1999 SDValue N0 = N->getOperand(0); 2000 SDValue N1 = N->getOperand(1); 2001 SDValue CarryIn = N->getOperand(2); 2002 2003 // fold (sube x, y, false) -> (subc x, y) 2004 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 2005 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); 2006 2007 return SDValue(); 2008 } 2009 2010 SDValue DAGCombiner::visitMUL(SDNode *N) { 2011 SDValue N0 = N->getOperand(0); 2012 SDValue N1 = N->getOperand(1); 2013 EVT VT = N0.getValueType(); 2014 2015 // fold (mul x, undef) -> 0 2016 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2017 return DAG.getConstant(0, SDLoc(N), VT); 2018 2019 bool N0IsConst = false; 2020 bool N1IsConst = false; 2021 bool N1IsOpaqueConst = false; 2022 bool N0IsOpaqueConst = false; 2023 APInt ConstValue0, ConstValue1; 2024 // fold vector ops 2025 if (VT.isVector()) { 2026 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2027 return FoldedVOp; 2028 2029 N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); 2030 N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); 2031 } else { 2032 N0IsConst = isa<ConstantSDNode>(N0); 2033 if (N0IsConst) { 2034 ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); 2035 N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); 2036 } 2037 N1IsConst = isa<ConstantSDNode>(N1); 2038 if (N1IsConst) { 2039 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); 2040 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); 2041 } 2042 } 2043 2044 // fold (mul c1, c2) -> c1*c2 2045 if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) 2046 return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, 2047 N0.getNode(), N1.getNode()); 2048 2049 // canonicalize constant to RHS (vector doesn't have to splat) 2050 if (isConstantIntBuildVectorOrConstantInt(N0) && 2051 !isConstantIntBuildVectorOrConstantInt(N1)) 2052 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); 2053 // fold (mul x, 0) -> 0 2054 if (N1IsConst && ConstValue1 == 0) 2055 return N1; 2056 // We require a splat of the entire scalar bit width for non-contiguous 2057 // bit patterns. 2058 bool IsFullSplat = 2059 ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); 2060 // fold (mul x, 1) -> x 2061 if (N1IsConst && ConstValue1 == 1 && IsFullSplat) 2062 return N0; 2063 // fold (mul x, -1) -> 0-x 2064 if (N1IsConst && ConstValue1.isAllOnesValue()) { 2065 SDLoc DL(N); 2066 return DAG.getNode(ISD::SUB, DL, VT, 2067 DAG.getConstant(0, DL, VT), N0); 2068 } 2069 // fold (mul x, (1 << c)) -> x << c 2070 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && 2071 IsFullSplat) { 2072 SDLoc DL(N); 2073 return DAG.getNode(ISD::SHL, DL, VT, N0, 2074 DAG.getConstant(ConstValue1.logBase2(), DL, 2075 getShiftAmountTy(N0.getValueType()))); 2076 } 2077 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c 2078 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && 2079 IsFullSplat) { 2080 unsigned Log2Val = (-ConstValue1).logBase2(); 2081 SDLoc DL(N); 2082 // FIXME: If the input is something that is easily negated (e.g. a 2083 // single-use add), we should put the negate there. 2084 return DAG.getNode(ISD::SUB, DL, VT, 2085 DAG.getConstant(0, DL, VT), 2086 DAG.getNode(ISD::SHL, DL, VT, N0, 2087 DAG.getConstant(Log2Val, DL, 2088 getShiftAmountTy(N0.getValueType())))); 2089 } 2090 2091 APInt Val; 2092 // (mul (shl X, c1), c2) -> (mul X, c2 << c1) 2093 if (N1IsConst && N0.getOpcode() == ISD::SHL && 2094 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 2095 isa<ConstantSDNode>(N0.getOperand(1)))) { 2096 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, 2097 N1, N0.getOperand(1)); 2098 AddToWorklist(C3.getNode()); 2099 return DAG.getNode(ISD::MUL, SDLoc(N), VT, 2100 N0.getOperand(0), C3); 2101 } 2102 2103 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one 2104 // use. 2105 { 2106 SDValue Sh(nullptr,0), Y(nullptr,0); 2107 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 2108 if (N0.getOpcode() == ISD::SHL && 2109 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 2110 isa<ConstantSDNode>(N0.getOperand(1))) && 2111 N0.getNode()->hasOneUse()) { 2112 Sh = N0; Y = N1; 2113 } else if (N1.getOpcode() == ISD::SHL && 2114 isa<ConstantSDNode>(N1.getOperand(1)) && 2115 N1.getNode()->hasOneUse()) { 2116 Sh = N1; Y = N0; 2117 } 2118 2119 if (Sh.getNode()) { 2120 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2121 Sh.getOperand(0), Y); 2122 return DAG.getNode(ISD::SHL, SDLoc(N), VT, 2123 Mul, Sh.getOperand(1)); 2124 } 2125 } 2126 2127 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) 2128 if (isConstantIntBuildVectorOrConstantInt(N1) && 2129 N0.getOpcode() == ISD::ADD && 2130 isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && 2131 isMulAddWithConstProfitable(N, N0, N1)) 2132 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 2133 DAG.getNode(ISD::MUL, SDLoc(N0), VT, 2134 N0.getOperand(0), N1), 2135 DAG.getNode(ISD::MUL, SDLoc(N1), VT, 2136 N0.getOperand(1), N1)); 2137 2138 // reassociate mul 2139 if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) 2140 return RMUL; 2141 2142 return SDValue(); 2143 } 2144 2145 /// Return true if divmod libcall is available. 2146 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, 2147 const TargetLowering &TLI) { 2148 RTLIB::Libcall LC; 2149 switch (Node->getSimpleValueType(0).SimpleTy) { 2150 default: return false; // No libcall for vector types. 2151 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; 2152 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; 2153 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; 2154 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; 2155 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; 2156 } 2157 2158 return TLI.getLibcallName(LC) != nullptr; 2159 } 2160 2161 /// Issue divrem if both quotient and remainder are needed. 2162 SDValue DAGCombiner::useDivRem(SDNode *Node) { 2163 if (Node->use_empty()) 2164 return SDValue(); // This is a dead node, leave it alone. 2165 2166 EVT VT = Node->getValueType(0); 2167 if (!TLI.isTypeLegal(VT)) 2168 return SDValue(); 2169 2170 unsigned Opcode = Node->getOpcode(); 2171 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); 2172 2173 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; 2174 // If DIVREM is going to get expanded into a libcall, 2175 // but there is no libcall available, then don't combine. 2176 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && 2177 !isDivRemLibcallAvailable(Node, isSigned, TLI)) 2178 return SDValue(); 2179 2180 // If div is legal, it's better to do the normal expansion 2181 unsigned OtherOpcode = 0; 2182 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { 2183 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; 2184 if (TLI.isOperationLegalOrCustom(Opcode, VT)) 2185 return SDValue(); 2186 } else { 2187 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV; 2188 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) 2189 return SDValue(); 2190 } 2191 2192 SDValue Op0 = Node->getOperand(0); 2193 SDValue Op1 = Node->getOperand(1); 2194 SDValue combined; 2195 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), 2196 UE = Op0.getNode()->use_end(); UI != UE; ++UI) { 2197 SDNode *User = *UI; 2198 if (User == Node || User->use_empty()) 2199 continue; 2200 // Convert the other matching node(s), too; 2201 // otherwise, the DIVREM may get target-legalized into something 2202 // target-specific that we won't be able to recognize. 2203 unsigned UserOpc = User->getOpcode(); 2204 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && 2205 User->getOperand(0) == Op0 && 2206 User->getOperand(1) == Op1) { 2207 if (!combined) { 2208 if (UserOpc == OtherOpcode) { 2209 SDVTList VTs = DAG.getVTList(VT, VT); 2210 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); 2211 } else if (UserOpc == DivRemOpc) { 2212 combined = SDValue(User, 0); 2213 } else { 2214 assert(UserOpc == Opcode); 2215 continue; 2216 } 2217 } 2218 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) 2219 CombineTo(User, combined); 2220 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) 2221 CombineTo(User, combined.getValue(1)); 2222 } 2223 } 2224 return combined; 2225 } 2226 2227 SDValue DAGCombiner::visitSDIV(SDNode *N) { 2228 SDValue N0 = N->getOperand(0); 2229 SDValue N1 = N->getOperand(1); 2230 EVT VT = N->getValueType(0); 2231 2232 // fold vector ops 2233 if (VT.isVector()) 2234 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2235 return FoldedVOp; 2236 2237 SDLoc DL(N); 2238 2239 // fold (sdiv c1, c2) -> c1/c2 2240 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2241 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2242 if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) 2243 return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); 2244 // fold (sdiv X, 1) -> X 2245 if (N1C && N1C->isOne()) 2246 return N0; 2247 // fold (sdiv X, -1) -> 0-X 2248 if (N1C && N1C->isAllOnesValue()) 2249 return DAG.getNode(ISD::SUB, DL, VT, 2250 DAG.getConstant(0, DL, VT), N0); 2251 2252 // If we know the sign bits of both operands are zero, strength reduce to a 2253 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 2254 if (!VT.isVector()) { 2255 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2256 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); 2257 } 2258 2259 // fold (sdiv X, pow2) -> simple ops after legalize 2260 // FIXME: We check for the exact bit here because the generic lowering gives 2261 // better results in that case. The target-specific lowering should learn how 2262 // to handle exact sdivs efficiently. 2263 if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && 2264 !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && 2265 (N1C->getAPIntValue().isPowerOf2() || 2266 (-N1C->getAPIntValue()).isPowerOf2())) { 2267 // Target-specific implementation of sdiv x, pow2. 2268 if (SDValue Res = BuildSDIVPow2(N)) 2269 return Res; 2270 2271 unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); 2272 2273 // Splat the sign bit into the register 2274 SDValue SGN = 2275 DAG.getNode(ISD::SRA, DL, VT, N0, 2276 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, 2277 getShiftAmountTy(N0.getValueType()))); 2278 AddToWorklist(SGN.getNode()); 2279 2280 // Add (N0 < 0) ? abs2 - 1 : 0; 2281 SDValue SRL = 2282 DAG.getNode(ISD::SRL, DL, VT, SGN, 2283 DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, 2284 getShiftAmountTy(SGN.getValueType()))); 2285 SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); 2286 AddToWorklist(SRL.getNode()); 2287 AddToWorklist(ADD.getNode()); // Divide by pow2 2288 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, 2289 DAG.getConstant(lg2, DL, 2290 getShiftAmountTy(ADD.getValueType()))); 2291 2292 // If we're dividing by a positive value, we're done. Otherwise, we must 2293 // negate the result. 2294 if (N1C->getAPIntValue().isNonNegative()) 2295 return SRA; 2296 2297 AddToWorklist(SRA.getNode()); 2298 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); 2299 } 2300 2301 // If integer divide is expensive and we satisfy the requirements, emit an 2302 // alternate sequence. Targets may check function attributes for size/speed 2303 // trade-offs. 2304 AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); 2305 if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) 2306 if (SDValue Op = BuildSDIV(N)) 2307 return Op; 2308 2309 // sdiv, srem -> sdivrem 2310 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. 2311 // Otherwise, we break the simplification logic in visitREM(). 2312 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) 2313 if (SDValue DivRem = useDivRem(N)) 2314 return DivRem; 2315 2316 // undef / X -> 0 2317 if (N0.getOpcode() == ISD::UNDEF) 2318 return DAG.getConstant(0, DL, VT); 2319 // X / undef -> undef 2320 if (N1.getOpcode() == ISD::UNDEF) 2321 return N1; 2322 2323 return SDValue(); 2324 } 2325 2326 SDValue DAGCombiner::visitUDIV(SDNode *N) { 2327 SDValue N0 = N->getOperand(0); 2328 SDValue N1 = N->getOperand(1); 2329 EVT VT = N->getValueType(0); 2330 2331 // fold vector ops 2332 if (VT.isVector()) 2333 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2334 return FoldedVOp; 2335 2336 SDLoc DL(N); 2337 2338 // fold (udiv c1, c2) -> c1/c2 2339 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2340 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2341 if (N0C && N1C) 2342 if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, 2343 N0C, N1C)) 2344 return Folded; 2345 // fold (udiv x, (1 << c)) -> x >>u c 2346 if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) 2347 return DAG.getNode(ISD::SRL, DL, VT, N0, 2348 DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, 2349 getShiftAmountTy(N0.getValueType()))); 2350 2351 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 2352 if (N1.getOpcode() == ISD::SHL) { 2353 if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { 2354 if (SHC->getAPIntValue().isPowerOf2()) { 2355 EVT ADDVT = N1.getOperand(1).getValueType(); 2356 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, 2357 N1.getOperand(1), 2358 DAG.getConstant(SHC->getAPIntValue() 2359 .logBase2(), 2360 DL, ADDVT)); 2361 AddToWorklist(Add.getNode()); 2362 return DAG.getNode(ISD::SRL, DL, VT, N0, Add); 2363 } 2364 } 2365 } 2366 2367 // fold (udiv x, c) -> alternate 2368 AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); 2369 if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) 2370 if (SDValue Op = BuildUDIV(N)) 2371 return Op; 2372 2373 // sdiv, srem -> sdivrem 2374 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. 2375 // Otherwise, we break the simplification logic in visitREM(). 2376 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) 2377 if (SDValue DivRem = useDivRem(N)) 2378 return DivRem; 2379 2380 // undef / X -> 0 2381 if (N0.getOpcode() == ISD::UNDEF) 2382 return DAG.getConstant(0, DL, VT); 2383 // X / undef -> undef 2384 if (N1.getOpcode() == ISD::UNDEF) 2385 return N1; 2386 2387 return SDValue(); 2388 } 2389 2390 // handles ISD::SREM and ISD::UREM 2391 SDValue DAGCombiner::visitREM(SDNode *N) { 2392 unsigned Opcode = N->getOpcode(); 2393 SDValue N0 = N->getOperand(0); 2394 SDValue N1 = N->getOperand(1); 2395 EVT VT = N->getValueType(0); 2396 bool isSigned = (Opcode == ISD::SREM); 2397 SDLoc DL(N); 2398 2399 // fold (rem c1, c2) -> c1%c2 2400 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2401 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2402 if (N0C && N1C) 2403 if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) 2404 return Folded; 2405 2406 if (isSigned) { 2407 // If we know the sign bits of both operands are zero, strength reduce to a 2408 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 2409 if (!VT.isVector()) { 2410 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2411 return DAG.getNode(ISD::UREM, DL, VT, N0, N1); 2412 } 2413 } else { 2414 // fold (urem x, pow2) -> (and x, pow2-1) 2415 if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && 2416 N1C->getAPIntValue().isPowerOf2()) { 2417 return DAG.getNode(ISD::AND, DL, VT, N0, 2418 DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); 2419 } 2420 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) 2421 if (N1.getOpcode() == ISD::SHL) { 2422 if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { 2423 if (SHC->getAPIntValue().isPowerOf2()) { 2424 SDValue Add = 2425 DAG.getNode(ISD::ADD, DL, VT, N1, 2426 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL, 2427 VT)); 2428 AddToWorklist(Add.getNode()); 2429 return DAG.getNode(ISD::AND, DL, VT, N0, Add); 2430 } 2431 } 2432 } 2433 } 2434 2435 AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); 2436 2437 // If X/C can be simplified by the division-by-constant logic, lower 2438 // X%C to the equivalent of X-X/C*C. 2439 // To avoid mangling nodes, this simplification requires that the combine() 2440 // call for the speculative DIV must not cause a DIVREM conversion. We guard 2441 // against this by skipping the simplification if isIntDivCheap(). When 2442 // div is not cheap, combine will not return a DIVREM. Regardless, 2443 // checking cheapness here makes sense since the simplification results in 2444 // fatter code. 2445 if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { 2446 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; 2447 SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); 2448 AddToWorklist(Div.getNode()); 2449 SDValue OptimizedDiv = combine(Div.getNode()); 2450 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2451 assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && 2452 (OptimizedDiv.getOpcode() != ISD::SDIVREM)); 2453 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); 2454 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); 2455 AddToWorklist(Mul.getNode()); 2456 return Sub; 2457 } 2458 } 2459 2460 // sdiv, srem -> sdivrem 2461 if (SDValue DivRem = useDivRem(N)) 2462 return DivRem.getValue(1); 2463 2464 // undef % X -> 0 2465 if (N0.getOpcode() == ISD::UNDEF) 2466 return DAG.getConstant(0, DL, VT); 2467 // X % undef -> undef 2468 if (N1.getOpcode() == ISD::UNDEF) 2469 return N1; 2470 2471 return SDValue(); 2472 } 2473 2474 SDValue DAGCombiner::visitMULHS(SDNode *N) { 2475 SDValue N0 = N->getOperand(0); 2476 SDValue N1 = N->getOperand(1); 2477 EVT VT = N->getValueType(0); 2478 SDLoc DL(N); 2479 2480 // fold (mulhs x, 0) -> 0 2481 if (isNullConstant(N1)) 2482 return N1; 2483 // fold (mulhs x, 1) -> (sra x, size(x)-1) 2484 if (isOneConstant(N1)) { 2485 SDLoc DL(N); 2486 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, 2487 DAG.getConstant(N0.getValueType().getSizeInBits() - 1, 2488 DL, 2489 getShiftAmountTy(N0.getValueType()))); 2490 } 2491 // fold (mulhs x, undef) -> 0 2492 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2493 return DAG.getConstant(0, SDLoc(N), VT); 2494 2495 // If the type twice as wide is legal, transform the mulhs to a wider multiply 2496 // plus a shift. 2497 if (VT.isSimple() && !VT.isVector()) { 2498 MVT Simple = VT.getSimpleVT(); 2499 unsigned SimpleSize = Simple.getSizeInBits(); 2500 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2501 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2502 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 2503 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 2504 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2505 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2506 DAG.getConstant(SimpleSize, DL, 2507 getShiftAmountTy(N1.getValueType()))); 2508 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2509 } 2510 } 2511 2512 return SDValue(); 2513 } 2514 2515 SDValue DAGCombiner::visitMULHU(SDNode *N) { 2516 SDValue N0 = N->getOperand(0); 2517 SDValue N1 = N->getOperand(1); 2518 EVT VT = N->getValueType(0); 2519 SDLoc DL(N); 2520 2521 // fold (mulhu x, 0) -> 0 2522 if (isNullConstant(N1)) 2523 return N1; 2524 // fold (mulhu x, 1) -> 0 2525 if (isOneConstant(N1)) 2526 return DAG.getConstant(0, DL, N0.getValueType()); 2527 // fold (mulhu x, undef) -> 0 2528 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2529 return DAG.getConstant(0, DL, VT); 2530 2531 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2532 // plus a shift. 2533 if (VT.isSimple() && !VT.isVector()) { 2534 MVT Simple = VT.getSimpleVT(); 2535 unsigned SimpleSize = Simple.getSizeInBits(); 2536 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2537 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2538 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 2539 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 2540 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2541 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2542 DAG.getConstant(SimpleSize, DL, 2543 getShiftAmountTy(N1.getValueType()))); 2544 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2545 } 2546 } 2547 2548 return SDValue(); 2549 } 2550 2551 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp 2552 /// give the opcodes for the two computations that are being performed. Return 2553 /// true if a simplification was made. 2554 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2555 unsigned HiOp) { 2556 // If the high half is not needed, just compute the low half. 2557 bool HiExists = N->hasAnyUseOfValue(1); 2558 if (!HiExists && 2559 (!LegalOperations || 2560 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 2561 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2562 return CombineTo(N, Res, Res); 2563 } 2564 2565 // If the low half is not needed, just compute the high half. 2566 bool LoExists = N->hasAnyUseOfValue(0); 2567 if (!LoExists && 2568 (!LegalOperations || 2569 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2570 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2571 return CombineTo(N, Res, Res); 2572 } 2573 2574 // If both halves are used, return as it is. 2575 if (LoExists && HiExists) 2576 return SDValue(); 2577 2578 // If the two computed results can be simplified separately, separate them. 2579 if (LoExists) { 2580 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2581 AddToWorklist(Lo.getNode()); 2582 SDValue LoOpt = combine(Lo.getNode()); 2583 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2584 (!LegalOperations || 2585 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2586 return CombineTo(N, LoOpt, LoOpt); 2587 } 2588 2589 if (HiExists) { 2590 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2591 AddToWorklist(Hi.getNode()); 2592 SDValue HiOpt = combine(Hi.getNode()); 2593 if (HiOpt.getNode() && HiOpt != Hi && 2594 (!LegalOperations || 2595 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2596 return CombineTo(N, HiOpt, HiOpt); 2597 } 2598 2599 return SDValue(); 2600 } 2601 2602 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2603 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) 2604 return Res; 2605 2606 EVT VT = N->getValueType(0); 2607 SDLoc DL(N); 2608 2609 // If the type is twice as wide is legal, transform the mulhu to a wider 2610 // multiply plus a shift. 2611 if (VT.isSimple() && !VT.isVector()) { 2612 MVT Simple = VT.getSimpleVT(); 2613 unsigned SimpleSize = Simple.getSizeInBits(); 2614 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2615 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2616 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2617 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2618 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2619 // Compute the high part as N1. 2620 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2621 DAG.getConstant(SimpleSize, DL, 2622 getShiftAmountTy(Lo.getValueType()))); 2623 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2624 // Compute the low part as N0. 2625 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2626 return CombineTo(N, Lo, Hi); 2627 } 2628 } 2629 2630 return SDValue(); 2631 } 2632 2633 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2634 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) 2635 return Res; 2636 2637 EVT VT = N->getValueType(0); 2638 SDLoc DL(N); 2639 2640 // If the type is twice as wide is legal, transform the mulhu to a wider 2641 // multiply plus a shift. 2642 if (VT.isSimple() && !VT.isVector()) { 2643 MVT Simple = VT.getSimpleVT(); 2644 unsigned SimpleSize = Simple.getSizeInBits(); 2645 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2646 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2647 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2648 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2649 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2650 // Compute the high part as N1. 2651 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2652 DAG.getConstant(SimpleSize, DL, 2653 getShiftAmountTy(Lo.getValueType()))); 2654 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2655 // Compute the low part as N0. 2656 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2657 return CombineTo(N, Lo, Hi); 2658 } 2659 } 2660 2661 return SDValue(); 2662 } 2663 2664 SDValue DAGCombiner::visitSMULO(SDNode *N) { 2665 // (smulo x, 2) -> (saddo x, x) 2666 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2667 if (C2->getAPIntValue() == 2) 2668 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 2669 N->getOperand(0), N->getOperand(0)); 2670 2671 return SDValue(); 2672 } 2673 2674 SDValue DAGCombiner::visitUMULO(SDNode *N) { 2675 // (umulo x, 2) -> (uaddo x, x) 2676 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2677 if (C2->getAPIntValue() == 2) 2678 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 2679 N->getOperand(0), N->getOperand(0)); 2680 2681 return SDValue(); 2682 } 2683 2684 SDValue DAGCombiner::visitIMINMAX(SDNode *N) { 2685 SDValue N0 = N->getOperand(0); 2686 SDValue N1 = N->getOperand(1); 2687 EVT VT = N0.getValueType(); 2688 2689 // fold vector ops 2690 if (VT.isVector()) 2691 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2692 return FoldedVOp; 2693 2694 // fold (add c1, c2) -> c1+c2 2695 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 2696 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 2697 if (N0C && N1C) 2698 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); 2699 2700 // canonicalize constant to RHS 2701 if (isConstantIntBuildVectorOrConstantInt(N0) && 2702 !isConstantIntBuildVectorOrConstantInt(N1)) 2703 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); 2704 2705 return SDValue(); 2706 } 2707 2708 /// If this is a binary operator with two operands of the same opcode, try to 2709 /// simplify it. 2710 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2711 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2712 EVT VT = N0.getValueType(); 2713 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2714 2715 // Bail early if none of these transforms apply. 2716 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2717 2718 // For each of OP in AND/OR/XOR: 2719 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2720 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2721 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2722 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) 2723 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2724 // 2725 // do not sink logical op inside of a vector extend, since it may combine 2726 // into a vsetcc. 2727 EVT Op0VT = N0.getOperand(0).getValueType(); 2728 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2729 N0.getOpcode() == ISD::SIGN_EXTEND || 2730 N0.getOpcode() == ISD::BSWAP || 2731 // Avoid infinite looping with PromoteIntBinOp. 2732 (N0.getOpcode() == ISD::ANY_EXTEND && 2733 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2734 (N0.getOpcode() == ISD::TRUNCATE && 2735 (!TLI.isZExtFree(VT, Op0VT) || 2736 !TLI.isTruncateFree(Op0VT, VT)) && 2737 TLI.isTypeLegal(Op0VT))) && 2738 !VT.isVector() && 2739 Op0VT == N1.getOperand(0).getValueType() && 2740 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2741 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2742 N0.getOperand(0).getValueType(), 2743 N0.getOperand(0), N1.getOperand(0)); 2744 AddToWorklist(ORNode.getNode()); 2745 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 2746 } 2747 2748 // For each of OP in SHL/SRL/SRA/AND... 2749 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2750 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2751 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2752 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2753 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2754 N0.getOperand(1) == N1.getOperand(1)) { 2755 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2756 N0.getOperand(0).getValueType(), 2757 N0.getOperand(0), N1.getOperand(0)); 2758 AddToWorklist(ORNode.getNode()); 2759 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 2760 ORNode, N0.getOperand(1)); 2761 } 2762 2763 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 2764 // Only perform this optimization after type legalization and before 2765 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 2766 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 2767 // we don't want to undo this promotion. 2768 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 2769 // on scalars. 2770 if ((N0.getOpcode() == ISD::BITCAST || 2771 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 2772 Level == AfterLegalizeTypes) { 2773 SDValue In0 = N0.getOperand(0); 2774 SDValue In1 = N1.getOperand(0); 2775 EVT In0Ty = In0.getValueType(); 2776 EVT In1Ty = In1.getValueType(); 2777 SDLoc DL(N); 2778 // If both incoming values are integers, and the original types are the 2779 // same. 2780 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 2781 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 2782 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 2783 AddToWorklist(Op.getNode()); 2784 return BC; 2785 } 2786 } 2787 2788 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 2789 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 2790 // If both shuffles use the same mask, and both shuffle within a single 2791 // vector, then it is worthwhile to move the swizzle after the operation. 2792 // The type-legalizer generates this pattern when loading illegal 2793 // vector types from memory. In many cases this allows additional shuffle 2794 // optimizations. 2795 // There are other cases where moving the shuffle after the xor/and/or 2796 // is profitable even if shuffles don't perform a swizzle. 2797 // If both shuffles use the same mask, and both shuffles have the same first 2798 // or second operand, then it might still be profitable to move the shuffle 2799 // after the xor/and/or operation. 2800 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 2801 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 2802 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 2803 2804 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && 2805 "Inputs to shuffles are not the same type"); 2806 2807 // Check that both shuffles use the same mask. The masks are known to be of 2808 // the same length because the result vector type is the same. 2809 // Check also that shuffles have only one use to avoid introducing extra 2810 // instructions. 2811 if (SVN0->hasOneUse() && SVN1->hasOneUse() && 2812 SVN0->getMask().equals(SVN1->getMask())) { 2813 SDValue ShOp = N0->getOperand(1); 2814 2815 // Don't try to fold this node if it requires introducing a 2816 // build vector of all zeros that might be illegal at this stage. 2817 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2818 if (!LegalTypes) 2819 ShOp = DAG.getConstant(0, SDLoc(N), VT); 2820 else 2821 ShOp = SDValue(); 2822 } 2823 2824 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) 2825 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) 2826 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) 2827 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 2828 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2829 N0->getOperand(0), N1->getOperand(0)); 2830 AddToWorklist(NewNode.getNode()); 2831 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, 2832 &SVN0->getMask()[0]); 2833 } 2834 2835 // Don't try to fold this node if it requires introducing a 2836 // build vector of all zeros that might be illegal at this stage. 2837 ShOp = N0->getOperand(0); 2838 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2839 if (!LegalTypes) 2840 ShOp = DAG.getConstant(0, SDLoc(N), VT); 2841 else 2842 ShOp = SDValue(); 2843 } 2844 2845 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) 2846 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) 2847 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) 2848 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { 2849 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2850 N0->getOperand(1), N1->getOperand(1)); 2851 AddToWorklist(NewNode.getNode()); 2852 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, 2853 &SVN0->getMask()[0]); 2854 } 2855 } 2856 } 2857 2858 return SDValue(); 2859 } 2860 2861 /// This contains all DAGCombine rules which reduce two values combined by 2862 /// an And operation to a single value. This makes them reusable in the context 2863 /// of visitSELECT(). Rules involving constants are not included as 2864 /// visitSELECT() already handles those cases. 2865 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, 2866 SDNode *LocReference) { 2867 EVT VT = N1.getValueType(); 2868 2869 // fold (and x, undef) -> 0 2870 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2871 return DAG.getConstant(0, SDLoc(LocReference), VT); 2872 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2873 SDValue LL, LR, RL, RR, CC0, CC1; 2874 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2875 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2876 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2877 2878 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2879 LL.getValueType().isInteger()) { 2880 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2881 if (isNullConstant(LR) && Op1 == ISD::SETEQ) { 2882 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2883 LR.getValueType(), LL, RL); 2884 AddToWorklist(ORNode.getNode()); 2885 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2886 } 2887 if (isAllOnesConstant(LR)) { 2888 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2889 if (Op1 == ISD::SETEQ) { 2890 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2891 LR.getValueType(), LL, RL); 2892 AddToWorklist(ANDNode.getNode()); 2893 return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); 2894 } 2895 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2896 if (Op1 == ISD::SETGT) { 2897 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2898 LR.getValueType(), LL, RL); 2899 AddToWorklist(ORNode.getNode()); 2900 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2901 } 2902 } 2903 } 2904 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2905 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2906 Op0 == Op1 && LL.getValueType().isInteger() && 2907 Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || 2908 (isAllOnesConstant(LR) && isNullConstant(RR)))) { 2909 SDLoc DL(N0); 2910 SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), 2911 LL, DAG.getConstant(1, DL, 2912 LL.getValueType())); 2913 AddToWorklist(ADDNode.getNode()); 2914 return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, 2915 DAG.getConstant(2, DL, LL.getValueType()), 2916 ISD::SETUGE); 2917 } 2918 // canonicalize equivalent to ll == rl 2919 if (LL == RR && LR == RL) { 2920 Op1 = ISD::getSetCCSwappedOperands(Op1); 2921 std::swap(RL, RR); 2922 } 2923 if (LL == RL && LR == RR) { 2924 bool isInteger = LL.getValueType().isInteger(); 2925 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2926 if (Result != ISD::SETCC_INVALID && 2927 (!LegalOperations || 2928 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2929 TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { 2930 EVT CCVT = getSetCCResultType(LL.getValueType()); 2931 if (N0.getValueType() == CCVT || 2932 (!LegalOperations && N0.getValueType() == MVT::i1)) 2933 return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), 2934 LL, LR, Result); 2935 } 2936 } 2937 } 2938 2939 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 2940 VT.getSizeInBits() <= 64) { 2941 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2942 APInt ADDC = ADDI->getAPIntValue(); 2943 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2944 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 2945 // immediate for an add, but it is legal if its top c2 bits are set, 2946 // transform the ADD so the immediate doesn't need to be materialized 2947 // in a register. 2948 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 2949 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 2950 SRLI->getZExtValue()); 2951 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 2952 ADDC |= Mask; 2953 if (TLI.isLegalAddImmediate(ADDC.