//===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass combines dag nodes to form fewer, simpler DAG nodes.  It can be run
// both before and after the DAG is legalized.
//
// This pass is not a substitute for the LLVM IR instcombine pass. This pass is
// primarily intended to handle simplification opportunities that are implicit
// in the LLVM IR and exposed by the various codegen lowering phases.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
#include <algorithm>
using namespace llvm;

#define DEBUG_TYPE "dagcombine"

STATISTIC(NodesCombined   , "Number of dag nodes combined");
STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created");
STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created");
STATISTIC(OpsNarrowed     , "Number of load/op/store narrowed");
STATISTIC(LdStFP2Int      , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");

namespace {
  static cl::opt<bool>
    CombinerAA("combiner-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner alias-analysis heuristics"));

  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
               cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

  static cl::opt<bool>
    MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true),
                      cl::desc("DAG combiner may split indexing from loads"));

//------------------------------ DAGCombiner ---------------------------------//

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    // Which legalization phase we are combining in (see CombineLevel).
    CombineLevel Level;
    // Optimization level the combiner was created with.
    CodeGenOpt::Level OptLevel;
    // True once operation legalization has run: new nodes must be legal ops.
    bool LegalOperations;
    // True once type legalization has run: new nodes must use legal types.
    bool LegalTypes;
    // True when the current function is marked to be optimized for size.
    bool ForCodeSize;

    /// \brief Worklist of all of the nodes that need to be simplified.
    ///
    /// This must behave as a stack -- new nodes to process are pushed onto the
    /// back and when processing we pop off of the back.
    ///
    /// The worklist will not contain duplicates but may contain null entries
    /// due to nodes being deleted from the underlying DAG.
    SmallVector<SDNode *, 64> Worklist;

    /// \brief Mapping from an SDNode to its position on the worklist.
    ///
    /// This is used to find and remove nodes from the worklist (by nulling
    /// them) when they are deleted from the underlying DAG. It relies on
    /// stable indices of nodes within the worklist.
    DenseMap<SDNode *, unsigned> WorklistMap;

    /// \brief Set of nodes which have been combined (at least once).
    ///
    /// This is used to allow us to reliably add any operands of a DAG node
    /// which have not yet been combined to the worklist.
    SmallPtrSet<SDNode *, 64> CombinedNodes;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis &AA;

    /// When an instruction is simplified, add all users of the instruction to
    /// the work lists because they might get more simplified now.
    void AddUsersToWorklist(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorklist(Node);
    }

    /// Call the node-specific routine that folds each particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// Add to the worklist making sure its instance is at the back (next to be
    /// processed.)
    void AddToWorklist(SDNode *N) {
      // Skip handle nodes as they can't usefully be combined and confuse the
      // zero-use deletion strategy.
      if (N->getOpcode() == ISD::HANDLENODE)
        return;

      // Insert only if not already present; the map remembers the stable
      // index of each node within Worklist.
      if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second)
        Worklist.push_back(N);
    }

    /// Remove all instances of N from the worklist.
    void removeFromWorklist(SDNode *N) {
      CombinedNodes.erase(N);

      auto It = WorklistMap.find(N);
      if (It == WorklistMap.end())
        return; // Not in the worklist.

      // Null out the entry rather than erasing it to avoid a linear operation.
      Worklist[It->second] = nullptr;
      WorklistMap.erase(It);
    }

    void deleteAndRecombine(SDNode *N);
    bool recursivelyDeleteUnusedNodes(SDNode *N);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    /// Replaces all uses of the results of one DAG node with new values.
    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:

    /// Check the specified integer node value to see if it can be simplified or
    /// if things it uses can be simplified by bit propagation.
    /// If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    SDValue SplitIndexingFromLoad(LoadSDNode *LD);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    /// load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                         ISD::NodeType ExtType);

    /// Call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue useDivRem(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitIMINMAX(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitOR(SDNode *N);
    SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitBSWAP(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSETCCE(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFSQRT(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitFMINNUM(SDNode *N);
    SDValue visitFMAXNUM(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);

    SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain);
    SDValue replaceStoreOfFPConstant(StoreSDNode *ST);

    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitSCALAR_TO_VECTOR(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);
    SDValue visitMLOAD(SDNode *N);
    SDValue visitMSTORE(SDNode *N);
    SDValue visitMGATHER(SDNode *N);
    SDValue visitMSCATTER(SDNode *N);
    SDValue visitFP_TO_FP16(SDNode *N);
    SDValue visitFP16_TO_FP(SDNode *N);

    SDValue visitFADDForFMACombine(SDNode *N);
    SDValue visitFSUBForFMACombine(SDNode *N);
    SDValue visitFMULForFMACombine(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
                             SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          SDLoc DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue CombineExtLoad(SDNode *N);
    SDValue combineRepeatedFPDivisors(SDNode *N);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildSDIVPow2(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue BuildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags);
    SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations,
                                 SDNodeFlags *Flags);
    SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations,
                                 SDNodeFlags *Flags);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              SDLoc DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// Return true if there is any possibility that the two addresses overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// Walk up chain skipping non-aliasing memory nodes, looking for a better
    /// chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Do FindBetterChain for a store and any possibly adjacent stores on
    /// consecutive chains.
    bool findBetterNeighborChains(StoreSDNode *St);

    /// Holds a pointer to an LSBaseSDNode as well as information on where it
    /// is located in a sequence of memory operations connected by a chain.
    struct MemOpLink {
      MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
      MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
      // Ptr to the mem node.
      LSBaseSDNode *MemNode;
      // Offset from the base ptr.
      int64_t OffsetFromBase;
      // What is the sequence number of this mem node.
      // Lowest mem operand in the DAG starts at zero.
      unsigned SequenceNum;
    };

    /// This is a helper function for visitMUL to check the profitability
    /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2).
    /// MulNode is the original multiply, AddNode is (add x, c1),
    /// and ConstNode is c2.
    bool isMulAddWithConstProfitable(SDNode *MulNode,
                                    SDValue &AddNode,
                                    SDValue &ConstNode);

    /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a
    /// constant build_vector of the stored constant values in Stores.
    SDValue getMergedConstantVectorStore(SelectionDAG &DAG,
                                         SDLoc SL,
                                         ArrayRef<MemOpLink> Stores,
                                         SmallVectorImpl<SDValue> &Chains,
                                         EVT Ty) const;

    /// This is a helper function for visitAND and visitZERO_EXTEND.  Returns
    /// true if the (and (load x) c) pattern matches an extload.  ExtVT returns
    /// the type of the loaded value to be extended.  LoadedVT returns the type
    /// of the original loaded value.  NarrowLoad returns whether the load would
    /// need to be narrowed in order to match.
    bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                          EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                          bool &NarrowLoad);

    /// This is a helper function for MergeConsecutiveStores. When the source
    /// elements of the consecutive stores are all constants or all extracted
    /// vector elements, try to merge them into one larger store.
    /// \return True if a merged store was created.
    bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes,
                                         EVT MemVT, unsigned NumStores,
                                         bool IsConstantSrc, bool UseVector);

    /// This is a helper function for MergeConsecutiveStores.
    /// Stores that may be merged are placed in StoreNodes.
    /// Loads that may alias with those stores are placed in AliasLoadNodes.
    void getStoreMergeAndAliasCandidates(
        StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes,
        SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if some memory operations were changed.
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
      ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize();
    }

    /// Runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// Returns a type large enough to hold any valid shift amount - before type
    /// legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      auto &DL = DAG.getDataLayout();
      return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy)
                        : TLI.getPointerTy(DL);
    }

    /// This method returns true if we are running before type legalization or
    /// if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// Convenience wrapper around TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT);
    }
  };
}


namespace {
/// This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
class WorklistRemover : public SelectionDAG::DAGUpdateListener {
  DAGCombiner &DC;
public:
  explicit WorklistRemover(DAGCombiner &dc)
    : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {}

  // Invoked by the DAG whenever a node is erased; drop the node from the
  // combiner's worklist so we never process a dangling pointer.
  void NodeDeleted(SDNode *N, SDNode *E) override {
    DC.removeFromWorklist(N);
  }
};
}

//===----------------------------------------------------------------------===//
//  TargetLowering::DAGCombinerInfo implementation
//===----------------------------------------------------------------------===//

// Thin forwarding wrappers: DAGCombinerInfo carries the combiner as an opaque
// `void *DC`, so each call casts back to DAGCombiner and delegates.

void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->AddToWorklist(N);
}

void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) {
  ((DAGCombiner*)DC)->removeFromWorklist(N);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo);
}

SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo);
}


SDValue TargetLowering::DAGCombinerInfo::
CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) {
  return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo);
}

void TargetLowering::DAGCombinerInfo::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO);
}

//===----------------------------------------------------------------------===//
// Helper Functions
//===----------------------------------------------------------------------===//

// Delete N from the DAG and queue any operand that may have just lost its
// last use so dead subtrees are cleaned up (and possibly re-simplified).
void DAGCombiner::deleteAndRecombine(SDNode *N) {
  removeFromWorklist(N);

  // If the operands of this node are only used by the node, they will now be
  // dead. Make sure to re-visit them and recursively delete dead nodes.
555 for (const SDValue &Op : N->ops()) 556 // For an operand generating multiple values, one of the values may 557 // become dead allowing further simplification (e.g. split index 558 // arithmetic from an indexed load). 559 if (Op->hasOneUse() || Op->getNumValues() > 1) 560 AddToWorklist(Op.getNode()); 561 562 DAG.DeleteNode(N); 563 } 564 565 /// Return 1 if we can compute the negated form of the specified expression for 566 /// the same cost as the expression itself, or 2 if we can compute the negated 567 /// form more cheaply than the expression itself. 568 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 569 const TargetLowering &TLI, 570 const TargetOptions *Options, 571 unsigned Depth = 0) { 572 // fneg is removable even if it has multiple uses. 573 if (Op.getOpcode() == ISD::FNEG) return 2; 574 575 // Don't allow anything with multiple uses. 576 if (!Op.hasOneUse()) return 0; 577 578 // Don't recurse exponentially. 579 if (Depth > 6) return 0; 580 581 switch (Op.getOpcode()) { 582 default: return false; 583 case ISD::ConstantFP: 584 // Don't invert constant FP values after legalize. The negated constant 585 // isn't necessarily legal. 586 return LegalOperations ? 0 : 1; 587 case ISD::FADD: 588 // FIXME: determine better conditions for this xform. 589 if (!Options->UnsafeFPMath) return 0; 590 591 // After operation legalization, it might not be legal to create new FSUBs. 592 if (LegalOperations && 593 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 594 return 0; 595 596 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 597 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 598 Options, Depth + 1)) 599 return V; 600 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 601 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 602 Depth + 1); 603 case ISD::FSUB: 604 // We can't turn -(A-B) into B-A when we honor signed zeros. 
    if (!Options->UnsafeFPMath) return 0;

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return 1;

  case ISD::FMUL:
  case ISD::FDIV:
    if (Options->HonorSignDependentRoundingFPMath()) return 0;

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y))
    if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI,
                                    Options, Depth + 1))
      return V;

    return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options,
                              Depth + 1);

  case ISD::FP_EXTEND:
  case ISD::FP_ROUND:
  case ISD::FSIN:
    // Negation commutes with these unary ops: negating is free iff negating
    // the single operand is free.
    return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options,
                              Depth + 1);
  }
}

/// If isNegatibleForFree returns true, return the newly negated expression.
/// Must be kept in exact correspondence with isNegatibleForFree: it asserts
/// (rather than re-checks) the conditions that function already verified.
static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG,
                                    bool LegalOperations, unsigned Depth = 0) {
  const TargetOptions &Options = DAG.getTarget().Options;
  // fneg is removable even if it has multiple uses.
  if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0);

  // Don't allow anything with multiple uses.
  assert(Op.hasOneUse() && "Unknown reuse!");

  assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree");

  // Preserve fast-math flags from the node being negated.
  const SDNodeFlags *Flags = Op.getNode()->getFlags();

  switch (Op.getOpcode()) {
  default: llvm_unreachable("Unknown code");
  case ISD::ConstantFP: {
    APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF();
    V.changeSign();
    return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType());
  }
  case ISD::FADD:
    // FIXME: determine better conditions for this xform.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fadd A, B)) -> (fsub (fneg A), B)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);
    // fold (fneg (fadd A, B)) -> (fsub (fneg B), A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1),
                       Op.getOperand(0), Flags);
  case ISD::FSUB:
    // We can't turn -(A-B) into B-A when we honor signed zeros.
    assert(Options.UnsafeFPMath);

    // fold (fneg (fsub 0, B)) -> B
    if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0)))
      if (N0CFP->isZero())
        return Op.getOperand(1);

    // fold (fneg (fsub A, B)) -> (fsub B, A)
    return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(),
                       Op.getOperand(1), Op.getOperand(0), Flags);

  case ISD::FMUL:
  case ISD::FDIV:
    assert(!Options.HonorSignDependentRoundingFPMath());

    // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y)
    if (isNegatibleForFree(Op.getOperand(0), LegalOperations,
                           DAG.getTargetLoweringInfo(), &Options, Depth+1))
      return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                         GetNegatedExpression(Op.getOperand(0), DAG,
                                              LegalOperations, Depth+1),
                         Op.getOperand(1), Flags);

    // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y))
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       Op.getOperand(0),
                       GetNegatedExpression(Op.getOperand(1), DAG,
                                            LegalOperations, Depth+1), Flags);

  case ISD::FP_EXTEND:
  case ISD::FSIN:
    // Push the negation through the unary operation.
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(),
                       GetNegatedExpression(Op.getOperand(0), DAG,
                                            LegalOperations, Depth+1));
  case ISD::FP_ROUND:
    return
DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 705 GetNegatedExpression(Op.getOperand(0), DAG, 706 LegalOperations, Depth+1), 707 Op.getOperand(1)); 708 } 709 } 710 711 // Return true if this node is a setcc, or is a select_cc 712 // that selects between the target values used for true and false, making it 713 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to 714 // the appropriate nodes based on the type of node we are checking. This 715 // simplifies life a bit for the callers. 716 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 717 SDValue &CC) const { 718 if (N.getOpcode() == ISD::SETCC) { 719 LHS = N.getOperand(0); 720 RHS = N.getOperand(1); 721 CC = N.getOperand(2); 722 return true; 723 } 724 725 if (N.getOpcode() != ISD::SELECT_CC || 726 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 727 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 728 return false; 729 730 if (TLI.getBooleanContents(N.getValueType()) == 731 TargetLowering::UndefinedBooleanContent) 732 return false; 733 734 LHS = N.getOperand(0); 735 RHS = N.getOperand(1); 736 CC = N.getOperand(4); 737 return true; 738 } 739 740 /// Return true if this is a SetCC-equivalent operation with only one use. 741 /// If this is true, it allows the users to invert the operation for free when 742 /// it is profitable to do so. 743 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 744 SDValue N0, N1, N2; 745 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 746 return true; 747 return false; 748 } 749 750 /// Returns true if N is a BUILD_VECTOR node whose 751 /// elements are all the same constant or undefined. 
752 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { 753 BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); 754 if (!C) 755 return false; 756 757 APInt SplatUndef; 758 unsigned SplatBitSize; 759 bool HasAnyUndefs; 760 EVT EltVT = N->getValueType(0).getVectorElementType(); 761 return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 762 HasAnyUndefs) && 763 EltVT.getSizeInBits() >= SplatBitSize); 764 } 765 766 // \brief Returns the SDNode if it is a constant integer BuildVector 767 // or constant integer. 768 static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { 769 if (isa<ConstantSDNode>(N)) 770 return N.getNode(); 771 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) 772 return N.getNode(); 773 return nullptr; 774 } 775 776 // \brief Returns the SDNode if it is a constant float BuildVector 777 // or constant float. 778 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { 779 if (isa<ConstantFPSDNode>(N)) 780 return N.getNode(); 781 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 782 return N.getNode(); 783 return nullptr; 784 } 785 786 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 787 // int. 788 static ConstantSDNode *isConstOrConstSplat(SDValue N) { 789 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) 790 return CN; 791 792 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 793 BitVector UndefElements; 794 ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); 795 796 // BuildVectors can truncate their operands. Ignore that case here. 797 // FIXME: We blindly ignore splats which include undef which is overly 798 // pessimistic. 799 if (CN && UndefElements.none() && 800 CN->getValueType(0) == N.getValueType().getScalarType()) 801 return CN; 802 } 803 804 return nullptr; 805 } 806 807 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 808 // float. 
static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) {
  if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N))
    return CN;

  if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) {
    BitVector UndefElements;
    ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements);

    // Only accept splats with no undef lanes.
    if (CN && UndefElements.none())
      return CN;
  }

  return nullptr;
}

/// Reassociate (op (op x, c1), y)-style expressions so constants end up
/// together and can be folded.  Returns the reassociated value, or a null
/// SDValue if nothing was done.
SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL,
                                    SDValue N0, SDValue N1) {
  EVT VT = N0.getValueType();
  if (N0.getOpcode() == Opc) {
    if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) {
      if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) {
        // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R))
          return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode);
        return SDValue();
      }
      if (N0.hasOneUse()) {
        // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one
        // use
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1));
      }
    }
  }

  if (N1.getOpcode() == Opc) {
    if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) {
      if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) {
        // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2))
        if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L))
          return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode);
        return SDValue();
      }
      if (N1.hasOneUse()) {
        // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one
        // use
        // NOTE(review): the inner node is given SDLoc(N0), mirroring the first
        // arm above — presumably intentional; confirm against upstream.
        SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0);
        if (!OpNode.getNode())
          return SDValue();
        AddToWorklist(OpNode.getNode());
        return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1));
      }
    }
  }

  return SDValue();
}

/// Replace all of \p N's result values with the corresponding entries of
/// \p To, update the worklist, and delete \p N if it becomes dead.
/// Returns SDValue(N, 0) so callers can signal "handled in place".
SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                               bool AddTo) {
  assert(N->getNumValues() == NumTo && "Broken CombineTo call!");
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.1 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        To[0].getNode()->dump(&DAG);
        dbgs() << " and " << NumTo-1 << " other values\n");
  for (unsigned i = 0, e = NumTo; i != e; ++i)
    assert((!To[i].getNode() ||
            N->getValueType(i) == To[i].getValueType()) &&
           "Cannot combine value to value of different type!");

  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesWith(N, To);
  if (AddTo) {
    // Push the new nodes and any users onto the worklist
    for (unsigned i = 0, e = NumTo; i != e; ++i) {
      if (To[i].getNode()) {
        AddToWorklist(To[i].getNode());
        AddUsersToWorklist(To[i].getNode());
      }
    }
  }

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (N->use_empty())
    deleteAndRecombine(N);
  return SDValue(N, 0);
}

void DAGCombiner::
CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) {
  // Replace all uses.  If any nodes become isomorphic to other nodes and
  // are deleted, make sure to remove them from our worklist.
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New);

  // Push the new node and any (possibly new) users onto the worklist.
  AddToWorklist(TLO.New.getNode());
  AddUsersToWorklist(TLO.New.getNode());

  // Finally, if the node is now dead, remove it from the graph.  The node
  // may not be dead if the replacement process recursively simplified to
  // something else needing this node.
  if (TLO.Old.getNode()->use_empty())
    deleteAndRecombine(TLO.Old.getNode());
}

/// Check the specified integer node value to see if it can be simplified or if
/// things it uses can be simplified by bit propagation. If so, return true.
bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) {
  TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations);
  APInt KnownZero, KnownOne;
  if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO))
    return false;

  // Revisit the node.
  AddToWorklist(Op.getNode());

  // Replace the old value with the new one.
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  CommitTargetLoweringOpt(TLO);
  return true;
}

/// Replace the uses of \p Load with a truncate of the wider load \p ExtLoad
/// (value and chain results), then delete the original load.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc dl(Load);
  EVT VT = Load->getValueType(0);
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorklistRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  deleteAndRecombine(Load);
  AddToWorklist(Trunc.getNode());
}

/// Return a version of \p Op promoted to \p PVT, or a null SDValue on
/// failure.  \p Replace is set to true when \p Op is a load that the caller
/// must replace via ReplaceLoadWithPromotedLoad.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc dl(Op);
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    EVT MemVT = LD->getMemoryVT();
    // Promote a non-extending load to a zext load if that is legal, otherwise
    // to an any-extending load; an existing extension type is preserved.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, dl, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    return DAG.getNode(ISD::AssertSext, dl, PVT,
                       SExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::AssertZext:
    return DAG.getNode(ISD::AssertZext, dl, PVT,
                       ZExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::Constant: {
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, PVT, Op);
  }
  }

  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}

/// Promote \p Op to \p PVT and sign-extend-in-reg from the original width.
/// Returns a null SDValue on failure.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

/// Promote \p Op to \p PVT and zero-extend-in-reg from the original width.
/// Returns a null SDValue on failure.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorklist(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}

/// Promote the specified integer binary operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    if (!NN0.getNode())
      return SDValue();

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1;
    // Identical operands only need to be promoted once.
    if (N0 == N1)
      NN1 = NN0;
    else {
      NN1 = PromoteOperand(N1, PVT, Replace1);
      if (!NN1.getNode())
        return SDValue();
    }

    AddToWorklist(NN0.getNode());
    if (NN1.getNode())
      AddToWorklist(NN1.getNode());

    if (Replace0)
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    if (Replace1)
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    // Do the operation at the wider type and truncate back.
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
  }
  return SDValue();
}

/// Promote the specified integer shift operation if the target indicates it is
/// beneficial. e.g. On x86, it's usually better to promote i16 operations to
/// i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    // SRA needs the sign bits preserved; SRL needs the high bits zeroed.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);
    if (!N0.getNode())
      return SDValue();

    AddToWorklist(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
  }
  return SDValue();
}

/// Promote an extend node when the target prefers a wider result type.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

/// Promote a load to a wider, target-preferred type and replace its uses
/// with a truncate of the promoted value.  Returns true if \p Op was
/// replaced (and deleted).
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc dl(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Prefer a zero-extending load when legal; otherwise any-extend.  An
    // existing extension type is preserved.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD
                                                       : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorklistRemover DeadNodes(*this);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    deleteAndRecombine(N);
    AddToWorklist(Result.getNode());
    return true;
  }
  return false;
}

/// \brief Recursively delete a node which has no uses and any operands for
/// which it is the only use.
///
/// Note that this both deletes the nodes and removes them from the worklist.
/// It also adds any nodes who have had a user deleted to the worklist as they
/// may now have only one use and subject to other combines.
bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) {
  if (!N->use_empty())
    return false;

  SmallSetVector<SDNode *, 16> Nodes;
  Nodes.insert(N);
  do {
    N = Nodes.pop_back_val();
    if (!N)
      continue;

    if (N->use_empty()) {
      // Dead: queue the operands (they may now be dead too) and delete.
      for (const SDValue &ChildN : N->op_values())
        Nodes.insert(ChildN.getNode());

      removeFromWorklist(N);
      DAG.DeleteNode(N);
    } else {
      // Still used, but it lost a user — revisit it for new combines.
      AddToWorklist(N);
    }
  } while (!Nodes.empty());
  return true;
}

//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SDNode &Node : DAG.allnodes())
    AddToWorklist(&Node);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // while the worklist isn't empty, find a node and
  // try and combine it.
  while (!WorklistMap.empty()) {
    SDNode *N;
    // The Worklist holds the SDNodes in order, but it may contain null
    // entries (deleted nodes); skip those.  WorklistMap being non-empty
    // guarantees a non-null entry exists.
    do {
      N = Worklist.pop_back_val();
    } while (!N);

    bool GoodWorklistEntry = WorklistMap.erase(N);
    (void)GoodWorklistEntry;
    assert(GoodWorklistEntry &&
           "Found a worklist entry without a corresponding map entry!");

    // If N has no uses, it is dead.  Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (recursivelyDeleteUnusedNodes(N))
      continue;

    WorklistRemover DeadNodes(*this);

    // If this combine is running after legalizing the DAG, re-legalize any
    // nodes pulled off the worklist.
    if (Level == AfterLegalizeDAG) {
      SmallSetVector<SDNode *, 16> UpdatedNodes;
      bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes);

      for (SDNode *LN : UpdatedNodes) {
        AddToWorklist(LN);
        AddUsersToWorklist(LN);
      }
      if (!NIsValid)
        continue;
    }

    DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG));

    // Add any operands of the new node which have not yet been combined to the
    // worklist as well. Because the worklist uniques things already, this
    // won't repeatedly process the same operand.
    CombinedNodes.insert(N);
    for (const SDValue &ChildN : N->op_values())
      if (!CombinedNodes.count(ChildN.getNode()))
        AddToWorklist(ChildN.getNode());

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used.  Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << " ... into: ";
          RV.getNode()->dump(&DAG));

    // Transfer debug value.
    DAG.TransferDbgValues(SDValue(N, 0), RV);
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // Result counts differ: this is only legal for single-result nodes
      // whose one value matches the replacement's type.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorklist(RV.getNode());
    AddUsersToWorklist(RV.getNode());

    // Finally, if the node is now dead, remove it from the graph.  The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.  This will also take care of adding
    // any operands which have lost a user to the worklist.
    recursivelyDeleteUnusedNodes(N);
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// Dispatch \p N to the opcode-specific visit routine.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:
  case ISD::UREM:               return visitREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SMIN:
  case ISD::SMAX:
  case ISD::UMIN:
  case ISD::UMAX:               return visitIMINMAX(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::BSWAP:              return visitBSWAP(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SETCCE:             return visitSETCCE(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case ISD::ANY_EXTEND:         return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FSQRT:              return visitFSQRT(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FMINNUM:            return visitFMINNUM(N);
  case ISD::FMAXNUM:            return visitFMAXNUM(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::SCALAR_TO_VECTOR:   return visitSCALAR_TO_VECTOR(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  case ISD::MGATHER:            return visitMGATHER(N);
  case ISD::MLOAD:              return visitMLOAD(N);
  case ISD::MSCATTER:           return visitMSCATTER(N);
  case ISD::MSTORE:             return visitMSTORE(N);
  case ISD::FP_TO_FP16:         return visitFP_TO_FP16(N);
  case ISD::FP16_TO_FP:         return visitFP16_TO_FP(N);
  }
  return SDValue();
}

/// Try the generic per-opcode combines, then target-specific combines, then
/// integer promotion for \p N.  Returns the replacement value, or a null
/// SDValue if nothing changed.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops,
                                            N->getFlags());
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// Given a node, return its input chain if it has one, otherwise return a null
/// sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    // Chain operands are conventionally first or last; check those before
    // scanning the middle operands.
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// Simplify a TokenFactor: drop entry tokens and duplicate operands, and
/// flatten nested single-use token factors.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;           // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors.  The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (const SDValue &Op : TF->op_values()) {

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorklist(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()).second)
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    }

    // Add users to worklist if AA is enabled, since it may introduce
    // a lot of new chained token factors while removing memory deps.
    bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA
                                                    : DAG.getSubtarget().useAA();
    return CombineTo(N, Result, UseAA /*add to worklist*/);
  }

  // Note: Result is a null SDValue here when nothing changed.
  return Result;
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorklistRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
1620 // First add the users of this node to the work list so that they 1621 // can be tried again once they have new operands. 1622 AddUsersToWorklist(N); 1623 do { 1624 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1625 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); 1626 } while (!N->use_empty()); 1627 deleteAndRecombine(N); 1628 return SDValue(N, 0); // Return N so it doesn't get rechecked! 1629 } 1630 1631 /// If \p N is a ContantSDNode with isOpaque() == false return it casted to a 1632 /// ContantSDNode pointer else nullptr. 1633 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { 1634 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); 1635 return Const != nullptr && !Const->isOpaque() ? Const : nullptr; 1636 } 1637 1638 SDValue DAGCombiner::visitADD(SDNode *N) { 1639 SDValue N0 = N->getOperand(0); 1640 SDValue N1 = N->getOperand(1); 1641 EVT VT = N0.getValueType(); 1642 1643 // fold vector ops 1644 if (VT.isVector()) { 1645 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 1646 return FoldedVOp; 1647 1648 // fold (add x, 0) -> x, vector edition 1649 if (ISD::isBuildVectorAllZeros(N1.getNode())) 1650 return N0; 1651 if (ISD::isBuildVectorAllZeros(N0.getNode())) 1652 return N1; 1653 } 1654 1655 // fold (add x, undef) -> undef 1656 if (N0.getOpcode() == ISD::UNDEF) 1657 return N0; 1658 if (N1.getOpcode() == ISD::UNDEF) 1659 return N1; 1660 // fold (add c1, c2) -> c1+c2 1661 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 1662 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 1663 if (N0C && N1C) 1664 return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, N0C, N1C); 1665 // canonicalize constant to RHS 1666 if (isConstantIntBuildVectorOrConstantInt(N0) && 1667 !isConstantIntBuildVectorOrConstantInt(N1)) 1668 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); 1669 // fold (add x, 0) -> x 1670 if (isNullConstant(N1)) 1671 return N0; 1672 // fold (add Sym, c) -> Sym+c 1673 if (GlobalAddressSDNode *GA = 
dyn_cast<GlobalAddressSDNode>(N0)) 1674 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && 1675 GA->getOpcode() == ISD::GlobalAddress) 1676 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 1677 GA->getOffset() + 1678 (uint64_t)N1C->getSExtValue()); 1679 // fold ((c1-A)+c2) -> (c1+c2)-A 1680 if (N1C && N0.getOpcode() == ISD::SUB) 1681 if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { 1682 SDLoc DL(N); 1683 return DAG.getNode(ISD::SUB, DL, VT, 1684 DAG.getConstant(N1C->getAPIntValue()+ 1685 N0C->getAPIntValue(), DL, VT), 1686 N0.getOperand(1)); 1687 } 1688 // reassociate add 1689 if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) 1690 return RADD; 1691 // fold ((0-A) + B) -> B-A 1692 if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) 1693 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); 1694 // fold (A + (0-B)) -> A-B 1695 if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) 1696 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); 1697 // fold (A+(B-A)) -> B 1698 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) 1699 return N1.getOperand(0); 1700 // fold ((B-A)+A) -> B 1701 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) 1702 return N0.getOperand(0); 1703 // fold (A+(B-(A+C))) to (B-C) 1704 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1705 N0 == N1.getOperand(1).getOperand(0)) 1706 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1707 N1.getOperand(1).getOperand(1)); 1708 // fold (A+(B-(C+A))) to (B-C) 1709 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1710 N0 == N1.getOperand(1).getOperand(1)) 1711 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1712 N1.getOperand(1).getOperand(0)); 1713 // fold (A+((B-A)+or-C)) to (B+or-C) 1714 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && 1715 N1.getOperand(0).getOpcode() == ISD::SUB && 
1716 N0 == N1.getOperand(0).getOperand(1)) 1717 return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, 1718 N1.getOperand(0).getOperand(0), N1.getOperand(1)); 1719 1720 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant 1721 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { 1722 SDValue N00 = N0.getOperand(0); 1723 SDValue N01 = N0.getOperand(1); 1724 SDValue N10 = N1.getOperand(0); 1725 SDValue N11 = N1.getOperand(1); 1726 1727 if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) 1728 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1729 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), 1730 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); 1731 } 1732 1733 if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) 1734 return SDValue(N, 0); 1735 1736 // fold (a+b) -> (a|b) iff a and b share no bits. 1737 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && 1738 VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) 1739 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); 1740 1741 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) 1742 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && 1743 isNullConstant(N1.getOperand(0).getOperand(0))) 1744 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, 1745 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1746 N1.getOperand(0).getOperand(1), 1747 N1.getOperand(1))); 1748 if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && 1749 isNullConstant(N0.getOperand(0).getOperand(0))) 1750 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, 1751 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1752 N0.getOperand(0).getOperand(1), 1753 N0.getOperand(1))); 1754 1755 if (N1.getOpcode() == ISD::AND) { 1756 SDValue AndOp0 = N1.getOperand(0); 1757 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); 1758 unsigned DestBits = VT.getScalarType().getSizeInBits(); 1759 1760 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) 1761 // and similar xforms where the inner op is either ~0 
    // or 0.  (NumSignBits == DestBits means AndOp0 is known to be all-ones
    // or all-zero, so (and AndOp0, 1) equals (sub 0, AndOp0).)
    if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  // add X, (sextinreg Y i1) -> sub X, (and Y 1)
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      // sextinreg of i1 yields 0 or -1; (and Y, 1) yields the matching 0 or 1,
      // so the add of the former equals the sub of the latter.
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Combine an ISD::ADDC node: an add that additionally produces a carry flag
/// as result value #1 (glue type).
SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    // Disjoint bits mean the add can never carry, so OR is equivalent.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}

/// Combine an ISD::ADDE node: an add with both carry-in (operand 2) and
/// carry-out (result value #1).
SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // canonicalize constant to RHS
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
/// Produce a zero constant of type \p VT when it is safe to do so: scalar
/// zeros are always fine; vector zeros require BUILD_VECTOR to be legal once
/// operations have been legalized.  Returns a null SDValue on failure.
/// NOTE(review): LegalTypes is currently unused here.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG,
                             bool LegalOperations, bool LegalTypes) {
  if (!VT.isVector())
    return DAG.getConstant(0, DL, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, DL, VT);
  return SDValue();
}

/// Combine an ISD::SUB node.  The folds below are ordered; earlier patterns
/// take precedence over later ones.
SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  // fold (sub c1, c2) -> c1-c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C) {
    SDLoc DL(N);
    return DAG.getNode(ISD::ADD, DL, VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), DL, VT));
  }
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (isAllOnesConstant(N0))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr :
    dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDLoc DL(N);
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   DL, VT);
    return DAG.getNode(ISD::SUB, DL, VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                      (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 SDLoc(N), VT);
    }

  // sub X, (sextinreg Y i1) -> add X, (and Y 1)
  // sextinreg of i1 is 0 or -1; (and Y, 1) is the matching 0 or 1.
  if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) {
    VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1));
    if (TN->getVT() == MVT::i1) {
      SDLoc DL(N);
      SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0),
                                 DAG.getConstant(1, DL, VT));
      return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt);
    }
  }

  return SDValue();
}

/// Combine an ISD::SUBC node: a subtract that additionally produces a borrow
/// flag as result value #1 (glue type).
SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  SDLoc DL(N);

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, DL, VT),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (isNullConstant(N1))
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e.
  // (xor x, -1) + no borrow  (-1 - x can never borrow)
  if (isAllOnesConstant(N0))
    return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue));

  return SDValue();
}

/// Combine an ISD::SUBE node: a subtract with borrow-in (operand 2) and
/// borrow-out (result value #1).
SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

/// Combine an ISD::MUL node.  Handles both scalar constants and constant
/// splat vectors; ConstValue0/1 hold the splatted/scalar constant value when
/// the corresponding N*IsConst flag is set.
SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  bool N1IsOpaqueConst = false;
  bool N0IsOpaqueConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = isa<ConstantSDNode>(N0);
    if (N0IsConst) {
      ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue();
      N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque();
    }
    N1IsConst = isa<ConstantSDNode>(N1);
    if (N1IsConst) {
      ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue();
      N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque();
    }
  }

  // fold (mul c1, c2) -> c1*c2
  if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst)
    return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT,
                                      N0.getNode(), N1.getNode());

  // canonicalize constant to RHS (vector doesn't have to splat)
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
  // fold (mul x, 0) -> 0
  if (N1IsConst && ConstValue1 == 0)
    return N1;
  // We require a splat of the entire scalar bit width for non-contiguous
  // bit patterns.
  bool IsFullSplat =
    ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits();
  // fold (mul x, 1) -> x
  if (N1IsConst && ConstValue1 == 1 && IsFullSplat)
    return N0;
  // fold (mul x, -1) -> 0-x
  if (N1IsConst && ConstValue1.isAllOnesValue()) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT), N0);
  }
  // fold (mul x, (1 << c)) -> x << c
  if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() &&
      IsFullSplat) {
    SDLoc DL(N);
    return DAG.getNode(ISD::SHL, DL, VT, N0,
                       DAG.getConstant(ConstValue1.logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c
  if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() &&
      IsFullSplat) {
    unsigned Log2Val = (-ConstValue1).logBase2();
    SDLoc DL(N);
    // FIXME: If the input is something that is easily negated (e.g. a
    // single-use add), we should put the negate there.
    return DAG.getNode(ISD::SUB, DL, VT,
                       DAG.getConstant(0, DL, VT),
                       DAG.getNode(ISD::SHL, DL, VT, N0,
                            DAG.getConstant(Log2Val, DL,
                                      getShiftAmountTy(N0.getValueType()))));
  }

  APInt Val;
  // (mul (shl X, c1), c2) -> (mul X, c2 << c1)
  if (N1IsConst && N0.getOpcode() == ISD::SHL &&
      (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
       isa<ConstantSDNode>(N0.getOperand(1)))) {
    SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT,
                             N1, N0.getOperand(1));
    AddToWorklist(C3.getNode());
    return DAG.getNode(ISD::MUL, SDLoc(N), VT,
                       N0.getOperand(0), C3);
  }

  // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one
  // use.
  {
    SDValue Sh(nullptr,0), Y(nullptr,0);
    // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)).
    if (N0.getOpcode() == ISD::SHL &&
        (isConstantSplatVector(N0.getOperand(1).getNode(), Val) ||
         isa<ConstantSDNode>(N0.getOperand(1))) &&
        N0.getNode()->hasOneUse()) {
      Sh = N0; Y = N1;
    } else if (N1.getOpcode() == ISD::SHL &&
               isa<ConstantSDNode>(N1.getOperand(1)) &&
               N1.getNode()->hasOneUse()) {
      Sh = N1; Y = N0;
    }

    if (Sh.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                Sh.getOperand(0), Y);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT,
                         Mul, Sh.getOperand(1));
    }
  }

  // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2)
  if (isConstantIntBuildVectorOrConstantInt(N1) &&
      N0.getOpcode() == ISD::ADD &&
      isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) &&
      isMulAddWithConstProfitable(N, N0, N1))
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       DAG.getNode(ISD::MUL, SDLoc(N0), VT,
                                   N0.getOperand(0), N1),
                       DAG.getNode(ISD::MUL, SDLoc(N1), VT,
                                   N0.getOperand(1), N1));

  // reassociate mul
  if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1))
    return RMUL;

return SDValue(); 2143 } 2144 2145 /// Return true if divmod libcall is available. 2146 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, 2147 const TargetLowering &TLI) { 2148 RTLIB::Libcall LC; 2149 switch (Node->getSimpleValueType(0).SimpleTy) { 2150 default: return false; // No libcall for vector types. 2151 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; 2152 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; 2153 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; 2154 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; 2155 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; 2156 } 2157 2158 return TLI.getLibcallName(LC) != nullptr; 2159 } 2160 2161 /// Issue divrem if both quotient and remainder are needed. 2162 SDValue DAGCombiner::useDivRem(SDNode *Node) { 2163 if (Node->use_empty()) 2164 return SDValue(); // This is a dead node, leave it alone. 2165 2166 EVT VT = Node->getValueType(0); 2167 if (!TLI.isTypeLegal(VT)) 2168 return SDValue(); 2169 2170 unsigned Opcode = Node->getOpcode(); 2171 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); 2172 2173 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; 2174 // If DIVREM is going to get expanded into a libcall, 2175 // but there is no libcall available, then don't combine. 2176 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && 2177 !isDivRemLibcallAvailable(Node, isSigned, TLI)) 2178 return SDValue(); 2179 2180 // If div is legal, it's better to do the normal expansion 2181 unsigned OtherOpcode = 0; 2182 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { 2183 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; 2184 if (TLI.isOperationLegalOrCustom(Opcode, VT)) 2185 return SDValue(); 2186 } else { 2187 OtherOpcode = isSigned ? 
ISD::SDIV : ISD::UDIV; 2188 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) 2189 return SDValue(); 2190 } 2191 2192 SDValue Op0 = Node->getOperand(0); 2193 SDValue Op1 = Node->getOperand(1); 2194 SDValue combined; 2195 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), 2196 UE = Op0.getNode()->use_end(); UI != UE; ++UI) { 2197 SDNode *User = *UI; 2198 if (User == Node || User->use_empty()) 2199 continue; 2200 // Convert the other matching node(s), too; 2201 // otherwise, the DIVREM may get target-legalized into something 2202 // target-specific that we won't be able to recognize. 2203 unsigned UserOpc = User->getOpcode(); 2204 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && 2205 User->getOperand(0) == Op0 && 2206 User->getOperand(1) == Op1) { 2207 if (!combined) { 2208 if (UserOpc == OtherOpcode) { 2209 SDVTList VTs = DAG.getVTList(VT, VT); 2210 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); 2211 } else if (UserOpc == DivRemOpc) { 2212 combined = SDValue(User, 0); 2213 } else { 2214 assert(UserOpc == Opcode); 2215 continue; 2216 } 2217 } 2218 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) 2219 CombineTo(User, combined); 2220 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) 2221 CombineTo(User, combined.getValue(1)); 2222 } 2223 } 2224 return combined; 2225 } 2226 2227 SDValue DAGCombiner::visitSDIV(SDNode *N) { 2228 SDValue N0 = N->getOperand(0); 2229 SDValue N1 = N->getOperand(1); 2230 EVT VT = N->getValueType(0); 2231 2232 // fold vector ops 2233 if (VT.isVector()) 2234 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2235 return FoldedVOp; 2236 2237 SDLoc DL(N); 2238 2239 // fold (sdiv c1, c2) -> c1/c2 2240 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2241 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2242 if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) 2243 return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); 2244 // fold (sdiv X, 1) -> X 2245 if (N1C && N1C->isOne()) 2246 return 
N0; 2247 // fold (sdiv X, -1) -> 0-X 2248 if (N1C && N1C->isAllOnesValue()) 2249 return DAG.getNode(ISD::SUB, DL, VT, 2250 DAG.getConstant(0, DL, VT), N0); 2251 2252 // If we know the sign bits of both operands are zero, strength reduce to a 2253 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 2254 if (!VT.isVector()) { 2255 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2256 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); 2257 } 2258 2259 // fold (sdiv X, pow2) -> simple ops after legalize 2260 // FIXME: We check for the exact bit here because the generic lowering gives 2261 // better results in that case. The target-specific lowering should learn how 2262 // to handle exact sdivs efficiently. 2263 if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && 2264 !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && 2265 (N1C->getAPIntValue().isPowerOf2() || 2266 (-N1C->getAPIntValue()).isPowerOf2())) { 2267 // Target-specific implementation of sdiv x, pow2. 2268 if (SDValue Res = BuildSDIVPow2(N)) 2269 return Res; 2270 2271 unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); 2272 2273 // Splat the sign bit into the register 2274 SDValue SGN = 2275 DAG.getNode(ISD::SRA, DL, VT, N0, 2276 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, 2277 getShiftAmountTy(N0.getValueType()))); 2278 AddToWorklist(SGN.getNode()); 2279 2280 // Add (N0 < 0) ? abs2 - 1 : 0; 2281 SDValue SRL = 2282 DAG.getNode(ISD::SRL, DL, VT, SGN, 2283 DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, 2284 getShiftAmountTy(SGN.getValueType()))); 2285 SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); 2286 AddToWorklist(SRL.getNode()); 2287 AddToWorklist(ADD.getNode()); // Divide by pow2 2288 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, 2289 DAG.getConstant(lg2, DL, 2290 getShiftAmountTy(ADD.getValueType()))); 2291 2292 // If we're dividing by a positive value, we're done. Otherwise, we must 2293 // negate the result. 
    if (N1C->getAPIntValue().isNonNegative())
      return SRA;

    AddToWorklist(SRA.getNode());
    return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA);
  }

  // If integer divide is expensive and we satisfy the requirements, emit an
  // alternate sequence.  Targets may check function attributes for size/speed
  // trade-offs.
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildSDIV(N))
      return Op;

  // sdiv, srem -> sdivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Combine an ISD::UDIV node.
SDValue DAGCombiner::visitUDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector())
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

  SDLoc DL(N);

  // fold (udiv c1, c2) -> c1/c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT,
                                                    N0C, N1C))
      return Folded;
  // fold (udiv x, (1 << c)) -> x >>u c
  if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::SRL, DL, VT, N0,
                       DAG.getConstant(N1C->getAPIntValue().logBase2(), DL,
                                       getShiftAmountTy(N0.getValueType())));

  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                .logBase2(),
                                                  DL, ADDVT));
        AddToWorklist(Add.getNode());
        return DAG.getNode(ISD::SRL, DL, VT, N0, Add);
      }
    }
  }

  // fold (udiv x, c) -> alternate
  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();
  if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue Op = BuildUDIV(N))
      return Op;

  // udiv, urem -> udivrem
  // If the divisor is constant, then return DIVREM only if isIntDivCheap() is
  // true.  Otherwise, we break the simplification logic in visitREM().
  if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr))
    if (SDValue DivRem = useDivRem(N))
      return DivRem;

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

// handles ISD::SREM and ISD::UREM
SDValue DAGCombiner::visitREM(SDNode *N) {
  unsigned Opcode = N->getOpcode();
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  bool isSigned = (Opcode == ISD::SREM);
  SDLoc DL(N);

  // fold (rem c1, c2) -> c1%c2
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  if (N0C && N1C)
    if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C))
      return Folded;

  if (isSigned) {
    // If we know the sign bits of both operands are zero, strength reduce to a
    // urem instead.
    // Handles (X & 0x0FFFFFFF) %s 16 -> X&15
    if (!VT.isVector()) {
      if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
        return DAG.getNode(ISD::UREM, DL, VT, N0, N1);
    }
  } else {
    // fold (urem x, pow2) -> (and x, pow2-1)
    if (N1C && !N1C->isNullValue() && !N1C->isOpaque() &&
        N1C->getAPIntValue().isPowerOf2()) {
      return DAG.getNode(ISD::AND, DL, VT, N0,
                         DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT));
    }
    // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
    if (N1.getOpcode() == ISD::SHL) {
      if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) {
        if (SHC->getAPIntValue().isPowerOf2()) {
          // Adding all-ones (-1) produces the (shl pow2, y) - 1 mask.
          SDValue Add =
            DAG.getNode(ISD::ADD, DL, VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), DL,
                                 VT));
          AddToWorklist(Add.getNode());
          return DAG.getNode(ISD::AND, DL, VT, N0, Add);
        }
      }
    }
  }

  AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes();

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // To avoid mangling nodes, this simplification requires that the combine()
  // call for the speculative DIV must not cause a DIVREM conversion.  We guard
  // against this by skipping the simplification if isIntDivCheap().  When
  // div is not cheap, combine will not return a DIVREM.  Regardless,
  // checking cheapness here makes sense since the simplification results in
  // fatter code.
  if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) {
    unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV;
    SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1);
    AddToWorklist(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) &&
             (OptimizedDiv.getOpcode() != ISD::SDIVREM));
      SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul);
      AddToWorklist(Mul.getNode());
      return Sub;
    }
  }

  // sdiv/udiv, srem/urem -> sdivrem/udivrem
  if (SDValue DivRem = useDivRem(N))
    return DivRem.getValue(1);

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// Combine an ISD::MULHS node (high half of a signed multiply).
SDValue DAGCombiner::visitMULHS(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhs x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhs x, 1) -> (sra x, size(x)-1)
  if (isOneConstant(N1)) {
    // Note: this SDLoc shadows the outer DL; both refer to N.
    SDLoc DL(N);
    return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0,
                       DAG.getConstant(N0.getValueType().getSizeInBits() - 1,
                                       DL,
                                       getShiftAmountTy(N0.getValueType())));
  }
  // fold (mulhs x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);

  // If the type twice as wide is legal, transform the mulhs to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Sign-extend both operands, multiply in the wide type, then shift the
      // high half down and truncate.
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// Combine an ISD::MULHU node (high half of an unsigned multiply).
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhu x, 0) -> 0
  if (isNullConstant(N1))
    return N1;
  // fold (mulhu x, 1) -> 0
  if (isOneConstant(N1))
    return DAG.getConstant(0, DL, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, DL, VT);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // Zero-extend both operands, multiply in the wide type, then shift the
      // high half down and truncate.
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// Perform optimizations common to nodes that compute two values.  LoOp and
/// HiOp give the opcodes for the two computations that are being performed.
/// Returns the simplified value (via CombineTo) if a simplification was made,
/// or a null SDValue otherwise.
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    // Speculatively build the low-half op and see if combine() simplifies it.
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops());
    AddToWorklist(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    // Same speculative simplification for the high-half op.
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops());
    AddToWorklist(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}

/// Combine an ISD::SMUL_LOHI node (signed multiply producing both halves).
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the SMUL_LOHI to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// Combine an ISD::UMUL_LOHI node (unsigned multiply producing both halves).
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU))
    return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the UMUL_LOHI to a wider
  // multiply plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
                       DAG.getConstant(SimpleSize, DL,
                                       getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
2656 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2657 return CombineTo(N, Lo, Hi); 2658 } 2659 } 2660 2661 return SDValue(); 2662 } 2663 2664 SDValue DAGCombiner::visitSMULO(SDNode *N) { 2665 // (smulo x, 2) -> (saddo x, x) 2666 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2667 if (C2->getAPIntValue() == 2) 2668 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 2669 N->getOperand(0), N->getOperand(0)); 2670 2671 return SDValue(); 2672 } 2673 2674 SDValue DAGCombiner::visitUMULO(SDNode *N) { 2675 // (umulo x, 2) -> (uaddo x, x) 2676 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2677 if (C2->getAPIntValue() == 2) 2678 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 2679 N->getOperand(0), N->getOperand(0)); 2680 2681 return SDValue(); 2682 } 2683 2684 SDValue DAGCombiner::visitIMINMAX(SDNode *N) { 2685 SDValue N0 = N->getOperand(0); 2686 SDValue N1 = N->getOperand(1); 2687 EVT VT = N0.getValueType(); 2688 2689 // fold vector ops 2690 if (VT.isVector()) 2691 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2692 return FoldedVOp; 2693 2694 // fold (add c1, c2) -> c1+c2 2695 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 2696 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 2697 if (N0C && N1C) 2698 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); 2699 2700 // canonicalize constant to RHS 2701 if (isConstantIntBuildVectorOrConstantInt(N0) && 2702 !isConstantIntBuildVectorOrConstantInt(N1)) 2703 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); 2704 2705 return SDValue(); 2706 } 2707 2708 /// If this is a binary operator with two operands of the same opcode, try to 2709 /// simplify it. 2710 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2711 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2712 EVT VT = N0.getValueType(); 2713 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2714 2715 // Bail early if none of these transforms apply. 
2716 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2717 2718 // For each of OP in AND/OR/XOR: 2719 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2720 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2721 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2722 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) 2723 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2724 // 2725 // do not sink logical op inside of a vector extend, since it may combine 2726 // into a vsetcc. 2727 EVT Op0VT = N0.getOperand(0).getValueType(); 2728 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2729 N0.getOpcode() == ISD::SIGN_EXTEND || 2730 N0.getOpcode() == ISD::BSWAP || 2731 // Avoid infinite looping with PromoteIntBinOp. 2732 (N0.getOpcode() == ISD::ANY_EXTEND && 2733 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2734 (N0.getOpcode() == ISD::TRUNCATE && 2735 (!TLI.isZExtFree(VT, Op0VT) || 2736 !TLI.isTruncateFree(Op0VT, VT)) && 2737 TLI.isTypeLegal(Op0VT))) && 2738 !VT.isVector() && 2739 Op0VT == N1.getOperand(0).getValueType() && 2740 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2741 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2742 N0.getOperand(0).getValueType(), 2743 N0.getOperand(0), N1.getOperand(0)); 2744 AddToWorklist(ORNode.getNode()); 2745 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 2746 } 2747 2748 // For each of OP in SHL/SRL/SRA/AND... 
2749 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2750 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2751 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2752 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2753 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2754 N0.getOperand(1) == N1.getOperand(1)) { 2755 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2756 N0.getOperand(0).getValueType(), 2757 N0.getOperand(0), N1.getOperand(0)); 2758 AddToWorklist(ORNode.getNode()); 2759 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 2760 ORNode, N0.getOperand(1)); 2761 } 2762 2763 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 2764 // Only perform this optimization after type legalization and before 2765 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 2766 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 2767 // we don't want to undo this promotion. 2768 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 2769 // on scalars. 2770 if ((N0.getOpcode() == ISD::BITCAST || 2771 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 2772 Level == AfterLegalizeTypes) { 2773 SDValue In0 = N0.getOperand(0); 2774 SDValue In1 = N1.getOperand(0); 2775 EVT In0Ty = In0.getValueType(); 2776 EVT In1Ty = In1.getValueType(); 2777 SDLoc DL(N); 2778 // If both incoming values are integers, and the original types are the 2779 // same. 2780 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 2781 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 2782 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 2783 AddToWorklist(Op.getNode()); 2784 return BC; 2785 } 2786 } 2787 2788 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 
2789 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 2790 // If both shuffles use the same mask, and both shuffle within a single 2791 // vector, then it is worthwhile to move the swizzle after the operation. 2792 // The type-legalizer generates this pattern when loading illegal 2793 // vector types from memory. In many cases this allows additional shuffle 2794 // optimizations. 2795 // There are other cases where moving the shuffle after the xor/and/or 2796 // is profitable even if shuffles don't perform a swizzle. 2797 // If both shuffles use the same mask, and both shuffles have the same first 2798 // or second operand, then it might still be profitable to move the shuffle 2799 // after the xor/and/or operation. 2800 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 2801 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 2802 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 2803 2804 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && 2805 "Inputs to shuffles are not the same type"); 2806 2807 // Check that both shuffles use the same mask. The masks are known to be of 2808 // the same length because the result vector type is the same. 2809 // Check also that shuffles have only one use to avoid introducing extra 2810 // instructions. 2811 if (SVN0->hasOneUse() && SVN1->hasOneUse() && 2812 SVN0->getMask().equals(SVN1->getMask())) { 2813 SDValue ShOp = N0->getOperand(1); 2814 2815 // Don't try to fold this node if it requires introducing a 2816 // build vector of all zeros that might be illegal at this stage. 
2817 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2818 if (!LegalTypes) 2819 ShOp = DAG.getConstant(0, SDLoc(N), VT); 2820 else 2821 ShOp = SDValue(); 2822 } 2823 2824 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) 2825 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) 2826 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) 2827 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 2828 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2829 N0->getOperand(0), N1->getOperand(0)); 2830 AddToWorklist(NewNode.getNode()); 2831 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, 2832 &SVN0->getMask()[0]); 2833 } 2834 2835 // Don't try to fold this node if it requires introducing a 2836 // build vector of all zeros that might be illegal at this stage. 2837 ShOp = N0->getOperand(0); 2838 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2839 if (!LegalTypes) 2840 ShOp = DAG.getConstant(0, SDLoc(N), VT); 2841 else 2842 ShOp = SDValue(); 2843 } 2844 2845 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) 2846 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) 2847 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) 2848 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { 2849 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2850 N0->getOperand(1), N1->getOperand(1)); 2851 AddToWorklist(NewNode.getNode()); 2852 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, 2853 &SVN0->getMask()[0]); 2854 } 2855 } 2856 } 2857 2858 return SDValue(); 2859 } 2860 2861 /// This contains all DAGCombine rules which reduce two values combined by 2862 /// an And operation to a single value. This makes them reusable in the context 2863 /// of visitSELECT(). Rules involving constants are not included as 2864 /// visitSELECT() already handles those cases. 
SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1,
                                  SDNode *LocReference) {
  EVT VT = N1.getValueType();

  // fold (and x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(LocReference), VT);
  // fold (and (setcc x), (setcc y)) -> (setcc (and x, y))
  SDValue LL, LR, RL, RR, CC0, CC1;
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0)
      if (isNullConstant(LR) && Op1 == ISD::SETEQ) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                     LR.getValueType(), LL, RL);
        AddToWorklist(ORNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
      }
      if (isAllOnesConstant(LR)) {
        // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1)
        if (Op1 == ISD::SETEQ) {
          SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0),
                                        LR.getValueType(), LL, RL);
          AddToWorklist(ANDNode.getNode());
          return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
        }
        // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1)
        if (Op1 == ISD::SETGT) {
          SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0),
                                       LR.getValueType(), LL, RL);
          AddToWorklist(ORNode.getNode());
          return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
        }
      }
    }
    // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2)
    if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) &&
        Op0 == Op1 && LL.getValueType().isInteger() &&
        Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) ||
                              (isAllOnesConstant(LR) && isNullConstant(RR)))) {
      SDLoc DL(N0);
      SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(),
                                    LL, DAG.getConstant(1, DL,
                                                        LL.getValueType()));
      AddToWorklist(ADDNode.getNode());
      return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode,
                          DAG.getConstant(2, DL, LL.getValueType()),
                          ISD::SETUGE);
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    // If both compares have identical operands, try to merge the two
    // condition codes into a single setcc.
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
        EVT CCVT = getSetCCResultType(LL.getValueType());
        if (N0.getValueType() == CCVT ||
            (!LegalOperations && N0.getValueType() == MVT::i1))
          return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
                              LL, LR, Result);
      }
    }
  }

  if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL &&
      VT.getSizeInBits() <= 64) {
    if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      APInt ADDC = ADDI->getAPIntValue();
      if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
        // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal
        // immediate for an add, but it is legal if its top c2 bits are set,
        // transform the ADD so the immediate doesn't need to be materialized
        // in a register.
        if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) {
          APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(),
                                             SRLI->getZExtValue());
          if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) {
            ADDC |= Mask;
            if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) {
              SDLoc DL(N0);
              SDValue NewAdd =
                DAG.getNode(ISD::ADD, DL, VT,
                            N0.getOperand(0), DAG.getConstant(ADDC, DL, VT));
              CombineTo(N0.getNode(), NewAdd);
              // Return N so it doesn't get rechecked!
              return SDValue(LocReference, 0);
            }
          }
        }
      }
    }
  }

  return SDValue();
}

/// Check whether (and (load ...), AndC) can be matched as a (possibly
/// narrowed) ZEXTLOAD. On success, ExtVT/LoadedVT are set and NarrowLoad
/// says whether the load width must be reduced.
bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN,
                                   EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT,
                                   bool &NarrowLoad) {
  uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits();

  // The AND constant must be a low-bits mask (e.g. 0xFF, 0xFFFF).
  if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue()))
    return false;

  ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits);
  LoadedVT = LoadN->getMemoryVT();

  if (ExtVT == LoadedVT &&
      (!LegalOperations ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) {
    // ZEXTLOAD will match without needing to change the size of the value being
    // loaded.
    NarrowLoad = false;
    return true;
  }

  // Do not change the width of a volatile load.
  if (LoadN->isVolatile())
    return false;

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound())
    return false;

  if (LegalOperations &&
      !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))
    return false;

  // Give the target a chance to veto the width reduction.
  if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT))
    return false;

  NarrowLoad = true;
  return true;
}

SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (and x, 0) -> 0, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getNullValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getNullValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (and x, -1) -> x, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N0;
  }

  // fold (and c1, c2) -> c1&c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
      !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0);
  // fold (and x, -1) -> x
  if (isAllOnesConstant(N1))
    return N0;
  // if (and x, c) is known to be zero, return 0
  unsigned BitWidth = VT.getScalarType().getSizeInBits();
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(BitWidth)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // reassociate and
  if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1))
    return RAND;
  // fold (and (or x, C), D) -> D if (C & D) == D
  if (N1C && N0.getOpcode() == ISD::OR)
    if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
        return N1;
  // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits.
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N0Op0 = N0.getOperand(0);
    APInt Mask = ~N1C->getAPIntValue();
    Mask = Mask.trunc(N0Op0.getValueSizeInBits());
    if (DAG.MaskedValueIsZero(N0Op0, Mask)) {
      SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N),
                                 N0.getValueType(), N0Op0);

      // Replace uses of the AND with uses of the Zero extend node.
      CombineTo(N, Zext);

      // We actually want to replace all uses of the any_extend with the
      // zero_extend, to avoid duplicating things. This will later cause this
      // AND to be folded.
      CombineTo(N0.getNode(), Zext);
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) ->
  // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must
  // already be zero by virtue of the width of the base type of the load.
  //
  // the 'X' node here can either be nothing or an extract_vector_elt to catch
  // more cases.
  if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT &&
       N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() &&
       N0.getOperand(0).getOpcode() == ISD::LOAD) ||
      N0.getOpcode() == ISD::LOAD) {
    LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ?
                                         N0 : N0.getOperand(0) );

    // Get the constant (if applicable) the zero'th operand is being ANDed with.
    // This can be a pure constant or a vector splat, in which case we treat the
    // vector as a scalar and use the splat value.
    APInt Constant = APInt::getNullValue(1);
    if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) {
      Constant = C->getAPIntValue();
    } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) {
      APInt SplatValue, SplatUndef;
      unsigned SplatBitSize;
      bool HasAnyUndefs;
      bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef,
                                             SplatBitSize, HasAnyUndefs);
      if (IsSplat) {
        // Undef bits can contribute to a possible optimisation if set, so
        // set them.
        SplatValue |= SplatUndef;

        // The splat value may be something like "0x00FFFFFF", which means 0 for
        // the first vector value and FF for the rest, repeating. We need a mask
        // that will apply equally to all members of the vector, so AND all the
        // lanes of the constant together.
        EVT VT = Vector->getValueType(0);
        unsigned BitWidth = VT.getVectorElementType().getSizeInBits();

        // If the splat value has been compressed to a bitlength lower
        // than the size of the vector lane, we need to re-expand it to
        // the lane size.
        if (BitWidth > SplatBitSize)
          for (SplatValue = SplatValue.zextOrTrunc(BitWidth);
               SplatBitSize < BitWidth;
               SplatBitSize = SplatBitSize * 2)
            SplatValue |= SplatValue.shl(SplatBitSize);

        // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a
        // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value.
        if (SplatBitSize % BitWidth == 0) {
          Constant = APInt::getAllOnesValue(BitWidth);
          for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i)
            Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth);
        }
      }
    }

    // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is
    // actually legal and isn't going to get expanded, else this is a false
    // optimisation.
    bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD,
                                                    Load->getValueType(0),
                                                    Load->getMemoryVT());

    // Resize the constant to the same size as the original memory access before
    // extension. If it is still the AllOnesValue then this AND is completely
    // unneeded.
    Constant =
      Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits());

    bool B;
    switch (Load->getExtensionType()) {
    default: B = false; break;
    case ISD::EXTLOAD: B = CanZextLoadProfitably; break;
    case ISD::ZEXTLOAD:
    case ISD::NON_EXTLOAD: B = true; break;
    }

    if (B && Constant.isAllOnesValue()) {
      // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to
      // preserve semantics once we get rid of the AND.
      SDValue NewLoad(Load, 0);
      if (Load->getExtensionType() == ISD::EXTLOAD) {
        NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD,
                              Load->getValueType(0), SDLoc(Load),
                              Load->getChain(), Load->getBasePtr(),
                              Load->getOffset(), Load->getMemoryVT(),
                              Load->getMemOperand());
        // Replace uses of the EXTLOAD with the new ZEXTLOAD.
        if (Load->getNumValues() == 3) {
          // PRE/POST_INC loads have 3 values.
          SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1),
                           NewLoad.getValue(2) };
          CombineTo(Load, To, 3, true);
        } else {
          CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1));
        }
      }

      // Fold the AND away, taking care not to fold to the old load node if we
      // replaced it.
      CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0);

      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }

  // fold (and (load x), 255) -> (zextload x, i8)
  // fold (and (extload x, i16), 255) -> (zextload x, i8)
  // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8)
  if (N1C && (N0.getOpcode() == ISD::LOAD ||
              (N0.getOpcode() == ISD::ANY_EXTEND &&
               N0.getOperand(0).getOpcode() == ISD::LOAD))) {
    bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND;
    LoadSDNode *LN0 = HasAnyExt
      ? cast<LoadSDNode>(N0.getOperand(0))
      : cast<LoadSDNode>(N0);
    if (LN0->getExtensionType() != ISD::SEXTLOAD &&
        LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) {
      auto NarrowLoad = false;
      EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT;
      EVT ExtVT, LoadedVT;
      if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT,
                           NarrowLoad)) {
        if (!NarrowLoad) {
          // Same width: just flip the extension kind to ZEXTLOAD.
          SDValue NewLoad =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), LN0->getBasePtr(), ExtVT,
                           LN0->getMemOperand());
          AddToWorklist(N);
          CombineTo(LN0, NewLoad, NewLoad.getValue(1));
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        } else {
          EVT PtrType = LN0->getOperand(1).getValueType();

          unsigned Alignment = LN0->getAlignment();
          SDValue NewPtr = LN0->getBasePtr();

          // For big endian targets, we need to add an offset to the pointer
          // to load the correct bytes. For little endian systems, we merely
          // need to read fewer bytes from the same pointer.
          if (DAG.getDataLayout().isBigEndian()) {
            unsigned LVTStoreBytes = LoadedVT.getStoreSize();
            unsigned EVTStoreBytes = ExtVT.getStoreSize();
            unsigned PtrOff = LVTStoreBytes - EVTStoreBytes;
            SDLoc DL(LN0);
            NewPtr = DAG.getNode(ISD::ADD, DL, PtrType,
                                 NewPtr, DAG.getConstant(PtrOff, DL, PtrType));
            Alignment = MinAlign(Alignment, PtrOff);
          }

          AddToWorklist(NewPtr.getNode());

          SDValue Load =
            DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy,
                           LN0->getChain(), NewPtr,
                           LN0->getPointerInfo(),
                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                           LN0->isInvariant(), Alignment, LN0->getAAInfo());
          AddToWorklist(N);
          CombineTo(LN0, Load, Load.getValue(1));
          return SDValue(N, 0); // Return N so it doesn't get rechecked!
        }
      }
    }
  }

  if (SDValue Combined = visitANDLike(N0, N1, N))
    return Combined;

  // Simplify: (and (op x...), (op y...)) -> (op (and x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1)
  // fold (and (sra)) -> (and (srl)) when possible.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (zext_inreg (extload x)) -> (zextload x)
  if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
    }
  }
  // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use
  if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    // If we zero all the possible extended bits, then we can turn this into
    // a zextload if we are running before legalize or the operation is legal.
    unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits();
    if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth,
                           BitWidth - MemVT.getScalarType().getSizeInBits())) &&
        ((!LegalOperations && !LN0->isVolatile()) ||
         TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT,
                                       LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      AddToWorklist(N);
      CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
      return SDValue(N, 0); // Return N so it doesn't get rechecked!
3303 } 3304 } 3305 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) 3306 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { 3307 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 3308 N0.getOperand(1), false); 3309 if (BSwap.getNode()) 3310 return BSwap; 3311 } 3312 3313 return SDValue(); 3314 } 3315 3316 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16. 3317 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 3318 bool DemandHighBits) { 3319 if (!LegalOperations) 3320 return SDValue(); 3321 3322 EVT VT = N->getValueType(0); 3323 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) 3324 return SDValue(); 3325 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3326 return SDValue(); 3327 3328 // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) 3329 bool LookPassAnd0 = false; 3330 bool LookPassAnd1 = false; 3331 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) 3332 std::swap(N0, N1); 3333 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) 3334 std::swap(N0, N1); 3335 if (N0.getOpcode() == ISD::AND) { 3336 if (!N0.getNode()->hasOneUse()) 3337 return SDValue(); 3338 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3339 if (!N01C || N01C->getZExtValue() != 0xFF00) 3340 return SDValue(); 3341 N0 = N0.getOperand(0); 3342 LookPassAnd0 = true; 3343 } 3344 3345 if (N1.getOpcode() == ISD::AND) { 3346 if (!N1.getNode()->hasOneUse()) 3347 return SDValue(); 3348 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3349 if (!N11C || N11C->getZExtValue() != 0xFF) 3350 return SDValue(); 3351 N1 = N1.getOperand(0); 3352 LookPassAnd1 = true; 3353 } 3354 3355 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) 3356 std::swap(N0, N1); 3357 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) 3358 return SDValue(); 3359 if (!N0.getNode()->hasOneUse() || 3360 !N1.getNode()->hasOneUse()) 3361 
return SDValue(); 3362 3363 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3364 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3365 if (!N01C || !N11C) 3366 return SDValue(); 3367 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) 3368 return SDValue(); 3369 3370 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) 3371 SDValue N00 = N0->getOperand(0); 3372 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { 3373 if (!N00.getNode()->hasOneUse()) 3374 return SDValue(); 3375 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); 3376 if (!N001C || N001C->getZExtValue() != 0xFF) 3377 return SDValue(); 3378 N00 = N00.getOperand(0); 3379 LookPassAnd0 = true; 3380 } 3381 3382 SDValue N10 = N1->getOperand(0); 3383 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { 3384 if (!N10.getNode()->hasOneUse()) 3385 return SDValue(); 3386 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); 3387 if (!N101C || N101C->getZExtValue() != 0xFF00) 3388 return SDValue(); 3389 N10 = N10.getOperand(0); 3390 LookPassAnd1 = true; 3391 } 3392 3393 if (N00 != N10) 3394 return SDValue(); 3395 3396 // Make sure everything beyond the low halfword gets set to zero since the SRL 3397 // 16 will clear the top bits. 3398 unsigned OpSizeInBits = VT.getSizeInBits(); 3399 if (DemandHighBits && OpSizeInBits > 16) { 3400 // If the left-shift isn't masked out then the only way this is a bswap is 3401 // if all bits beyond the low 8 are 0. In that case the entire pattern 3402 // reduces to a left shift anyway: leave it for other parts of the combiner. 3403 if (!LookPassAnd0) 3404 return SDValue(); 3405 3406 // However, if the right shift isn't masked out then it might be because 3407 // it's not needed. See if we can spot that too. 
3408 if (!LookPassAnd1 && 3409 !DAG.MaskedValueIsZero( 3410 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) 3411 return SDValue(); 3412 } 3413 3414 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); 3415 if (OpSizeInBits > 16) { 3416 SDLoc DL(N); 3417 Res = DAG.getNode(ISD::SRL, DL, VT, Res, 3418 DAG.getConstant(OpSizeInBits - 16, DL, 3419 getShiftAmountTy(VT))); 3420 } 3421 return Res; 3422 } 3423 3424 /// Return true if the specified node is an element that makes up a 32-bit 3425 /// packed halfword byteswap. 3426 /// ((x & 0x000000ff) << 8) | 3427 /// ((x & 0x0000ff00) >> 8) | 3428 /// ((x & 0x00ff0000) << 8) | 3429 /// ((x & 0xff000000) >> 8) 3430 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { 3431 if (!N.getNode()->hasOneUse()) 3432 return false; 3433 3434 unsigned Opc = N.getOpcode(); 3435 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 3436 return false; 3437 3438 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3439 if (!N1C) 3440 return false; 3441 3442 unsigned Num; 3443 switch (N1C->getZExtValue()) { 3444 default: 3445 return false; 3446 case 0xFF: Num = 0; break; 3447 case 0xFF00: Num = 1; break; 3448 case 0xFF0000: Num = 2; break; 3449 case 0xFF000000: Num = 3; break; 3450 } 3451 3452 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 
3453 SDValue N0 = N.getOperand(0); 3454 if (Opc == ISD::AND) { 3455 if (Num == 0 || Num == 2) { 3456 // (x >> 8) & 0xff 3457 // (x >> 8) & 0xff0000 3458 if (N0.getOpcode() != ISD::SRL) 3459 return false; 3460 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3461 if (!C || C->getZExtValue() != 8) 3462 return false; 3463 } else { 3464 // (x << 8) & 0xff00 3465 // (x << 8) & 0xff000000 3466 if (N0.getOpcode() != ISD::SHL) 3467 return false; 3468 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3469 if (!C || C->getZExtValue() != 8) 3470 return false; 3471 } 3472 } else if (Opc == ISD::SHL) { 3473 // (x & 0xff) << 8 3474 // (x & 0xff0000) << 8 3475 if (Num != 0 && Num != 2) 3476 return false; 3477 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3478 if (!C || C->getZExtValue() != 8) 3479 return false; 3480 } else { // Opc == ISD::SRL 3481 // (x & 0xff00) >> 8 3482 // (x & 0xff000000) >> 8 3483 if (Num != 1 && Num != 3) 3484 return false; 3485 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3486 if (!C || C->getZExtValue() != 8) 3487 return false; 3488 } 3489 3490 if (Parts[Num]) 3491 return false; 3492 3493 Parts[Num] = N0.getOperand(0).getNode(); 3494 return true; 3495 } 3496 3497 /// Match a 32-bit packed halfword bswap. 
That is
/// ((x & 0x000000ff) << 8) |
/// ((x & 0x0000ff00) >> 8) |
/// ((x & 0x00ff0000) << 8) |
/// ((x & 0xff000000) >> 8)
/// => (rotl (bswap x), 16)
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  // Only attempt this after operation legalization, so the BSWAP/ROT legality
  // queries below are final.
  if (!LegalOperations)
    return SDValue();

  // The pattern is only recognized for i32 with a legal BSWAP.
  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  // Parts[i] records the node supplying byte i; all four must agree below.
  SDNode *Parts[4] = {};

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();

    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
    SDValue N010 = N01.getOperand(0);
    if (!isBSwapHWordElement(N010, Parts))
      return SDValue();
    SDValue N011 = N01.getOperand(1);
    if (!isBSwapHWordElement(N011, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDLoc DL(N);
  SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT,
                              SDValue(Parts[0], 0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, DL, VT,
                     DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt));
}

/// This contains all DAGCombine rules which reduce two values combined by
/// an Or operation to a single value \see visitANDLike().
/// LocReference is the node whose debug location new nodes should carry.
SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) {
  EVT VT = N1.getValueType();
  // fold (or x, undef) -> -1
  if (!LegalOperations &&
      (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
    EVT EltVT = VT.isVector() ?
VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()),
                           SDLoc(LocReference), VT);
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  SDValue LL, LR, RL, RR, CC0, CC1;
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
      if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
                                     LR.getValueType(), LL, RL);
        AddToWorklist(ORNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y  -1)) -> (setgt (and X, Y), -1)
      if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
                                      LR.getValueType(), LL, RL);
        AddToWorklist(ANDNode.getNode());
        return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    // Try to merge the two setcc's into one with the OR of the condition
    // codes, when both compare the same operands.
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) {
        EVT CCVT = getSetCCResultType(LL.getValueType());
        if (N0.getValueType() == CCVT ||
            (!LegalOperations && N0.getValueType() == MVT::i1))
          return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(),
                              LL, LR, Result);
      }
    }
  }

  // (or (and X, C1), (and Y, C2))  -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    if (const ConstantSDNode *N0O1C =
        getAsNonOpaqueConstant(N0.getOperand(1))) {
      if (const ConstantSDNode *N1O1C =
          getAsNonOpaqueConstant(N1.getOperand(1))) {
        // We can only do this xform if we know that bits from X that are set in
        // C2 but not in C1 are already zero. Likewise for Y.
        const APInt &LHSMask = N0O1C->getAPIntValue();
        const APInt &RHSMask = N1O1C->getAPIntValue();

        if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
            DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
          SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                  N0.getOperand(0), N1.getOperand(0));
          SDLoc DL(LocReference);
          return DAG.getNode(ISD::AND, DL, VT, X,
                             DAG.getConstant(LHSMask | RHSMask, DL, VT));
        }
      }
    }
  }

  // (or (and X, M), (and X, N)) -> (and X, (or M, N))
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(0) == N1.getOperand(0) &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                            N0.getOperand(1), N1.getOperand(1));
    return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X);
  }

  return SDValue();
}

SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      // do not return N0, because undef node may exist in N0
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N0.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N0.getValueType());
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      // do not return N1, because undef node may exist in N1
      return DAG.getConstant(
          APInt::getAllOnesValue(
              N1.getValueType().getScalarType().getSizeInBits()),
          SDLoc(N), N1.getValueType());

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        // Avoid folding a node with illegal type.
        TLI.isTypeLegal(VT) &&
        // Both shuffles must share the same (all-zeros) second operand.
        N0->getOperand(1) == N1->getOperand(1) &&
        ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
      bool CanFold = true;
      unsigned NumElts = VT.getVectorNumElements();
      const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
      const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
      // We construct two shuffle masks:
      // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
      //   and N1 as the second operand.
      // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
      //   and N0 as the second operand.
      // We do this because OR is commutable and therefore there might be
      // two ways to fold this node into a shuffle.
      SmallVector<int,4> Mask1;
      SmallVector<int,4> Mask2;

      for (unsigned i = 0; i != NumElts && CanFold; ++i) {
        int M0 = SV0->getMaskElt(i);
        int M1 = SV1->getMaskElt(i);

        // Both shuffle indexes are undef. Propagate Undef.
        if (M0 < 0 && M1 < 0) {
          Mask1.push_back(M0);
          Mask2.push_back(M0);
          continue;
        }

        // Reject if exactly one index is undef, or if both lanes pull from
        // the same side (then one lane is not guaranteed zero and OR cannot
        // be expressed as a single shuffle).
        if (M0 < 0 || M1 < 0 ||
            (M0 < (int)NumElts && M1 < (int)NumElts) ||
            (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
          CanFold = false;
          break;
        }

        Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
        Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
      }

      if (CanFold) {
        // Fold this sequence only if the resulting shuffle is 'legal'.
        if (TLI.isShuffleMaskLegal(Mask1, VT))
          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
                                      N1->getOperand(0), &Mask1[0]);
        if (TLI.isShuffleMaskLegal(Mask2, VT))
          return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
                                      N0->getOperand(0), &Mask2[0]);
      }
    }
  }

  // fold (or c1, c2) -> c1|c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
     !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // fold (or x, -1) -> -1
  if (isAllOnesConstant(N1))
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  if (SDValue Combined = visitORLike(N0, N1, N))
    return Combined;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  if (SDValue BSwap = MatchBSwapHWord(N, N0, N1))
    return BSwap;
  if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1))
    return BSwap;

  // reassociate or
  if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1))
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) == 0.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
             isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
      if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT,
                                                   N1C, C1))
        return DAG.getNode(
            ISD::AND, SDLoc(N), VT,
            DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR);
      return SDValue();
    }
  }
  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Match "(X shl/srl V1) & V2" where V2 may not be present.
/// On success, Shift is the SHL/SRL node and Mask is the AND constant if one
/// was peeled off (otherwise Mask is left untouched/empty).
static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) {
  // Peel off an optional constant mask, remembering it in Mask.
  if (Op.getOpcode() == ISD::AND) {
    if (isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) {
      Mask = Op.getOperand(1);
      Op = Op.getOperand(0);
    } else {
      return false;
    }
  }

  if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) {
    Shift = Op;
    return true;
  }

  return false;
}

// Return true if we can prove that, whenever Neg and Pos are both in the
// range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that
// for two opposing shifts shift1 and shift2 and a value X with OpBits bits:
//
// (or (shift1 X, Neg), (shift2 X, Pos))
//
// reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate
// in direction shift1 by Neg.
The range [0, EltSize) means that we only need
// to consider shift amounts with defined behavior.
static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) {
  // If EltSize is a power of 2 then:
  //
  //  (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1)
  //  (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize).
  //
  // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check
  // for the stronger condition:
  //
  //     Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1)    [A]
  //
  // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1)
  // we can just replace Neg with Neg' for the rest of the function.
  //
  // In other cases we check for the even stronger condition:
  //
  //     Neg == EltSize - Pos                                      [B]
  //
  // for all Neg and Pos. Note that the (or ...) then invokes undefined
  // behavior if Pos == 0 (and consequently Neg == EltSize).
  //
  // We could actually use [A] whenever EltSize is a power of 2, but the
  // only extra cases that it would match are those uninteresting ones
  // where Neg and Pos are never in range at the same time. E.g. for
  // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos)
  // as well as (sub 32, Pos), but:
  //
  //     (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos))
  //
  // always invokes undefined behavior for 32-bit X.
  //
  // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise.
  unsigned MaskLoBits = 0;
  if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) {
    if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) {
      if (NegC->getAPIntValue() == EltSize - 1) {
        // Strip the redundant masking; remember the width so we use [A].
        Neg = Neg.getOperand(0);
        MaskLoBits = Log2_64(EltSize);
      }
    }
  }

  // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1.
  if (Neg.getOpcode() != ISD::SUB)
    return false;
  ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0));
  if (!NegC)
    return false;
  SDValue NegOp1 = Neg.getOperand(1);

  // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with
  // Pos'.  The truncation is redundant for the purpose of the equality.
  if (MaskLoBits && Pos.getOpcode() == ISD::AND)
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      if (PosC->getAPIntValue() == EltSize - 1)
        Pos = Pos.getOperand(0);

  // The condition we need is now:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask
  //
  // If NegOp1 == Pos then we need:
  //
  //     EltSize & Mask == NegC & Mask
  //
  // (because "x & Mask" is a truncation and distributes through subtraction).
  //
  // Width holds the candidate value (NegC, or NegC + PosC) that must equal
  // EltSize modulo Mask for the rotate reduction to be valid.
  APInt Width;
  if (Pos == NegOp1)
    Width = NegC->getAPIntValue();

  // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC.
  // Then the condition we want to prove becomes:
  //
  //     (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask
  //
  // which, again because "x & Mask" is a truncation, becomes:
  //
  //     NegC & Mask == (EltSize - PosC) & Mask
  //     EltSize & Mask == (NegC + PosC) & Mask
  else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) {
    if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1)))
      Width = PosC->getAPIntValue() + NegC->getAPIntValue();
    else
      return false;
  } else
    return false;

  // Now we just need to check that EltSize & Mask == Width & Mask.
  if (MaskLoBits)
    // EltSize & Mask is 0 since Mask is EltSize - 1.
    return Width.getLoBits(MaskLoBits) == 0;
  return Width == EltSize;
}

// A subroutine of MatchRotate used once we have found an OR of two opposite
// shifts of Shifted.
If Neg == <operand size> - Pos then the OR reduces
// to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the
// former being preferred if supported. InnerPos and InnerNeg are Pos and
// Neg with outer conversions stripped away.
SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos,
                                       SDValue Neg, SDValue InnerPos,
                                       SDValue InnerNeg, unsigned PosOpcode,
                                       unsigned NegOpcode, SDLoc DL) {
  // fold (or (shl x, (*ext y)),
  //          (srl x, (*ext (sub 32, y)))) ->
  //   (rotl x, y) or (rotr x, (sub 32, y))
  //
  // fold (or (shl x, (*ext (sub 32, y))),
  //          (srl x, (*ext y))) ->
  //   (rotr x, y) or (rotl x, (sub 32, y))
  EVT VT = Shifted.getValueType();
  if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) {
    // Prefer PosOpcode when the target supports it; otherwise fall back to
    // the equivalent rotate in the other direction with the Neg amount.
    bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT);
    return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted,
                       HasPos ? Pos : Neg).getNode();
  }

  return nullptr;
}

// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask, RHSMask);
  }

  // From here on LHSShift is the SHL and RHSShift is the SRL of the same
  // value.
  unsigned EltSizeInBits = VT.getScalarSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) {
    uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue();
    if ((LShVal + RShVal) != EltSizeInBits)
      return nullptr;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt AllBits = APInt::getAllOnesValue(EltSizeInBits);
      SDValue Mask = DAG.getConstant(AllBits, DL, VT);

      if (LHSMask.getNode()) {
        // The SRL half fills the low LShVal bits; those bits were never
        // covered by LHSMask, so widen the mask to let them through.
        APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, LHSMask,
                                       DAG.getConstant(RHSBits, DL, VT)));
      }
      if (RHSMask.getNode()) {
        // Likewise the SHL half fills the high RShVal bits.
        APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal);
        Mask = DAG.getNode(ISD::AND, DL, VT, Mask,
                           DAG.getNode(ISD::OR, DL, VT, RHSMask,
                                       DAG.getConstant(LHSBits, DL, VT)));
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask);
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both orientations: shl amount as Pos (rotl) then srl amount as Pos
  // (rotr).
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}

SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  ConstantSDNode *N1C = getAsNonOpaqueConstant(N1);
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C);
  // canonicalize constant to RHS
  if (isConstantIntBuildVectorOrConstantInt(N0) &&
     !isConstantIntBuildVectorOrConstantInt(N1))
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (isNullConstant(N1))
    return N0;
  // reassociate xor
  if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1))
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  SDValue LHS, RHS, CC;
  if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    SDLoc DL(N0);
    V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V,
                    DAG.getConstant(1, DL, V.getValueType()));
    AddToWorklist(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  if (isOneConstant(N1) && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      // De Morgan: flip AND<->OR and invert both operands.
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  if (isAllOnesConstant(N1) &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorklist(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), DL, VT));
    }
    if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) {
      SDLoc DL(N);
      return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), DL, VT));
    }
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // fold (xor (shl 1, x), -1) -> (rotl ~1, x)
  // Here is a concrete example of this equivalence:
  // i16   x == 14
  // i16 shl == 1 << 14 == 16384 == 0b0100000000000000
  // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111
  //
  // =>
  //
  // i16     ~1      == 0b1111111111111110
  // i16 rol(~1, 14) == 0b1011111111111111
  //
  // Some additional tips to help conceptualize this transform:
  // - Try to see the operation as placing a single zero in a value of all ones.
  // - There exists no value for x which would allow the result to contain zero.
  // - Values of x larger than the bitwidth are undefined and do not require a
  //   consistent result.
  // - Pushing the zero left requires shifting one bits in from the right.
  //   A rotate left of ~1 is a nice way of achieving the desired result.
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL
      && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) {
    SDLoc DL(N);
    // ~1 (i.e. -2) sign-extends to the all-ones-except-bit-0 value in VT.
    return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT),
                       N0.getOperand(1));
  }

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode())
    if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N))
      return Tmp;

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}

/// Handle transforms common to the three shifts, when the shift amount is a
/// constant.
/// NOTE(review): the Amt parameter is not referenced in this body; callers
/// presumably pass the constant shift amount — confirm before removing.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
4266 HighBitSet = false; // We can only transform sra if the high bit is clear. 4267 break; 4268 } 4269 4270 // We require the RHS of the binop to be a constant and not opaque as well. 4271 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); 4272 if (!BinOpCst) return SDValue(); 4273 4274 // FIXME: disable this unless the input to the binop is a shift by a constant. 4275 // If it is not a shift, it pessimizes some common cases like: 4276 // 4277 // void foo(int *X, int i) { X[i & 1235] = 1; } 4278 // int bar(int *X, int i) { return X[i & 255]; } 4279 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); 4280 if ((BinOpLHSVal->getOpcode() != ISD::SHL && 4281 BinOpLHSVal->getOpcode() != ISD::SRA && 4282 BinOpLHSVal->getOpcode() != ISD::SRL) || 4283 !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) 4284 return SDValue(); 4285 4286 EVT VT = N->getValueType(0); 4287 4288 // If this is a signed shift right, and the high bit is modified by the 4289 // logical operation, do not perform the transformation. The highBitSet 4290 // boolean indicates the value of the high bit of the constant which would 4291 // cause it to be modified for this operation. 4292 if (N->getOpcode() == ISD::SRA) { 4293 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); 4294 if (BinOpRHSSignSet != HighBitSet) 4295 return SDValue(); 4296 } 4297 4298 if (!TLI.isDesirableToCommuteWithShift(LHS)) 4299 return SDValue(); 4300 4301 // Fold the constants, shifting the binop RHS by the shift amount. 4302 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), 4303 N->getValueType(0), 4304 LHS->getOperand(1), N->getOperand(1)); 4305 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); 4306 4307 // Create the new shift. 4308 SDValue NewShift = DAG.getNode(N->getOpcode(), 4309 SDLoc(LHS->getOperand(0)), 4310 VT, LHS->getOperand(0), N->getOperand(1)); 4311 4312 // Create the new binop. 
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}

/// Fold (truncate (and x, c)) -> (and (truncate x), (truncate c)).
/// Helper shared by the shift and rotate visitors so that a shift amount of
/// the form (trunc (and y, c)) can be simplified.
/// \returns the new AND node, or a null SDValue if the fold does not apply.
SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) {
  assert(N->getOpcode() == ISD::TRUNCATE);
  assert(N->getOperand(0).getOpcode() == ISD::AND);

  // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC)
  // Only do this when both the truncate and the AND have a single use;
  // otherwise the original nodes remain live and the DAG grows.
  if (N->hasOneUse() && N->getOperand(0).hasOneUse()) {
    SDValue N01 = N->getOperand(0).getOperand(1);

    // The AND mask must be a (possibly splatted) non-opaque constant so it
    // can itself be truncated at compile time.
    if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) {
      if (!N01C->isOpaque()) {
        EVT TruncVT = N->getValueType(0);
        SDValue N00 = N->getOperand(0).getOperand(0);
        // Narrow the mask to the truncated type's scalar width.
        APInt TruncC = N01C->getAPIntValue();
        TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits());
        SDLoc DL(N);

        return DAG.getNode(ISD::AND, DL, TruncVT,
                           DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00),
                           DAG.getConstant(TruncC, DL, TruncVT));
      }
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitRotate(SDNode *N) {
  // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))).
  if (N->getOperand(1).getOpcode() == ISD::TRUNCATE &&
      N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode());
    if (NewOp1.getNode())
      // Rebuild the rotate with the simplified amount operand.
      return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0),
                         N->getOperand(0), NewOp1);
  }
  return SDValue();
}

/// Visit an ISD::SHL node: fold constant, zero, undef and known-bits cases,
/// and combine shl with inner shifts/extends where the result is equivalent.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  if (VT.isVector()) {
    if (SDValue FoldedVOp = SimplifyVBinOp(N))
      return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        // Requires a setcc whose "true" is all-ones so the AND mask can be
        // pre-shifted without changing the selected lanes.
        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT,
                                                     N01CV, N1CV))
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      } else {
        // Not an AND; fall back to treating a splat amount as a scalar
        // constant for the folds below.
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  ConstantSDNode *N0C = getAsNonOpaqueConstant(N0);
  if (N0C && N1C && !N1C->isOpaque())
    return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (isNullConstant(N0))
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getAPIntValue().uge(OpSizeInBits))
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, SDLoc(N), VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, SDLoc(N), VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      SDLoc DL(N);
      // Combined amount at or beyond the width shifts everything out.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, DL, VT);
      return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, DL, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // (OpSizeInBits - InnerShiftSize) is the number of bits added by ext.
      if (c2 >= OpSizeInBits - InnerShiftSize) {
        SDLoc DL(N0);
        if (c1 + c2 >= OpSizeInBits)
          return DAG.getConstant(0, DL, VT);
        return DAG.getNode(ISD::SHL, DL, VT,
                           DAG.getNode(N0.getOpcode(), DL, VT,
                                       N0Op0->getOperand(0)),
                           DAG.getConstant(c1 + c2, DL, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      if (c1 < VT.getScalarSizeInBits()) {
        uint64_t c2 = N1C->getZExtValue();
        // Only handles equal shift amounts (shl and srl cancel in the low
        // part, leaving a mask that zext preserves).
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDLoc DL(N);
          SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(),
                                       NewOp0,
                                       DAG.getConstant(c2, DL, CountVT));
          AddToWorklist(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (sr[la] exact X,  C1), C2) -> (shl    X, (C2-C1)) if C1 <= C2
  // fold (shl (sr[la] exact X,  C1), C2) -> (sr[la] X, (C1-C2)) if C1  > C2
  // The 'exact' flag guarantees no bits were shifted out by the inner shift,
  // so the two shifts can be merged into one in either direction.
  if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) &&
      cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t C1 = N0C1->getZExtValue();
      uint64_t C2 = N1C->getZExtValue();
      SDLoc DL(N);
      if (C1 <= C2)
        return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                           DAG.getConstant(C2 - C1, DL, N1.getValueType()));
      return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0),
                         DAG.getConstant(C1 - C2, DL, N1.getValueType()));
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1)), MASK) or
  //                               (and (srl x, (sub c1, c2)), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        // MASK keeps exactly the bits the original srl+shl pair would keep.
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          SDLoc DL(N);
          Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, DL, N1.getValueType()));
        } else {