1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11 // both before and after the DAG is legalized. 12 // 13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14 // primarily intended to handle simplification opportunities that are implicit 15 // in the LLVM IR and exposed by the various codegen lowering phases. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/CodeGen/SelectionDAG.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/SmallBitVector.h" 22 #include "llvm/ADT/SmallPtrSet.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/Analysis/AliasAnalysis.h" 25 #include "llvm/CodeGen/MachineFrameInfo.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/CodeGen/SelectionDAGTargetInfo.h" 28 #include "llvm/IR/DataLayout.h" 29 #include "llvm/IR/DerivedTypes.h" 30 #include "llvm/IR/Function.h" 31 #include "llvm/IR/LLVMContext.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/ErrorHandling.h" 35 #include "llvm/Support/MathExtras.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "llvm/Target/TargetLowering.h" 38 #include "llvm/Target/TargetOptions.h" 39 #include "llvm/Target/TargetRegisterInfo.h" 40 #include "llvm/Target/TargetSubtargetInfo.h" 41 #include <algorithm> 42 using namespace llvm; 43 44 #define DEBUG_TYPE "dagcombine" 45 46 STATISTIC(NodesCombined , "Number of dag nodes combined"); 47 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 48 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 49 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); 50 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); 51 STATISTIC(SlicedLoads, "Number of load sliced"); 52 53 namespace { 54 static cl::opt<bool> 55 CombinerAA("combiner-alias-analysis", cl::Hidden, 56 cl::desc("Enable DAG combiner alias-analysis heuristics")); 57 58 static cl::opt<bool> 59 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, 60 cl::desc("Enable DAG combiner's use of IR alias analysis")); 61 62 static cl::opt<bool> 63 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), 64 cl::desc("Enable DAG combiner's use of TBAA")); 65 66 #ifndef NDEBUG 67 static cl::opt<std::string> 68 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, 69 cl::desc("Only use DAG-combiner alias analysis in this" 70 " function")); 71 #endif 72 73 /// Hidden option to stress test load slicing, i.e., when this option 74 /// is enabled, load slicing bypasses most of its profitability guards. 75 static cl::opt<bool> 76 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, 77 cl::desc("Bypass the profitability model of load " 78 "slicing"), 79 cl::init(false)); 80 81 static cl::opt<bool> 82 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), 83 cl::desc("DAG combiner may split indexing from loads")); 84 85 //------------------------------ DAGCombiner ---------------------------------// 86 87 class DAGCombiner { 88 SelectionDAG &DAG; 89 const TargetLowering &TLI; 90 CombineLevel Level; 91 CodeGenOpt::Level OptLevel; 92 bool LegalOperations; 93 bool LegalTypes; 94 bool ForCodeSize; 95 96 /// \brief Worklist of all of the nodes that need to be simplified. 97 /// 98 /// This must behave as a stack -- new nodes to process are pushed onto the 99 /// back and when processing we pop off of the back. 100 /// 101 /// The worklist will not contain duplicates but may contain null entries 102 /// due to nodes being deleted from the underlying DAG. 103 SmallVector<SDNode *, 64> Worklist; 104 105 /// \brief Mapping from an SDNode to its position on the worklist. 106 /// 107 /// This is used to find and remove nodes from the worklist (by nulling 108 /// them) when they are deleted from the underlying DAG. It relies on 109 /// stable indices of nodes within the worklist. 110 DenseMap<SDNode *, unsigned> WorklistMap; 111 112 /// \brief Set of nodes which have been combined (at least once). 113 /// 114 /// This is used to allow us to reliably add any operands of a DAG node 115 /// which have not yet been combined to the worklist. 116 SmallPtrSet<SDNode *, 32> CombinedNodes; 117 118 // AA - Used for DAG load/store alias analysis. 119 AliasAnalysis &AA; 120 121 /// When an instruction is simplified, add all users of the instruction to 122 /// the work lists because they might get more simplified now. 123 void AddUsersToWorklist(SDNode *N) { 124 for (SDNode *Node : N->uses()) 125 AddToWorklist(Node); 126 } 127 128 /// Call the node-specific routine that folds each particular type of node. 129 SDValue visit(SDNode *N); 130 131 public: 132 /// Add to the worklist making sure its instance is at the back (next to be 133 /// processed.) 134 void AddToWorklist(SDNode *N) { 135 // Skip handle nodes as they can't usefully be combined and confuse the 136 // zero-use deletion strategy. 137 if (N->getOpcode() == ISD::HANDLENODE) 138 return; 139 140 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) 141 Worklist.push_back(N); 142 } 143 144 /// Remove all instances of N from the worklist. 145 void removeFromWorklist(SDNode *N) { 146 CombinedNodes.erase(N); 147 148 auto It = WorklistMap.find(N); 149 if (It == WorklistMap.end()) 150 return; // Not in the worklist. 151 152 // Null out the entry rather than erasing it to avoid a linear operation. 153 Worklist[It->second] = nullptr; 154 WorklistMap.erase(It); 155 } 156 157 void deleteAndRecombine(SDNode *N); 158 bool recursivelyDeleteUnusedNodes(SDNode *N); 159 160 /// Replaces all uses of the results of one DAG node with new values. 161 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 162 bool AddTo = true); 163 164 /// Replaces all uses of the results of one DAG node with new values. 165 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { 166 return CombineTo(N, &Res, 1, AddTo); 167 } 168 169 /// Replaces all uses of the results of one DAG node with new values. 170 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, 171 bool AddTo = true) { 172 SDValue To[] = { Res0, Res1 }; 173 return CombineTo(N, To, 2, AddTo); 174 } 175 176 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); 177 178 private: 179 180 /// Check the specified integer node value to see if it can be simplified or 181 /// if things it uses can be simplified by bit propagation. 182 /// If so, return true. 183 bool SimplifyDemandedBits(SDValue Op) { 184 unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); 185 APInt Demanded = APInt::getAllOnesValue(BitWidth); 186 return SimplifyDemandedBits(Op, Demanded); 187 } 188 189 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); 190 191 bool CombineToPreIndexedLoadStore(SDNode *N); 192 bool CombineToPostIndexedLoadStore(SDNode *N); 193 SDValue SplitIndexingFromLoad(LoadSDNode *LD); 194 bool SliceUpLoad(SDNode *N); 195 196 /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed 197 /// load. 198 /// 199 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. 200 /// \param InVecVT type of the input vector to EVE with bitcasts resolved. 201 /// \param EltNo index of the vector element to load. 202 /// \param OriginalLoad load that EVE came from to be replaced. 203 /// \returns EVE on success SDValue() on failure. 204 SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( 205 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); 206 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); 207 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); 208 SDValue SExtPromoteOperand(SDValue Op, EVT PVT); 209 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); 210 SDValue PromoteIntBinOp(SDValue Op); 211 SDValue PromoteIntShiftOp(SDValue Op); 212 SDValue PromoteExtend(SDValue Op); 213 bool PromoteLoad(SDValue Op); 214 215 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, SDValue Trunc, 216 SDValue ExtLoad, const SDLoc &DL, 217 ISD::NodeType ExtType); 218 219 /// Call the node-specific routine that knows how to fold each 220 /// particular type of node. If that doesn't do anything, try the 221 /// target-specific DAG combines. 222 SDValue combine(SDNode *N); 223 224 // Visitation implementation - Implement dag node combining for different 225 // node types. The semantics are as follows: 226 // Return Value: 227 // SDValue.getNode() == 0 - No change was made 228 // SDValue.getNode() == N - N was replaced, is dead and has been handled. 229 // otherwise - N should be replaced by the returned Operand. 230 // 231 SDValue visitTokenFactor(SDNode *N); 232 SDValue visitMERGE_VALUES(SDNode *N); 233 SDValue visitADD(SDNode *N); 234 SDValue visitSUB(SDNode *N); 235 SDValue visitADDC(SDNode *N); 236 SDValue visitSUBC(SDNode *N); 237 SDValue visitADDE(SDNode *N); 238 SDValue visitSUBE(SDNode *N); 239 SDValue visitMUL(SDNode *N); 240 SDValue useDivRem(SDNode *N); 241 SDValue visitSDIV(SDNode *N); 242 SDValue visitUDIV(SDNode *N); 243 SDValue visitREM(SDNode *N); 244 SDValue visitMULHU(SDNode *N); 245 SDValue visitMULHS(SDNode *N); 246 SDValue visitSMUL_LOHI(SDNode *N); 247 SDValue visitUMUL_LOHI(SDNode *N); 248 SDValue visitSMULO(SDNode *N); 249 SDValue visitUMULO(SDNode *N); 250 SDValue visitIMINMAX(SDNode *N); 251 SDValue visitAND(SDNode *N); 252 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); 253 SDValue visitOR(SDNode *N); 254 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); 255 SDValue visitXOR(SDNode *N); 256 SDValue SimplifyVBinOp(SDNode *N); 257 SDValue visitSHL(SDNode *N); 258 SDValue visitSRA(SDNode *N); 259 SDValue visitSRL(SDNode *N); 260 SDValue visitRotate(SDNode *N); 261 SDValue visitBSWAP(SDNode *N); 262 SDValue visitBITREVERSE(SDNode *N); 263 SDValue visitCTLZ(SDNode *N); 264 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); 265 SDValue visitCTTZ(SDNode *N); 266 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); 267 SDValue visitCTPOP(SDNode *N); 268 SDValue visitSELECT(SDNode *N); 269 SDValue visitVSELECT(SDNode *N); 270 SDValue visitSELECT_CC(SDNode *N); 271 SDValue visitSETCC(SDNode *N); 272 SDValue visitSETCCE(SDNode *N); 273 SDValue visitSIGN_EXTEND(SDNode *N); 274 SDValue visitZERO_EXTEND(SDNode *N); 275 SDValue visitANY_EXTEND(SDNode *N); 276 SDValue visitSIGN_EXTEND_INREG(SDNode *N); 277 SDValue visitSIGN_EXTEND_VECTOR_INREG(SDNode *N); 278 SDValue visitZERO_EXTEND_VECTOR_INREG(SDNode *N); 279 SDValue visitTRUNCATE(SDNode *N); 280 SDValue visitBITCAST(SDNode *N); 281 SDValue visitBUILD_PAIR(SDNode *N); 282 SDValue visitFADD(SDNode *N); 283 SDValue visitFSUB(SDNode *N); 284 SDValue visitFMUL(SDNode *N); 285 SDValue visitFMA(SDNode *N); 286 SDValue visitFDIV(SDNode *N); 287 SDValue visitFREM(SDNode *N); 288 SDValue visitFSQRT(SDNode *N); 289 SDValue visitFCOPYSIGN(SDNode *N); 290 SDValue visitSINT_TO_FP(SDNode *N); 291 SDValue visitUINT_TO_FP(SDNode *N); 292 SDValue visitFP_TO_SINT(SDNode *N); 293 SDValue visitFP_TO_UINT(SDNode *N); 294 SDValue visitFP_ROUND(SDNode *N); 295 SDValue visitFP_ROUND_INREG(SDNode *N); 296 SDValue visitFP_EXTEND(SDNode *N); 297 SDValue visitFNEG(SDNode *N); 298 SDValue visitFABS(SDNode *N); 299 SDValue visitFCEIL(SDNode *N); 300 SDValue visitFTRUNC(SDNode *N); 301 SDValue visitFFLOOR(SDNode *N); 302 SDValue visitFMINNUM(SDNode *N); 303 SDValue visitFMAXNUM(SDNode *N); 304 SDValue visitBRCOND(SDNode *N); 305 SDValue visitBR_CC(SDNode *N); 306 SDValue visitLOAD(SDNode *N); 307 308 SDValue replaceStoreChain(StoreSDNode *ST, SDValue BetterChain); 309 SDValue replaceStoreOfFPConstant(StoreSDNode *ST); 310 311 SDValue visitSTORE(SDNode *N); 312 SDValue visitINSERT_VECTOR_ELT(SDNode *N); 313 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); 314 SDValue visitBUILD_VECTOR(SDNode *N); 315 SDValue visitCONCAT_VECTORS(SDNode *N); 316 SDValue visitEXTRACT_SUBVECTOR(SDNode *N); 317 SDValue visitVECTOR_SHUFFLE(SDNode *N); 318 SDValue visitSCALAR_TO_VECTOR(SDNode *N); 319 SDValue visitINSERT_SUBVECTOR(SDNode *N); 320 SDValue visitMLOAD(SDNode *N); 321 SDValue visitMSTORE(SDNode *N); 322 SDValue visitMGATHER(SDNode *N); 323 SDValue visitMSCATTER(SDNode *N); 324 SDValue visitFP_TO_FP16(SDNode *N); 325 SDValue visitFP16_TO_FP(SDNode *N); 326 327 SDValue visitFADDForFMACombine(SDNode *N); 328 SDValue visitFSUBForFMACombine(SDNode *N); 329 SDValue visitFMULForFMACombine(SDNode *N); 330 331 SDValue XformToShuffleWithZero(SDNode *N); 332 SDValue ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue LHS, 333 SDValue RHS); 334 335 SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); 336 337 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); 338 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); 339 SDValue SimplifySelect(const SDLoc &DL, SDValue N0, SDValue N1, SDValue N2); 340 SDValue SimplifySelectCC(const SDLoc &DL, SDValue N0, SDValue N1, 341 SDValue N2, SDValue N3, ISD::CondCode CC, 342 bool NotExtCompare = false); 343 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 344 const SDLoc &DL, bool foldBooleans = true); 345 346 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 347 SDValue &CC) const; 348 bool isOneUseSetCC(SDValue N) const; 349 350 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 351 unsigned HiOp); 352 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); 353 SDValue CombineExtLoad(SDNode *N); 354 SDValue combineRepeatedFPDivisors(SDNode *N); 355 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); 356 SDValue BuildSDIV(SDNode *N); 357 SDValue BuildSDIVPow2(SDNode *N); 358 SDValue BuildUDIV(SDNode *N); 359 SDValue BuildReciprocalEstimate(SDValue Op, SDNodeFlags *Flags); 360 SDValue buildRsqrtEstimate(SDValue Op, SDNodeFlags *Flags); 361 SDValue buildSqrtEstimate(SDValue Op, SDNodeFlags *Flags); 362 SDValue buildSqrtEstimateImpl(SDValue Op, SDNodeFlags *Flags, bool Recip); 363 SDValue buildSqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations, 364 SDNodeFlags *Flags, bool Reciprocal); 365 SDValue buildSqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations, 366 SDNodeFlags *Flags, bool Reciprocal); 367 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 368 bool DemandHighBits = true); 369 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); 370 SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, 371 SDValue InnerPos, SDValue InnerNeg, 372 unsigned PosOpcode, unsigned NegOpcode, 373 const SDLoc &DL); 374 SDNode *MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL); 375 SDValue ReduceLoadWidth(SDNode *N); 376 SDValue ReduceLoadOpStoreWidth(SDNode *N); 377 SDValue TransformFPLoadStorePair(SDNode *N); 378 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); 379 SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); 380 381 SDValue GetDemandedBits(SDValue V, const APInt &Mask); 382 383 /// Walk up chain skipping non-aliasing memory nodes, 384 /// looking for aliasing nodes and adding them to the Aliases vector. 385 void GatherAllAliases(SDNode *N, SDValue OriginalChain, 386 SmallVectorImpl<SDValue> &Aliases); 387 388 /// Return true if there is any possibility that the two addresses overlap. 389 bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; 390 391 /// Walk up chain skipping non-aliasing memory nodes, looking for a better 392 /// chain (aliasing node.) 393 SDValue FindBetterChain(SDNode *N, SDValue Chain); 394 395 /// Try to replace a store and any possibly adjacent stores on 396 /// consecutive chains with better chains. Return true only if St is 397 /// replaced. 398 /// 399 /// Notice that other chains may still be replaced even if the function 400 /// returns false. 401 bool findBetterNeighborChains(StoreSDNode *St); 402 403 /// Match "(X shl/srl V1) & V2" where V2 may not be present. 404 bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask); 405 406 /// Holds a pointer to an LSBaseSDNode as well as information on where it 407 /// is located in a sequence of memory operations connected by a chain. 408 struct MemOpLink { 409 MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): 410 MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } 411 // Ptr to the mem node. 412 LSBaseSDNode *MemNode; 413 // Offset from the base ptr. 414 int64_t OffsetFromBase; 415 // What is the sequence number of this mem node. 416 // Lowest mem operand in the DAG starts at zero. 417 unsigned SequenceNum; 418 }; 419 420 /// This is a helper function for visitMUL to check the profitability 421 /// of folding (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2). 422 /// MulNode is the original multiply, AddNode is (add x, c1), 423 /// and ConstNode is c2. 424 bool isMulAddWithConstProfitable(SDNode *MulNode, 425 SDValue &AddNode, 426 SDValue &ConstNode); 427 428 /// This is a helper function for MergeStoresOfConstantsOrVecElts. Returns a 429 /// constant build_vector of the stored constant values in Stores. 430 SDValue getMergedConstantVectorStore(SelectionDAG &DAG, const SDLoc &SL, 431 ArrayRef<MemOpLink> Stores, 432 SmallVectorImpl<SDValue> &Chains, 433 EVT Ty) const; 434 435 /// This is a helper function for visitAND and visitZERO_EXTEND. Returns 436 /// true if the (and (load x) c) pattern matches an extload. ExtVT returns 437 /// the type of the loaded value to be extended. LoadedVT returns the type 438 /// of the original loaded value. NarrowLoad returns whether the load would 439 /// need to be narrowed in order to match. 440 bool isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, 441 EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, 442 bool &NarrowLoad); 443 444 /// This is a helper function for MergeConsecutiveStores. When the source 445 /// elements of the consecutive stores are all constants or all extracted 446 /// vector elements, try to merge them into one larger store. 447 /// \return True if a merged store was created. 448 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, 449 EVT MemVT, unsigned NumStores, 450 bool IsConstantSrc, bool UseVector); 451 452 /// This is a helper function for MergeConsecutiveStores. 453 /// Stores that may be merged are placed in StoreNodes. 454 /// Loads that may alias with those stores are placed in AliasLoadNodes. 455 void getStoreMergeAndAliasCandidates( 456 StoreSDNode* St, SmallVectorImpl<MemOpLink> &StoreNodes, 457 SmallVectorImpl<LSBaseSDNode*> &AliasLoadNodes); 458 459 /// Helper function for MergeConsecutiveStores. Checks if 460 /// Candidate stores have indirect dependency through their 461 /// operands. \return True if safe to merge 462 bool checkMergeStoreCandidatesForDependencies( 463 SmallVectorImpl<MemOpLink> &StoreNodes); 464 465 /// Merge consecutive store operations into a wide store. 466 /// This optimization uses wide integers or vectors when possible. 467 /// \return True if some memory operations were changed. 468 bool MergeConsecutiveStores(StoreSDNode *N); 469 470 /// \brief Try to transform a truncation where C is a constant: 471 /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) 472 /// 473 /// \p N needs to be a truncation and its first operand an AND. Other 474 /// requirements are checked by the function (e.g. that trunc is 475 /// single-use) and if missed an empty SDValue is returned. 476 SDValue distributeTruncateThroughAnd(SDNode *N); 477 478 public: 479 DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) 480 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), 481 OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { 482 ForCodeSize = DAG.getMachineFunction().getFunction()->optForSize(); 483 } 484 485 /// Runs the dag combiner on all nodes in the work list 486 void Run(CombineLevel AtLevel); 487 488 SelectionDAG &getDAG() const { return DAG; } 489 490 /// Returns a type large enough to hold any valid shift amount - before type 491 /// legalization these can be huge. 492 EVT getShiftAmountTy(EVT LHSTy) { 493 assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); 494 if (LHSTy.isVector()) 495 return LHSTy; 496 auto &DL = DAG.getDataLayout(); 497 return LegalTypes ? TLI.getScalarShiftAmountTy(DL, LHSTy) 498 : TLI.getPointerTy(DL); 499 } 500 501 /// This method returns true if we are running before type legalization or 502 /// if the specified VT is legal. 503 bool isTypeLegal(const EVT &VT) { 504 if (!LegalTypes) return true; 505 return TLI.isTypeLegal(VT); 506 } 507 508 /// Convenience wrapper around TargetLowering::getSetCCResultType 509 EVT getSetCCResultType(EVT VT) const { 510 return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); 511 } 512 }; 513 } 514 515 516 namespace { 517 /// This class is a DAGUpdateListener that removes any deleted 518 /// nodes from the worklist. 519 class WorklistRemover : public SelectionDAG::DAGUpdateListener { 520 DAGCombiner &DC; 521 public: 522 explicit WorklistRemover(DAGCombiner &dc) 523 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 524 525 void NodeDeleted(SDNode *N, SDNode *E) override { 526 DC.removeFromWorklist(N); 527 } 528 }; 529 } 530 531 //===----------------------------------------------------------------------===// 532 // TargetLowering::DAGCombinerInfo implementation 533 //===----------------------------------------------------------------------===// 534 535 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 536 ((DAGCombiner*)DC)->AddToWorklist(N); 537 } 538 539 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { 540 ((DAGCombiner*)DC)->removeFromWorklist(N); 541 } 542 543 SDValue TargetLowering::DAGCombinerInfo:: 544 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { 545 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 546 } 547 548 SDValue TargetLowering::DAGCombinerInfo:: 549 CombineTo(SDNode *N, SDValue Res, bool AddTo) { 550 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 551 } 552 553 554 SDValue TargetLowering::DAGCombinerInfo:: 555 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 556 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 557 } 558 559 void TargetLowering::DAGCombinerInfo:: 560 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 561 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 562 } 563 564 //===----------------------------------------------------------------------===// 565 // Helper Functions 566 //===----------------------------------------------------------------------===// 567 568 void DAGCombiner::deleteAndRecombine(SDNode *N) { 569 removeFromWorklist(N); 570 571 // If the operands of this node are only used by the node, they will now be 572 // dead. Make sure to re-visit them and recursively delete dead nodes. 573 for (const SDValue &Op : N->ops()) 574 // For an operand generating multiple values, one of the values may 575 // become dead allowing further simplification (e.g. split index 576 // arithmetic from an indexed load). 577 if (Op->hasOneUse() || Op->getNumValues() > 1) 578 AddToWorklist(Op.getNode()); 579 580 DAG.DeleteNode(N); 581 } 582 583 /// Return 1 if we can compute the negated form of the specified expression for 584 /// the same cost as the expression itself, or 2 if we can compute the negated 585 /// form more cheaply than the expression itself. 586 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 587 const TargetLowering &TLI, 588 const TargetOptions *Options, 589 unsigned Depth = 0) { 590 // fneg is removable even if it has multiple uses. 591 if (Op.getOpcode() == ISD::FNEG) return 2; 592 593 // Don't allow anything with multiple uses. 594 if (!Op.hasOneUse()) return 0; 595 596 // Don't recurse exponentially. 597 if (Depth > 6) return 0; 598 599 switch (Op.getOpcode()) { 600 default: return false; 601 case ISD::ConstantFP: 602 // Don't invert constant FP values after legalize. The negated constant 603 // isn't necessarily legal. 604 return LegalOperations ? 0 : 1; 605 case ISD::FADD: 606 // FIXME: determine better conditions for this xform. 607 if (!Options->UnsafeFPMath) return 0; 608 609 // After operation legalization, it might not be legal to create new FSUBs. 610 if (LegalOperations && 611 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 612 return 0; 613 614 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 615 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 616 Options, Depth + 1)) 617 return V; 618 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 619 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 620 Depth + 1); 621 case ISD::FSUB: 622 // We can't turn -(A-B) into B-A when we honor signed zeros. 623 if (!Options->UnsafeFPMath) return 0; 624 625 // fold (fneg (fsub A, B)) -> (fsub B, A) 626 return 1; 627 628 case ISD::FMUL: 629 case ISD::FDIV: 630 if (Options->HonorSignDependentRoundingFPMath()) return 0; 631 632 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 633 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 634 Options, Depth + 1)) 635 return V; 636 637 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 638 Depth + 1); 639 640 case ISD::FP_EXTEND: 641 case ISD::FP_ROUND: 642 case ISD::FSIN: 643 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 644 Depth + 1); 645 } 646 } 647 648 /// If isNegatibleForFree returns true, return the newly negated expression. 649 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 650 bool LegalOperations, unsigned Depth = 0) { 651 const TargetOptions &Options = DAG.getTarget().Options; 652 // fneg is removable even if it has multiple uses. 653 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 654 655 // Don't allow anything with multiple uses. 656 assert(Op.hasOneUse() && "Unknown reuse!"); 657 658 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 659 660 const SDNodeFlags *Flags = Op.getNode()->getFlags(); 661 662 switch (Op.getOpcode()) { 663 default: llvm_unreachable("Unknown code"); 664 case ISD::ConstantFP: { 665 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 666 V.changeSign(); 667 return DAG.getConstantFP(V, SDLoc(Op), Op.getValueType()); 668 } 669 case ISD::FADD: 670 // FIXME: determine better conditions for this xform. 671 assert(Options.UnsafeFPMath); 672 673 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 674 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 675 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 676 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 677 GetNegatedExpression(Op.getOperand(0), DAG, 678 LegalOperations, Depth+1), 679 Op.getOperand(1), Flags); 680 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 681 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 682 GetNegatedExpression(Op.getOperand(1), DAG, 683 LegalOperations, Depth+1), 684 Op.getOperand(0), Flags); 685 case ISD::FSUB: 686 // We can't turn -(A-B) into B-A when we honor signed zeros. 687 assert(Options.UnsafeFPMath); 688 689 // fold (fneg (fsub 0, B)) -> B 690 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 691 if (N0CFP->isZero()) 692 return Op.getOperand(1); 693 694 // fold (fneg (fsub A, B)) -> (fsub B, A) 695 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 696 Op.getOperand(1), Op.getOperand(0), Flags); 697 698 case ISD::FMUL: 699 case ISD::FDIV: 700 assert(!Options.HonorSignDependentRoundingFPMath()); 701 702 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 703 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 704 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 705 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 706 GetNegatedExpression(Op.getOperand(0), DAG, 707 LegalOperations, Depth+1), 708 Op.getOperand(1), Flags); 709 710 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 711 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 712 Op.getOperand(0), 713 GetNegatedExpression(Op.getOperand(1), DAG, 714 LegalOperations, Depth+1), Flags); 715 716 case ISD::FP_EXTEND: 717 case ISD::FSIN: 718 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 719 GetNegatedExpression(Op.getOperand(0), DAG, 720 LegalOperations, Depth+1)); 721 case ISD::FP_ROUND: 722 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 723 GetNegatedExpression(Op.getOperand(0), DAG, 724 LegalOperations, Depth+1), 725 Op.getOperand(1)); 726 } 727 } 728 729 // Return true if this node is a setcc, or is a select_cc 730 // that selects between the target values used for true and false, making it 731 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to 732 // the appropriate nodes based on the type of node we are checking. This 733 // simplifies life a bit for the callers. 734 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 735 SDValue &CC) const { 736 if (N.getOpcode() == ISD::SETCC) { 737 LHS = N.getOperand(0); 738 RHS = N.getOperand(1); 739 CC = N.getOperand(2); 740 return true; 741 } 742 743 if (N.getOpcode() != ISD::SELECT_CC || 744 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 745 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 746 return false; 747 748 if (TLI.getBooleanContents(N.getValueType()) == 749 TargetLowering::UndefinedBooleanContent) 750 return false; 751 752 LHS = N.getOperand(0); 753 RHS = N.getOperand(1); 754 CC = N.getOperand(4); 755 return true; 756 } 757 758 /// Return true if this is a SetCC-equivalent operation with only one use. 759 /// If this is true, it allows the users to invert the operation for free when 760 /// it is profitable to do so. 761 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 762 SDValue N0, N1, N2; 763 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 764 return true; 765 return false; 766 } 767 768 // \brief Returns the SDNode if it is a constant float BuildVector 769 // or constant float. 770 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { 771 if (isa<ConstantFPSDNode>(N)) 772 return N.getNode(); 773 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 774 return N.getNode(); 775 return nullptr; 776 } 777 778 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 779 // int. 780 static ConstantSDNode *isConstOrConstSplat(SDValue N) { 781 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) 782 return CN; 783 784 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 785 BitVector UndefElements; 786 ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); 787 788 // BuildVectors can truncate their operands. Ignore that case here. 789 // FIXME: We blindly ignore splats which include undef which is overly 790 // pessimistic. 791 if (CN && UndefElements.none() && 792 CN->getValueType(0) == N.getValueType().getScalarType()) 793 return CN; 794 } 795 796 return nullptr; 797 } 798 799 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 800 // float. 801 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { 802 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 803 return CN; 804 805 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 806 BitVector UndefElements; 807 ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); 808 809 if (CN && UndefElements.none()) 810 return CN; 811 } 812 813 return nullptr; 814 } 815 816 SDValue DAGCombiner::ReassociateOps(unsigned Opc, const SDLoc &DL, SDValue N0, 817 SDValue N1) { 818 EVT VT = N0.getValueType(); 819 if (N0.getOpcode() == Opc) { 820 if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { 821 if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1)) { 822 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) 823 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, L, R)) 824 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 825 return SDValue(); 826 } 827 if (N0.hasOneUse()) { 828 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one 829 // use 830 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); 831 if (!OpNode.getNode()) 832 return SDValue(); 833 AddToWorklist(OpNode.getNode()); 834 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 835 } 836 } 837 } 838 839 if (N1.getOpcode() == Opc) { 840 if (SDNode *R = DAG.isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { 841 if (SDNode *L = DAG.isConstantIntBuildVectorOrConstantInt(N0)) { 842 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) 843 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, DL, VT, R, L)) 844 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 845 return SDValue(); 846 } 847 if (N1.hasOneUse()) { 848 // reassoc. (op x, (op y, c1)) -> (op (op x, y), c1) iff x+c1 has one 849 // use 850 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0, N1.getOperand(0)); 851 if (!OpNode.getNode()) 852 return SDValue(); 853 AddToWorklist(OpNode.getNode()); 854 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 855 } 856 } 857 } 858 859 return SDValue(); 860 } 861 862 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 863 bool AddTo) { 864 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 865 ++NodesCombined; 866 DEBUG(dbgs() << "\nReplacing.1 "; 867 N->dump(&DAG); 868 dbgs() << "\nWith: "; 869 To[0].getNode()->dump(&DAG); 870 dbgs() << " and " << NumTo-1 << " other values\n"); 871 for (unsigned i = 0, e = NumTo; i != e; ++i) 872 assert((!To[i].getNode() || 873 N->getValueType(i) == To[i].getValueType()) && 874 "Cannot combine value to value of different type!"); 875 876 WorklistRemover DeadNodes(*this); 877 DAG.ReplaceAllUsesWith(N, To); 878 if (AddTo) { 879 // Push the new nodes and any users onto the worklist 880 for (unsigned i = 0, e = NumTo; i != e; ++i) { 881 if (To[i].getNode()) { 882 AddToWorklist(To[i].getNode()); 883 AddUsersToWorklist(To[i].getNode()); 884 } 885 } 886 } 887 888 // Finally, if the node is now dead, remove it from the graph. The node 889 // may not be dead if the replacement process recursively simplified to 890 // something else needing this node. 891 if (N->use_empty()) 892 deleteAndRecombine(N); 893 return SDValue(N, 0); 894 } 895 896 void DAGCombiner:: 897 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 898 // Replace all uses. If any nodes become isomorphic to other nodes and 899 // are deleted, make sure to remove them from our worklist. 900 WorklistRemover DeadNodes(*this); 901 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); 902 903 // Push the new node and any (possibly new) users onto the worklist. 904 AddToWorklist(TLO.New.getNode()); 905 AddUsersToWorklist(TLO.New.getNode()); 906 907 // Finally, if the node is now dead, remove it from the graph. The node 908 // may not be dead if the replacement process recursively simplified to 909 // something else needing this node. 910 if (TLO.Old.getNode()->use_empty()) 911 deleteAndRecombine(TLO.Old.getNode()); 912 } 913 914 /// Check the specified integer node value to see if it can be simplified or if 915 /// things it uses can be simplified by bit propagation. If so, return true. 916 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 917 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 918 APInt KnownZero, KnownOne; 919 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 920 return false; 921 922 // Revisit the node. 923 AddToWorklist(Op.getNode()); 924 925 // Replace the old value with the new one. 926 ++NodesCombined; 927 DEBUG(dbgs() << "\nReplacing.2 "; 928 TLO.Old.getNode()->dump(&DAG); 929 dbgs() << "\nWith: "; 930 TLO.New.getNode()->dump(&DAG); 931 dbgs() << '\n'); 932 933 CommitTargetLoweringOpt(TLO); 934 return true; 935 } 936 937 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { 938 SDLoc dl(Load); 939 EVT VT = Load->getValueType(0); 940 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); 941 942 DEBUG(dbgs() << "\nReplacing.9 "; 943 Load->dump(&DAG); 944 dbgs() << "\nWith: "; 945 Trunc.getNode()->dump(&DAG); 946 dbgs() << '\n'); 947 WorklistRemover DeadNodes(*this); 948 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); 949 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); 950 deleteAndRecombine(Load); 951 AddToWorklist(Trunc.getNode()); 952 } 953 954 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { 955 Replace = false; 956 SDLoc dl(Op); 957 if (ISD::isUNINDEXEDLoad(Op.getNode())) { 958 LoadSDNode *LD = cast<LoadSDNode>(Op); 959 EVT MemVT = LD->getMemoryVT(); 960 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 961 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD 962 : ISD::EXTLOAD) 963 : LD->getExtensionType(); 964 Replace = true; 965 return DAG.getExtLoad(ExtType, dl, PVT, 966 LD->getChain(), LD->getBasePtr(), 967 MemVT, LD->getMemOperand()); 968 } 969 970 unsigned Opc = Op.getOpcode(); 971 switch (Opc) { 972 default: break; 973 case ISD::AssertSext: 974 return DAG.getNode(ISD::AssertSext, dl, PVT, 975 SExtPromoteOperand(Op.getOperand(0), PVT), 976 Op.getOperand(1)); 977 case ISD::AssertZext: 978 return DAG.getNode(ISD::AssertZext, dl, PVT, 979 ZExtPromoteOperand(Op.getOperand(0), PVT), 980 Op.getOperand(1)); 981 case ISD::Constant: { 982 unsigned ExtOpc = 983 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 984 return DAG.getNode(ExtOpc, dl, PVT, Op); 985 } 986 } 987 988 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 989 return SDValue(); 990 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 991 } 992 993 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 994 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 995 return SDValue(); 996 EVT OldVT = Op.getValueType(); 997 SDLoc dl(Op); 998 bool Replace = false; 999 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 1000 if (!NewOp.getNode()) 1001 return SDValue(); 1002 AddToWorklist(NewOp.getNode()); 1003 1004 if (Replace) 1005 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 1006 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 1007 DAG.getValueType(OldVT)); 1008 } 1009 1010 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 1011 EVT OldVT = Op.getValueType(); 1012 SDLoc dl(Op); 1013 bool Replace = false; 1014 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 1015 if (!NewOp.getNode()) 1016 return SDValue(); 1017 AddToWorklist(NewOp.getNode()); 1018 1019 if (Replace) 1020 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 1021 return DAG.getZeroExtendInReg(NewOp, dl, OldVT); 1022 } 1023 1024 /// Promote the specified integer binary operation if the target indicates it is 1025 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to 1026 /// i32 since i16 instructions are longer. 1027 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 1028 if (!LegalOperations) 1029 return SDValue(); 1030 1031 EVT VT = Op.getValueType(); 1032 if (VT.isVector() || !VT.isInteger()) 1033 return SDValue(); 1034 1035 // If operation type is 'undesirable', e.g. i16 on x86, consider 1036 // promoting it. 1037 unsigned Opc = Op.getOpcode(); 1038 if (TLI.isTypeDesirableForOp(Opc, VT)) 1039 return SDValue(); 1040 1041 EVT PVT = VT; 1042 // Consult target whether it is a good idea to promote this operation and 1043 // what's the right type to promote it to. 1044 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1045 assert(PVT != VT && "Don't know what type to promote to!"); 1046 1047 bool Replace0 = false; 1048 SDValue N0 = Op.getOperand(0); 1049 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 1050 if (!NN0.getNode()) 1051 return SDValue(); 1052 1053 bool Replace1 = false; 1054 SDValue N1 = Op.getOperand(1); 1055 SDValue NN1; 1056 if (N0 == N1) 1057 NN1 = NN0; 1058 else { 1059 NN1 = PromoteOperand(N1, PVT, Replace1); 1060 if (!NN1.getNode()) 1061 return SDValue(); 1062 } 1063 1064 AddToWorklist(NN0.getNode()); 1065 if (NN1.getNode()) 1066 AddToWorklist(NN1.getNode()); 1067 1068 if (Replace0) 1069 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 1070 if (Replace1) 1071 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 1072 1073 DEBUG(dbgs() << "\nPromoting "; 1074 Op.getNode()->dump(&DAG)); 1075 SDLoc dl(Op); 1076 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1077 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 1078 } 1079 return SDValue(); 1080 } 1081 1082 /// Promote the specified integer shift operation if the target indicates it is 1083 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to 1084 /// i32 since i16 instructions are longer. 1085 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 1086 if (!LegalOperations) 1087 return SDValue(); 1088 1089 EVT VT = Op.getValueType(); 1090 if (VT.isVector() || !VT.isInteger()) 1091 return SDValue(); 1092 1093 // If operation type is 'undesirable', e.g. i16 on x86, consider 1094 // promoting it. 1095 unsigned Opc = Op.getOpcode(); 1096 if (TLI.isTypeDesirableForOp(Opc, VT)) 1097 return SDValue(); 1098 1099 EVT PVT = VT; 1100 // Consult target whether it is a good idea to promote this operation and 1101 // what's the right type to promote it to. 1102 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1103 assert(PVT != VT && "Don't know what type to promote to!"); 1104 1105 bool Replace = false; 1106 SDValue N0 = Op.getOperand(0); 1107 if (Opc == ISD::SRA) 1108 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 1109 else if (Opc == ISD::SRL) 1110 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 1111 else 1112 N0 = PromoteOperand(N0, PVT, Replace); 1113 if (!N0.getNode()) 1114 return SDValue(); 1115 1116 AddToWorklist(N0.getNode()); 1117 if (Replace) 1118 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 1119 1120 DEBUG(dbgs() << "\nPromoting "; 1121 Op.getNode()->dump(&DAG)); 1122 SDLoc dl(Op); 1123 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1124 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 1125 } 1126 return SDValue(); 1127 } 1128 1129 SDValue DAGCombiner::PromoteExtend(SDValue Op) { 1130 if (!LegalOperations) 1131 return SDValue(); 1132 1133 EVT VT = Op.getValueType(); 1134 if (VT.isVector() || !VT.isInteger()) 1135 return SDValue(); 1136 1137 // If operation type is 'undesirable', e.g. i16 on x86, consider 1138 // promoting it. 1139 unsigned Opc = Op.getOpcode(); 1140 if (TLI.isTypeDesirableForOp(Opc, VT)) 1141 return SDValue(); 1142 1143 EVT PVT = VT; 1144 // Consult target whether it is a good idea to promote this operation and 1145 // what's the right type to promote it to. 1146 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1147 assert(PVT != VT && "Don't know what type to promote to!"); 1148 // fold (aext (aext x)) -> (aext x) 1149 // fold (aext (zext x)) -> (zext x) 1150 // fold (aext (sext x)) -> (sext x) 1151 DEBUG(dbgs() << "\nPromoting "; 1152 Op.getNode()->dump(&DAG)); 1153 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); 1154 } 1155 return SDValue(); 1156 } 1157 1158 bool DAGCombiner::PromoteLoad(SDValue Op) { 1159 if (!LegalOperations) 1160 return false; 1161 1162 if (!ISD::isUNINDEXEDLoad(Op.getNode())) 1163 return false; 1164 1165 EVT VT = Op.getValueType(); 1166 if (VT.isVector() || !VT.isInteger()) 1167 return false; 1168 1169 // If operation type is 'undesirable', e.g. i16 on x86, consider 1170 // promoting it. 1171 unsigned Opc = Op.getOpcode(); 1172 if (TLI.isTypeDesirableForOp(Opc, VT)) 1173 return false; 1174 1175 EVT PVT = VT; 1176 // Consult target whether it is a good idea to promote this operation and 1177 // what's the right type to promote it to. 1178 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1179 assert(PVT != VT && "Don't know what type to promote to!"); 1180 1181 SDLoc dl(Op); 1182 SDNode *N = Op.getNode(); 1183 LoadSDNode *LD = cast<LoadSDNode>(N); 1184 EVT MemVT = LD->getMemoryVT(); 1185 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 1186 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD 1187 : ISD::EXTLOAD) 1188 : LD->getExtensionType(); 1189 SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, 1190 LD->getChain(), LD->getBasePtr(), 1191 MemVT, LD->getMemOperand()); 1192 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); 1193 1194 DEBUG(dbgs() << "\nPromoting "; 1195 N->dump(&DAG); 1196 dbgs() << "\nTo: "; 1197 Result.getNode()->dump(&DAG); 1198 dbgs() << '\n'); 1199 WorklistRemover DeadNodes(*this); 1200 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 1201 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); 1202 deleteAndRecombine(N); 1203 AddToWorklist(Result.getNode()); 1204 return true; 1205 } 1206 return false; 1207 } 1208 1209 /// \brief Recursively delete a node which has no uses and any operands for 1210 /// which it is the only use. 1211 /// 1212 /// Note that this both deletes the nodes and removes them from the worklist. 1213 /// It also adds any nodes who have had a user deleted to the worklist as they 1214 /// may now have only one use and subject to other combines. 1215 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { 1216 if (!N->use_empty()) 1217 return false; 1218 1219 SmallSetVector<SDNode *, 16> Nodes; 1220 Nodes.insert(N); 1221 do { 1222 N = Nodes.pop_back_val(); 1223 if (!N) 1224 continue; 1225 1226 if (N->use_empty()) { 1227 for (const SDValue &ChildN : N->op_values()) 1228 Nodes.insert(ChildN.getNode()); 1229 1230 removeFromWorklist(N); 1231 DAG.DeleteNode(N); 1232 } else { 1233 AddToWorklist(N); 1234 } 1235 } while (!Nodes.empty()); 1236 return true; 1237 } 1238 1239 //===----------------------------------------------------------------------===// 1240 // Main DAG Combiner implementation 1241 //===----------------------------------------------------------------------===// 1242 1243 void DAGCombiner::Run(CombineLevel AtLevel) { 1244 // set the instance variables, so that the various visit routines may use it. 1245 Level = AtLevel; 1246 LegalOperations = Level >= AfterLegalizeVectorOps; 1247 LegalTypes = Level >= AfterLegalizeTypes; 1248 1249 // Add all the dag nodes to the worklist. 1250 for (SDNode &Node : DAG.allnodes()) 1251 AddToWorklist(&Node); 1252 1253 // Create a dummy node (which is not added to allnodes), that adds a reference 1254 // to the root node, preventing it from being deleted, and tracking any 1255 // changes of the root. 1256 HandleSDNode Dummy(DAG.getRoot()); 1257 1258 // While the worklist isn't empty, find a node and try to combine it. 1259 while (!WorklistMap.empty()) { 1260 SDNode *N; 1261 // The Worklist holds the SDNodes in order, but it may contain null entries. 1262 do { 1263 N = Worklist.pop_back_val(); 1264 } while (!N); 1265 1266 bool GoodWorklistEntry = WorklistMap.erase(N); 1267 (void)GoodWorklistEntry; 1268 assert(GoodWorklistEntry && 1269 "Found a worklist entry without a corresponding map entry!"); 1270 1271 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1272 // N is deleted from the DAG, since they too may now be dead or may have a 1273 // reduced number of uses, allowing other xforms. 1274 if (recursivelyDeleteUnusedNodes(N)) 1275 continue; 1276 1277 WorklistRemover DeadNodes(*this); 1278 1279 // If this combine is running after legalizing the DAG, re-legalize any 1280 // nodes pulled off the worklist. 1281 if (Level == AfterLegalizeDAG) { 1282 SmallSetVector<SDNode *, 16> UpdatedNodes; 1283 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); 1284 1285 for (SDNode *LN : UpdatedNodes) { 1286 AddToWorklist(LN); 1287 AddUsersToWorklist(LN); 1288 } 1289 if (!NIsValid) 1290 continue; 1291 } 1292 1293 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); 1294 1295 // Add any operands of the new node which have not yet been combined to the 1296 // worklist as well. Because the worklist uniques things already, this 1297 // won't repeatedly process the same operand. 1298 CombinedNodes.insert(N); 1299 for (const SDValue &ChildN : N->op_values()) 1300 if (!CombinedNodes.count(ChildN.getNode())) 1301 AddToWorklist(ChildN.getNode()); 1302 1303 SDValue RV = combine(N); 1304 1305 if (!RV.getNode()) 1306 continue; 1307 1308 ++NodesCombined; 1309 1310 // If we get back the same node we passed in, rather than a new node or 1311 // zero, we know that the node must have defined multiple values and 1312 // CombineTo was used. Since CombineTo takes care of the worklist 1313 // mechanics for us, we have no work to do in this case. 1314 if (RV.getNode() == N) 1315 continue; 1316 1317 assert(N->getOpcode() != ISD::DELETED_NODE && 1318 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1319 "Node was deleted but visit returned new node!"); 1320 1321 DEBUG(dbgs() << " ... into: "; 1322 RV.getNode()->dump(&DAG)); 1323 1324 if (N->getNumValues() == RV.getNode()->getNumValues()) 1325 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1326 else { 1327 assert(N->getValueType(0) == RV.getValueType() && 1328 N->getNumValues() == 1 && "Type mismatch"); 1329 SDValue OpV = RV; 1330 DAG.ReplaceAllUsesWith(N, &OpV); 1331 } 1332 1333 // Push the new node and any users onto the worklist 1334 AddToWorklist(RV.getNode()); 1335 AddUsersToWorklist(RV.getNode()); 1336 1337 // Finally, if the node is now dead, remove it from the graph. The node 1338 // may not be dead if the replacement process recursively simplified to 1339 // something else needing this node. This will also take care of adding any 1340 // operands which have lost a user to the worklist. 1341 recursivelyDeleteUnusedNodes(N); 1342 } 1343 1344 // If the root changed (e.g. it was a dead load, update the root). 1345 DAG.setRoot(Dummy.getValue()); 1346 DAG.RemoveDeadNodes(); 1347 } 1348 1349 SDValue DAGCombiner::visit(SDNode *N) { 1350 switch (N->getOpcode()) { 1351 default: break; 1352 case ISD::TokenFactor: return visitTokenFactor(N); 1353 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); 1354 case ISD::ADD: return visitADD(N); 1355 case ISD::SUB: return visitSUB(N); 1356 case ISD::ADDC: return visitADDC(N); 1357 case ISD::SUBC: return visitSUBC(N); 1358 case ISD::ADDE: return visitADDE(N); 1359 case ISD::SUBE: return visitSUBE(N); 1360 case ISD::MUL: return visitMUL(N); 1361 case ISD::SDIV: return visitSDIV(N); 1362 case ISD::UDIV: return visitUDIV(N); 1363 case ISD::SREM: 1364 case ISD::UREM: return visitREM(N); 1365 case ISD::MULHU: return visitMULHU(N); 1366 case ISD::MULHS: return visitMULHS(N); 1367 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); 1368 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); 1369 case ISD::SMULO: return visitSMULO(N); 1370 case ISD::UMULO: return visitUMULO(N); 1371 case ISD::SMIN: 1372 case ISD::SMAX: 1373 case ISD::UMIN: 1374 case ISD::UMAX: return visitIMINMAX(N); 1375 case ISD::AND: return visitAND(N); 1376 case ISD::OR: return visitOR(N); 1377 case ISD::XOR: return visitXOR(N); 1378 case ISD::SHL: return visitSHL(N); 1379 case ISD::SRA: return visitSRA(N); 1380 case ISD::SRL: return visitSRL(N); 1381 case ISD::ROTR: 1382 case ISD::ROTL: return visitRotate(N); 1383 case ISD::BSWAP: return visitBSWAP(N); 1384 case ISD::BITREVERSE: return visitBITREVERSE(N); 1385 case ISD::CTLZ: return visitCTLZ(N); 1386 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); 1387 case ISD::CTTZ: return visitCTTZ(N); 1388 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); 1389 case ISD::CTPOP: return visitCTPOP(N); 1390 case ISD::SELECT: return visitSELECT(N); 1391 case ISD::VSELECT: return visitVSELECT(N); 1392 case ISD::SELECT_CC: return visitSELECT_CC(N); 1393 case ISD::SETCC: return visitSETCC(N); 1394 case ISD::SETCCE: return visitSETCCE(N); 1395 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); 1396 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); 1397 case ISD::ANY_EXTEND: return visitANY_EXTEND(N); 1398 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); 1399 case ISD::SIGN_EXTEND_VECTOR_INREG: return visitSIGN_EXTEND_VECTOR_INREG(N); 1400 case ISD::ZERO_EXTEND_VECTOR_INREG: return visitZERO_EXTEND_VECTOR_INREG(N); 1401 case ISD::TRUNCATE: return visitTRUNCATE(N); 1402 case ISD::BITCAST: return visitBITCAST(N); 1403 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); 1404 case ISD::FADD: return visitFADD(N); 1405 case ISD::FSUB: return visitFSUB(N); 1406 case ISD::FMUL: return visitFMUL(N); 1407 case ISD::FMA: return visitFMA(N); 1408 case ISD::FDIV: return visitFDIV(N); 1409 case ISD::FREM: return visitFREM(N); 1410 case ISD::FSQRT: return visitFSQRT(N); 1411 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); 1412 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); 1413 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); 1414 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); 1415 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); 1416 case ISD::FP_ROUND: return visitFP_ROUND(N); 1417 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); 1418 case ISD::FP_EXTEND: return visitFP_EXTEND(N); 1419 case ISD::FNEG: return visitFNEG(N); 1420 case ISD::FABS: return visitFABS(N); 1421 case ISD::FFLOOR: return visitFFLOOR(N); 1422 case ISD::FMINNUM: return visitFMINNUM(N); 1423 case ISD::FMAXNUM: return visitFMAXNUM(N); 1424 case ISD::FCEIL: return visitFCEIL(N); 1425 case ISD::FTRUNC: return visitFTRUNC(N); 1426 case ISD::BRCOND: return visitBRCOND(N); 1427 case ISD::BR_CC: return visitBR_CC(N); 1428 case ISD::LOAD: return visitLOAD(N); 1429 case ISD::STORE: return visitSTORE(N); 1430 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); 1431 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); 1432 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); 1433 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); 1434 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); 1435 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); 1436 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); 1437 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); 1438 case ISD::MGATHER: return visitMGATHER(N); 1439 case ISD::MLOAD: return visitMLOAD(N); 1440 case ISD::MSCATTER: return visitMSCATTER(N); 1441 case ISD::MSTORE: return visitMSTORE(N); 1442 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); 1443 case ISD::FP16_TO_FP: return visitFP16_TO_FP(N); 1444 } 1445 return SDValue(); 1446 } 1447 1448 SDValue DAGCombiner::combine(SDNode *N) { 1449 SDValue RV = visit(N); 1450 1451 // If nothing happened, try a target-specific DAG combine. 1452 if (!RV.getNode()) { 1453 assert(N->getOpcode() != ISD::DELETED_NODE && 1454 "Node was deleted but visit returned NULL!"); 1455 1456 if (N->getOpcode() >= ISD::BUILTIN_OP_END || 1457 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { 1458 1459 // Expose the DAG combiner to the target combiner impls. 1460 TargetLowering::DAGCombinerInfo 1461 DagCombineInfo(DAG, Level, false, this); 1462 1463 RV = TLI.PerformDAGCombine(N, DagCombineInfo); 1464 } 1465 } 1466 1467 // If nothing happened still, try promoting the operation. 1468 if (!RV.getNode()) { 1469 switch (N->getOpcode()) { 1470 default: break; 1471 case ISD::ADD: 1472 case ISD::SUB: 1473 case ISD::MUL: 1474 case ISD::AND: 1475 case ISD::OR: 1476 case ISD::XOR: 1477 RV = PromoteIntBinOp(SDValue(N, 0)); 1478 break; 1479 case ISD::SHL: 1480 case ISD::SRA: 1481 case ISD::SRL: 1482 RV = PromoteIntShiftOp(SDValue(N, 0)); 1483 break; 1484 case ISD::SIGN_EXTEND: 1485 case ISD::ZERO_EXTEND: 1486 case ISD::ANY_EXTEND: 1487 RV = PromoteExtend(SDValue(N, 0)); 1488 break; 1489 case ISD::LOAD: 1490 if (PromoteLoad(SDValue(N, 0))) 1491 RV = SDValue(N, 0); 1492 break; 1493 } 1494 } 1495 1496 // If N is a commutative binary node, try commuting it to enable more 1497 // sdisel CSE. 1498 if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && 1499 N->getNumValues() == 1) { 1500 SDValue N0 = N->getOperand(0); 1501 SDValue N1 = N->getOperand(1); 1502 1503 // Constant operands are canonicalized to RHS. 1504 if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { 1505 SDValue Ops[] = {N1, N0}; 1506 SDNode *CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops, 1507 N->getFlags()); 1508 if (CSENode) 1509 return SDValue(CSENode, 0); 1510 } 1511 } 1512 1513 return RV; 1514 } 1515 1516 /// Given a node, return its input chain if it has one, otherwise return a null 1517 /// sd operand. 1518 static SDValue getInputChainForNode(SDNode *N) { 1519 if (unsigned NumOps = N->getNumOperands()) { 1520 if (N->getOperand(0).getValueType() == MVT::Other) 1521 return N->getOperand(0); 1522 if (N->getOperand(NumOps-1).getValueType() == MVT::Other) 1523 return N->getOperand(NumOps-1); 1524 for (unsigned i = 1; i < NumOps-1; ++i) 1525 if (N->getOperand(i).getValueType() == MVT::Other) 1526 return N->getOperand(i); 1527 } 1528 return SDValue(); 1529 } 1530 1531 SDValue DAGCombiner::visitTokenFactor(SDNode *N) { 1532 // If N has two operands, where one has an input chain equal to the other, 1533 // the 'other' chain is redundant. 1534 if (N->getNumOperands() == 2) { 1535 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) 1536 return N->getOperand(0); 1537 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) 1538 return N->getOperand(1); 1539 } 1540 1541 SmallVector<SDNode *, 8> TFs; // List of token factors to visit. 1542 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 1543 SmallPtrSet<SDNode*, 16> SeenOps; 1544 bool Changed = false; // If we should replace this token factor. 1545 1546 // Start out with this token factor. 1547 TFs.push_back(N); 1548 1549 // Iterate through token factors. The TFs grows when new token factors are 1550 // encountered. 1551 for (unsigned i = 0; i < TFs.size(); ++i) { 1552 SDNode *TF = TFs[i]; 1553 1554 // Check each of the operands. 1555 for (const SDValue &Op : TF->op_values()) { 1556 1557 switch (Op.getOpcode()) { 1558 case ISD::EntryToken: 1559 // Entry tokens don't need to be added to the list. They are 1560 // redundant. 1561 Changed = true; 1562 break; 1563 1564 case ISD::TokenFactor: 1565 if (Op.hasOneUse() && 1566 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { 1567 // Queue up for processing. 1568 TFs.push_back(Op.getNode()); 1569 // Clean up in case the token factor is removed. 1570 AddToWorklist(Op.getNode()); 1571 Changed = true; 1572 break; 1573 } 1574 // Fall thru 1575 1576 default: 1577 // Only add if it isn't already in the list. 1578 if (SeenOps.insert(Op.getNode()).second) 1579 Ops.push_back(Op); 1580 else 1581 Changed = true; 1582 break; 1583 } 1584 } 1585 } 1586 1587 SDValue Result; 1588 1589 // If we've changed things around then replace token factor. 1590 if (Changed) { 1591 if (Ops.empty()) { 1592 // The entry token is the only possible outcome. 1593 Result = DAG.getEntryNode(); 1594 } else { 1595 // New and improved token factor. 1596 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); 1597 } 1598 1599 // Add users to worklist if AA is enabled, since it may introduce 1600 // a lot of new chained token factors while removing memory deps. 1601 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA 1602 : DAG.getSubtarget().useAA(); 1603 return CombineTo(N, Result, UseAA /*add to worklist*/); 1604 } 1605 1606 return Result; 1607 } 1608 1609 /// MERGE_VALUES can always be eliminated. 1610 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { 1611 WorklistRemover DeadNodes(*this); 1612 // Replacing results may cause a different MERGE_VALUES to suddenly 1613 // be CSE'd with N, and carry its uses with it. Iterate until no 1614 // uses remain, to ensure that the node can be safely deleted. 1615 // First add the users of this node to the work list so that they 1616 // can be tried again once they have new operands. 1617 AddUsersToWorklist(N); 1618 do { 1619 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1620 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); 1621 } while (!N->use_empty()); 1622 deleteAndRecombine(N); 1623 return SDValue(N, 0); // Return N so it doesn't get rechecked! 1624 } 1625 1626 /// If \p N is a ConstantSDNode with isOpaque() == false return it casted to a 1627 /// ConstantSDNode pointer else nullptr. 1628 static ConstantSDNode *getAsNonOpaqueConstant(SDValue N) { 1629 ConstantSDNode *Const = dyn_cast<ConstantSDNode>(N); 1630 return Const != nullptr && !Const->isOpaque() ? Const : nullptr; 1631 } 1632 1633 SDValue DAGCombiner::visitADD(SDNode *N) { 1634 SDValue N0 = N->getOperand(0); 1635 SDValue N1 = N->getOperand(1); 1636 EVT VT = N0.getValueType(); 1637 1638 // fold vector ops 1639 if (VT.isVector()) { 1640 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 1641 return FoldedVOp; 1642 1643 // fold (add x, 0) -> x, vector edition 1644 if (ISD::isBuildVectorAllZeros(N1.getNode())) 1645 return N0; 1646 if (ISD::isBuildVectorAllZeros(N0.getNode())) 1647 return N1; 1648 } 1649 1650 // fold (add x, undef) -> undef 1651 if (N0.isUndef()) 1652 return N0; 1653 if (N1.isUndef()) 1654 return N1; 1655 if (DAG.isConstantIntBuildVectorOrConstantInt(N0)) { 1656 // canonicalize constant to RHS 1657 if (!DAG.isConstantIntBuildVectorOrConstantInt(N1)) 1658 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); 1659 // fold (add c1, c2) -> c1+c2 1660 return DAG.FoldConstantArithmetic(ISD::ADD, SDLoc(N), VT, 1661 N0.getNode(), N1.getNode()); 1662 } 1663 // fold (add x, 0) -> x 1664 if (isNullConstant(N1)) 1665 return N0; 1666 // fold ((c1-A)+c2) -> (c1+c2)-A 1667 if (ConstantSDNode *N1C = getAsNonOpaqueConstant(N1)) { 1668 if (N0.getOpcode() == ISD::SUB) 1669 if (ConstantSDNode *N0C = getAsNonOpaqueConstant(N0.getOperand(0))) { 1670 SDLoc DL(N); 1671 return DAG.getNode(ISD::SUB, DL, VT, 1672 DAG.getConstant(N1C->getAPIntValue()+ 1673 N0C->getAPIntValue(), DL, VT), 1674 N0.getOperand(1)); 1675 } 1676 } 1677 // reassociate add 1678 if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) 1679 return RADD; 1680 // fold ((0-A) + B) -> B-A 1681 if (N0.getOpcode() == ISD::SUB && isNullConstant(N0.getOperand(0))) 1682 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); 1683 // fold (A + (0-B)) -> A-B 1684 if (N1.getOpcode() == ISD::SUB && isNullConstant(N1.getOperand(0))) 1685 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); 1686 // fold (A+(B-A)) -> B 1687 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) 1688 return N1.getOperand(0); 1689 // fold ((B-A)+A) -> B 1690 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) 1691 return N0.getOperand(0); 1692 // fold (A+(B-(A+C))) to (B-C) 1693 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1694 N0 == N1.getOperand(1).getOperand(0)) 1695 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1696 N1.getOperand(1).getOperand(1)); 1697 // fold (A+(B-(C+A))) to (B-C) 1698 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1699 N0 == N1.getOperand(1).getOperand(1)) 1700 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1701 N1.getOperand(1).getOperand(0)); 1702 // fold (A+((B-A)+or-C)) to (B+or-C) 1703 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && 1704 N1.getOperand(0).getOpcode() == ISD::SUB && 1705 N0 == N1.getOperand(0).getOperand(1)) 1706 return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, 1707 N1.getOperand(0).getOperand(0), N1.getOperand(1)); 1708 1709 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant 1710 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { 1711 SDValue N00 = N0.getOperand(0); 1712 SDValue N01 = N0.getOperand(1); 1713 SDValue N10 = N1.getOperand(0); 1714 SDValue N11 = N1.getOperand(1); 1715 1716 if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) 1717 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1718 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), 1719 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); 1720 } 1721 1722 if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) 1723 return SDValue(N, 0); 1724 1725 // fold (a+b) -> (a|b) iff a and b share no bits. 1726 if ((!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) && 1727 VT.isInteger() && !VT.isVector() && DAG.haveNoCommonBitsSet(N0, N1)) 1728 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); 1729 1730 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) 1731 if (N1.getOpcode() == ISD::SHL && N1.getOperand(0).getOpcode() == ISD::SUB && 1732 isNullConstant(N1.getOperand(0).getOperand(0))) 1733 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, 1734 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1735 N1.getOperand(0).getOperand(1), 1736 N1.getOperand(1))); 1737 if (N0.getOpcode() == ISD::SHL && N0.getOperand(0).getOpcode() == ISD::SUB && 1738 isNullConstant(N0.getOperand(0).getOperand(0))) 1739 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, 1740 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1741 N0.getOperand(0).getOperand(1), 1742 N0.getOperand(1))); 1743 1744 if (N1.getOpcode() == ISD::AND) { 1745 SDValue AndOp0 = N1.getOperand(0); 1746 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); 1747 unsigned DestBits = VT.getScalarType().getSizeInBits(); 1748 1749 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) 1750 // and similar xforms where the inner op is either ~0 or 0. 1751 if (NumSignBits == DestBits && isOneConstant(N1->getOperand(1))) { 1752 SDLoc DL(N); 1753 return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); 1754 } 1755 } 1756 1757 // add (sext i1), X -> sub X, (zext i1) 1758 if (N0.getOpcode() == ISD::SIGN_EXTEND && 1759 N0.getOperand(0).getValueType() == MVT::i1 && 1760 !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { 1761 SDLoc DL(N); 1762 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); 1763 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); 1764 } 1765 1766 // add X, (sextinreg Y i1) -> sub X, (and Y 1) 1767 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { 1768 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); 1769 if (TN->getVT() == MVT::i1) { 1770 SDLoc DL(N); 1771 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), 1772 DAG.getConstant(1, DL, VT)); 1773 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); 1774 } 1775 } 1776 1777 return SDValue(); 1778 } 1779 1780 SDValue DAGCombiner::visitADDC(SDNode *N) { 1781 SDValue N0 = N->getOperand(0); 1782 SDValue N1 = N->getOperand(1); 1783 EVT VT = N0.getValueType(); 1784 1785 // If the flag result is dead, turn this into an ADD. 1786 if (!N->hasAnyUseOfValue(1)) 1787 return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), 1788 DAG.getNode(ISD::CARRY_FALSE, 1789 SDLoc(N), MVT::Glue)); 1790 1791 // canonicalize constant to RHS. 1792 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1793 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1794 if (N0C && !N1C) 1795 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); 1796 1797 // fold (addc x, 0) -> x + no carry out 1798 if (isNullConstant(N1)) 1799 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, 1800 SDLoc(N), MVT::Glue)); 1801 1802 // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. 1803 APInt LHSZero, LHSOne; 1804 APInt RHSZero, RHSOne; 1805 DAG.computeKnownBits(N0, LHSZero, LHSOne); 1806 1807 if (LHSZero.getBoolValue()) { 1808 DAG.computeKnownBits(N1, RHSZero, RHSOne); 1809 1810 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 1811 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 1812 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) 1813 return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), 1814 DAG.getNode(ISD::CARRY_FALSE, 1815 SDLoc(N), MVT::Glue)); 1816 } 1817 1818 return SDValue(); 1819 } 1820 1821 SDValue DAGCombiner::visitADDE(SDNode *N) { 1822 SDValue N0 = N->getOperand(0); 1823 SDValue N1 = N->getOperand(1); 1824 SDValue CarryIn = N->getOperand(2); 1825 1826 // canonicalize constant to RHS 1827 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1828 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1829 if (N0C && !N1C) 1830 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), 1831 N1, N0, CarryIn); 1832 1833 // fold (adde x, y, false) -> (addc x, y) 1834 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 1835 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); 1836 1837 return SDValue(); 1838 } 1839 1840 // Since it may not be valid to emit a fold to zero for vector initializers 1841 // check if we can before folding. 1842 static SDValue tryFoldToZero(const SDLoc &DL, const TargetLowering &TLI, EVT VT, 1843 SelectionDAG &DAG, bool LegalOperations, 1844 bool LegalTypes) { 1845 if (!VT.isVector()) 1846 return DAG.getConstant(0, DL, VT); 1847 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) 1848 return DAG.getConstant(0, DL, VT); 1849 return SDValue(); 1850 } 1851 1852 SDValue DAGCombiner::visitSUB(SDNode *N) { 1853 SDValue N0 = N->getOperand(0); 1854 SDValue N1 = N->getOperand(1); 1855 EVT VT = N0.getValueType(); 1856 1857 // fold vector ops 1858 if (VT.isVector()) { 1859 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 1860 return FoldedVOp; 1861 1862 // fold (sub x, 0) -> x, vector edition 1863 if (ISD::isBuildVectorAllZeros(N1.getNode())) 1864 return N0; 1865 } 1866 1867 // fold (sub x, x) -> 0 1868 // FIXME: Refactor this and xor and other similar operations together. 1869 if (N0 == N1) 1870 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); 1871 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 1872 DAG.isConstantIntBuildVectorOrConstantInt(N1)) { 1873 // fold (sub c1, c2) -> c1-c2 1874 return DAG.FoldConstantArithmetic(ISD::SUB, SDLoc(N), VT, 1875 N0.getNode(), N1.getNode()); 1876 } 1877 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 1878 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 1879 // fold (sub x, c) -> (add x, -c) 1880 if (N1C) { 1881 SDLoc DL(N); 1882 return DAG.getNode(ISD::ADD, DL, VT, N0, 1883 DAG.getConstant(-N1C->getAPIntValue(), DL, VT)); 1884 } 1885 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) 1886 if (isAllOnesConstant(N0)) 1887 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); 1888 // fold A-(A-B) -> B 1889 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) 1890 return N1.getOperand(1); 1891 // fold (A+B)-A -> B 1892 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) 1893 return N0.getOperand(1); 1894 // fold (A+B)-B -> A 1895 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) 1896 return N0.getOperand(0); 1897 // fold C2-(A+C1) -> (C2-C1)-A 1898 ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : 1899 dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); 1900 if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { 1901 SDLoc DL(N); 1902 SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), 1903 DL, VT); 1904 return DAG.getNode(ISD::SUB, DL, VT, NewC, 1905 N1.getOperand(0)); 1906 } 1907 // fold ((A+(B+or-C))-B) -> A+or-C 1908 if (N0.getOpcode() == ISD::ADD && 1909 (N0.getOperand(1).getOpcode() == ISD::SUB || 1910 N0.getOperand(1).getOpcode() == ISD::ADD) && 1911 N0.getOperand(1).getOperand(0) == N1) 1912 return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, 1913 N0.getOperand(0), N0.getOperand(1).getOperand(1)); 1914 // fold ((A+(C+B))-B) -> A+C 1915 if (N0.getOpcode() == ISD::ADD && 1916 N0.getOperand(1).getOpcode() == ISD::ADD && 1917 N0.getOperand(1).getOperand(1) == N1) 1918 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 1919 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1920 // fold ((A-(B-C))-C) -> A-B 1921 if (N0.getOpcode() == ISD::SUB && 1922 N0.getOperand(1).getOpcode() == ISD::SUB && 1923 N0.getOperand(1).getOperand(1) == N1) 1924 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1925 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1926 1927 // If either operand of a sub is undef, the result is undef 1928 if (N0.isUndef()) 1929 return N0; 1930 if (N1.isUndef()) 1931 return N1; 1932 1933 // If the relocation model supports it, consider symbol offsets. 1934 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1935 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { 1936 // fold (sub Sym, c) -> Sym-c 1937 if (N1C && GA->getOpcode() == ISD::GlobalAddress) 1938 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 1939 GA->getOffset() - 1940 (uint64_t)N1C->getSExtValue()); 1941 // fold (sub Sym+c1, Sym+c2) -> c1-c2 1942 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) 1943 if (GA->getGlobal() == GB->getGlobal()) 1944 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), 1945 SDLoc(N), VT); 1946 } 1947 1948 // sub X, (sextinreg Y i1) -> add X, (and Y 1) 1949 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { 1950 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); 1951 if (TN->getVT() == MVT::i1) { 1952 SDLoc DL(N); 1953 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), 1954 DAG.getConstant(1, DL, VT)); 1955 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); 1956 } 1957 } 1958 1959 return SDValue(); 1960 } 1961 1962 SDValue DAGCombiner::visitSUBC(SDNode *N) { 1963 SDValue N0 = N->getOperand(0); 1964 SDValue N1 = N->getOperand(1); 1965 EVT VT = N0.getValueType(); 1966 SDLoc DL(N); 1967 1968 // If the flag result is dead, turn this into an SUB. 1969 if (!N->hasAnyUseOfValue(1)) 1970 return CombineTo(N, DAG.getNode(ISD::SUB, DL, VT, N0, N1), 1971 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 1972 1973 // fold (subc x, x) -> 0 + no borrow 1974 if (N0 == N1) 1975 return CombineTo(N, DAG.getConstant(0, DL, VT), 1976 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 1977 1978 // fold (subc x, 0) -> x + no borrow 1979 if (isNullConstant(N1)) 1980 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 1981 1982 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow 1983 if (isAllOnesConstant(N0)) 1984 return CombineTo(N, DAG.getNode(ISD::XOR, DL, VT, N1, N0), 1985 DAG.getNode(ISD::CARRY_FALSE, DL, MVT::Glue)); 1986 1987 return SDValue(); 1988 } 1989 1990 SDValue DAGCombiner::visitSUBE(SDNode *N) { 1991 SDValue N0 = N->getOperand(0); 1992 SDValue N1 = N->getOperand(1); 1993 SDValue CarryIn = N->getOperand(2); 1994 1995 // fold (sube x, y, false) -> (subc x, y) 1996 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 1997 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); 1998 1999 return SDValue(); 2000 } 2001 2002 SDValue DAGCombiner::visitMUL(SDNode *N) { 2003 SDValue N0 = N->getOperand(0); 2004 SDValue N1 = N->getOperand(1); 2005 EVT VT = N0.getValueType(); 2006 2007 // fold (mul x, undef) -> 0 2008 if (N0.isUndef() || N1.isUndef()) 2009 return DAG.getConstant(0, SDLoc(N), VT); 2010 2011 bool N0IsConst = false; 2012 bool N1IsConst = false; 2013 bool N1IsOpaqueConst = false; 2014 bool N0IsOpaqueConst = false; 2015 APInt ConstValue0, ConstValue1; 2016 // fold vector ops 2017 if (VT.isVector()) { 2018 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2019 return FoldedVOp; 2020 2021 N0IsConst = ISD::isConstantSplatVector(N0.getNode(), ConstValue0); 2022 N1IsConst = ISD::isConstantSplatVector(N1.getNode(), ConstValue1); 2023 } else { 2024 N0IsConst = isa<ConstantSDNode>(N0); 2025 if (N0IsConst) { 2026 ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); 2027 N0IsOpaqueConst = cast<ConstantSDNode>(N0)->isOpaque(); 2028 } 2029 N1IsConst = isa<ConstantSDNode>(N1); 2030 if (N1IsConst) { 2031 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); 2032 N1IsOpaqueConst = cast<ConstantSDNode>(N1)->isOpaque(); 2033 } 2034 } 2035 2036 // fold (mul c1, c2) -> c1*c2 2037 if (N0IsConst && N1IsConst && !N0IsOpaqueConst && !N1IsOpaqueConst) 2038 return DAG.FoldConstantArithmetic(ISD::MUL, SDLoc(N), VT, 2039 N0.getNode(), N1.getNode()); 2040 2041 // canonicalize constant to RHS (vector doesn't have to splat) 2042 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 2043 !DAG.isConstantIntBuildVectorOrConstantInt(N1)) 2044 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); 2045 // fold (mul x, 0) -> 0 2046 if (N1IsConst && ConstValue1 == 0) 2047 return N1; 2048 // We require a splat of the entire scalar bit width for non-contiguous 2049 // bit patterns. 2050 bool IsFullSplat = 2051 ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); 2052 // fold (mul x, 1) -> x 2053 if (N1IsConst && ConstValue1 == 1 && IsFullSplat) 2054 return N0; 2055 // fold (mul x, -1) -> 0-x 2056 if (N1IsConst && ConstValue1.isAllOnesValue()) { 2057 SDLoc DL(N); 2058 return DAG.getNode(ISD::SUB, DL, VT, 2059 DAG.getConstant(0, DL, VT), N0); 2060 } 2061 // fold (mul x, (1 << c)) -> x << c 2062 if (N1IsConst && !N1IsOpaqueConst && ConstValue1.isPowerOf2() && 2063 IsFullSplat) { 2064 SDLoc DL(N); 2065 return DAG.getNode(ISD::SHL, DL, VT, N0, 2066 DAG.getConstant(ConstValue1.logBase2(), DL, 2067 getShiftAmountTy(N0.getValueType()))); 2068 } 2069 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c 2070 if (N1IsConst && !N1IsOpaqueConst && (-ConstValue1).isPowerOf2() && 2071 IsFullSplat) { 2072 unsigned Log2Val = (-ConstValue1).logBase2(); 2073 SDLoc DL(N); 2074 // FIXME: If the input is something that is easily negated (e.g. a 2075 // single-use add), we should put the negate there. 2076 return DAG.getNode(ISD::SUB, DL, VT, 2077 DAG.getConstant(0, DL, VT), 2078 DAG.getNode(ISD::SHL, DL, VT, N0, 2079 DAG.getConstant(Log2Val, DL, 2080 getShiftAmountTy(N0.getValueType())))); 2081 } 2082 2083 APInt Val; 2084 // (mul (shl X, c1), c2) -> (mul X, c2 << c1) 2085 if (N1IsConst && N0.getOpcode() == ISD::SHL && 2086 (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 2087 isa<ConstantSDNode>(N0.getOperand(1)))) { 2088 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); 2089 AddToWorklist(C3.getNode()); 2090 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); 2091 } 2092 2093 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one 2094 // use. 2095 { 2096 SDValue Sh(nullptr, 0), Y(nullptr, 0); 2097 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 2098 if (N0.getOpcode() == ISD::SHL && 2099 (ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 2100 isa<ConstantSDNode>(N0.getOperand(1))) && 2101 N0.getNode()->hasOneUse()) { 2102 Sh = N0; Y = N1; 2103 } else if (N1.getOpcode() == ISD::SHL && 2104 isa<ConstantSDNode>(N1.getOperand(1)) && 2105 N1.getNode()->hasOneUse()) { 2106 Sh = N1; Y = N0; 2107 } 2108 2109 if (Sh.getNode()) { 2110 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); 2111 return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); 2112 } 2113 } 2114 2115 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) 2116 if (DAG.isConstantIntBuildVectorOrConstantInt(N1) && 2117 N0.getOpcode() == ISD::ADD && 2118 DAG.isConstantIntBuildVectorOrConstantInt(N0.getOperand(1)) && 2119 isMulAddWithConstProfitable(N, N0, N1)) 2120 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 2121 DAG.getNode(ISD::MUL, SDLoc(N0), VT, 2122 N0.getOperand(0), N1), 2123 DAG.getNode(ISD::MUL, SDLoc(N1), VT, 2124 N0.getOperand(1), N1)); 2125 2126 // reassociate mul 2127 if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) 2128 return RMUL; 2129 2130 return SDValue(); 2131 } 2132 2133 /// Return true if divmod libcall is available. 2134 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned, 2135 const TargetLowering &TLI) { 2136 RTLIB::Libcall LC; 2137 EVT NodeType = Node->getValueType(0); 2138 if (!NodeType.isSimple()) 2139 return false; 2140 switch (NodeType.getSimpleVT().SimpleTy) { 2141 default: return false; // No libcall for vector types. 2142 case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; 2143 case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; 2144 case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; 2145 case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; 2146 case MVT::i128: LC= isSigned ? RTLIB::SDIVREM_I128:RTLIB::UDIVREM_I128; break; 2147 } 2148 2149 return TLI.getLibcallName(LC) != nullptr; 2150 } 2151 2152 /// Issue divrem if both quotient and remainder are needed. 2153 SDValue DAGCombiner::useDivRem(SDNode *Node) { 2154 if (Node->use_empty()) 2155 return SDValue(); // This is a dead node, leave it alone. 2156 2157 unsigned Opcode = Node->getOpcode(); 2158 bool isSigned = (Opcode == ISD::SDIV) || (Opcode == ISD::SREM); 2159 unsigned DivRemOpc = isSigned ? ISD::SDIVREM : ISD::UDIVREM; 2160 2161 // DivMod lib calls can still work on non-legal types if using lib-calls. 2162 EVT VT = Node->getValueType(0); 2163 if (VT.isVector() || !VT.isInteger()) 2164 return SDValue(); 2165 2166 if (!TLI.isTypeLegal(VT) && !TLI.isOperationCustom(DivRemOpc, VT)) 2167 return SDValue(); 2168 2169 // If DIVREM is going to get expanded into a libcall, 2170 // but there is no libcall available, then don't combine. 2171 if (!TLI.isOperationLegalOrCustom(DivRemOpc, VT) && 2172 !isDivRemLibcallAvailable(Node, isSigned, TLI)) 2173 return SDValue(); 2174 2175 // If div is legal, it's better to do the normal expansion 2176 unsigned OtherOpcode = 0; 2177 if ((Opcode == ISD::SDIV) || (Opcode == ISD::UDIV)) { 2178 OtherOpcode = isSigned ? ISD::SREM : ISD::UREM; 2179 if (TLI.isOperationLegalOrCustom(Opcode, VT)) 2180 return SDValue(); 2181 } else { 2182 OtherOpcode = isSigned ? ISD::SDIV : ISD::UDIV; 2183 if (TLI.isOperationLegalOrCustom(OtherOpcode, VT)) 2184 return SDValue(); 2185 } 2186 2187 SDValue Op0 = Node->getOperand(0); 2188 SDValue Op1 = Node->getOperand(1); 2189 SDValue combined; 2190 for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), 2191 UE = Op0.getNode()->use_end(); UI != UE; ++UI) { 2192 SDNode *User = *UI; 2193 if (User == Node || User->use_empty()) 2194 continue; 2195 // Convert the other matching node(s), too; 2196 // otherwise, the DIVREM may get target-legalized into something 2197 // target-specific that we won't be able to recognize. 2198 unsigned UserOpc = User->getOpcode(); 2199 if ((UserOpc == Opcode || UserOpc == OtherOpcode || UserOpc == DivRemOpc) && 2200 User->getOperand(0) == Op0 && 2201 User->getOperand(1) == Op1) { 2202 if (!combined) { 2203 if (UserOpc == OtherOpcode) { 2204 SDVTList VTs = DAG.getVTList(VT, VT); 2205 combined = DAG.getNode(DivRemOpc, SDLoc(Node), VTs, Op0, Op1); 2206 } else if (UserOpc == DivRemOpc) { 2207 combined = SDValue(User, 0); 2208 } else { 2209 assert(UserOpc == Opcode); 2210 continue; 2211 } 2212 } 2213 if (UserOpc == ISD::SDIV || UserOpc == ISD::UDIV) 2214 CombineTo(User, combined); 2215 else if (UserOpc == ISD::SREM || UserOpc == ISD::UREM) 2216 CombineTo(User, combined.getValue(1)); 2217 } 2218 } 2219 return combined; 2220 } 2221 2222 SDValue DAGCombiner::visitSDIV(SDNode *N) { 2223 SDValue N0 = N->getOperand(0); 2224 SDValue N1 = N->getOperand(1); 2225 EVT VT = N->getValueType(0); 2226 2227 // fold vector ops 2228 if (VT.isVector()) 2229 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2230 return FoldedVOp; 2231 2232 SDLoc DL(N); 2233 2234 // fold (sdiv c1, c2) -> c1/c2 2235 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2236 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2237 if (N0C && N1C && !N0C->isOpaque() && !N1C->isOpaque()) 2238 return DAG.FoldConstantArithmetic(ISD::SDIV, DL, VT, N0C, N1C); 2239 // fold (sdiv X, 1) -> X 2240 if (N1C && N1C->isOne()) 2241 return N0; 2242 // fold (sdiv X, -1) -> 0-X 2243 if (N1C && N1C->isAllOnesValue()) 2244 return DAG.getNode(ISD::SUB, DL, VT, 2245 DAG.getConstant(0, DL, VT), N0); 2246 2247 // If we know the sign bits of both operands are zero, strength reduce to a 2248 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 2249 if (!VT.isVector()) { 2250 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2251 return DAG.getNode(ISD::UDIV, DL, N1.getValueType(), N0, N1); 2252 } 2253 2254 // fold (sdiv X, pow2) -> simple ops after legalize 2255 // FIXME: We check for the exact bit here because the generic lowering gives 2256 // better results in that case. The target-specific lowering should learn how 2257 // to handle exact sdivs efficiently. 2258 if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && 2259 !cast<BinaryWithFlagsSDNode>(N)->Flags.hasExact() && 2260 (N1C->getAPIntValue().isPowerOf2() || 2261 (-N1C->getAPIntValue()).isPowerOf2())) { 2262 // Target-specific implementation of sdiv x, pow2. 2263 if (SDValue Res = BuildSDIVPow2(N)) 2264 return Res; 2265 2266 unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); 2267 2268 // Splat the sign bit into the register 2269 SDValue SGN = 2270 DAG.getNode(ISD::SRA, DL, VT, N0, 2271 DAG.getConstant(VT.getScalarSizeInBits() - 1, DL, 2272 getShiftAmountTy(N0.getValueType()))); 2273 AddToWorklist(SGN.getNode()); 2274 2275 // Add (N0 < 0) ? abs2 - 1 : 0; 2276 SDValue SRL = 2277 DAG.getNode(ISD::SRL, DL, VT, SGN, 2278 DAG.getConstant(VT.getScalarSizeInBits() - lg2, DL, 2279 getShiftAmountTy(SGN.getValueType()))); 2280 SDValue ADD = DAG.getNode(ISD::ADD, DL, VT, N0, SRL); 2281 AddToWorklist(SRL.getNode()); 2282 AddToWorklist(ADD.getNode()); // Divide by pow2 2283 SDValue SRA = DAG.getNode(ISD::SRA, DL, VT, ADD, 2284 DAG.getConstant(lg2, DL, 2285 getShiftAmountTy(ADD.getValueType()))); 2286 2287 // If we're dividing by a positive value, we're done. Otherwise, we must 2288 // negate the result. 2289 if (N1C->getAPIntValue().isNonNegative()) 2290 return SRA; 2291 2292 AddToWorklist(SRA.getNode()); 2293 return DAG.getNode(ISD::SUB, DL, VT, DAG.getConstant(0, DL, VT), SRA); 2294 } 2295 2296 // If integer divide is expensive and we satisfy the requirements, emit an 2297 // alternate sequence. Targets may check function attributes for size/speed 2298 // trade-offs. 2299 AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); 2300 if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) 2301 if (SDValue Op = BuildSDIV(N)) 2302 return Op; 2303 2304 // sdiv, srem -> sdivrem 2305 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. 2306 // Otherwise, we break the simplification logic in visitREM(). 2307 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) 2308 if (SDValue DivRem = useDivRem(N)) 2309 return DivRem; 2310 2311 // undef / X -> 0 2312 if (N0.isUndef()) 2313 return DAG.getConstant(0, DL, VT); 2314 // X / undef -> undef 2315 if (N1.isUndef()) 2316 return N1; 2317 2318 return SDValue(); 2319 } 2320 2321 SDValue DAGCombiner::visitUDIV(SDNode *N) { 2322 SDValue N0 = N->getOperand(0); 2323 SDValue N1 = N->getOperand(1); 2324 EVT VT = N->getValueType(0); 2325 2326 // fold vector ops 2327 if (VT.isVector()) 2328 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2329 return FoldedVOp; 2330 2331 SDLoc DL(N); 2332 2333 // fold (udiv c1, c2) -> c1/c2 2334 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2335 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2336 if (N0C && N1C) 2337 if (SDValue Folded = DAG.FoldConstantArithmetic(ISD::UDIV, DL, VT, 2338 N0C, N1C)) 2339 return Folded; 2340 // fold (udiv x, (1 << c)) -> x >>u c 2341 if (N1C && !N1C->isOpaque() && N1C->getAPIntValue().isPowerOf2()) 2342 return DAG.getNode(ISD::SRL, DL, VT, N0, 2343 DAG.getConstant(N1C->getAPIntValue().logBase2(), DL, 2344 getShiftAmountTy(N0.getValueType()))); 2345 2346 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 2347 if (N1.getOpcode() == ISD::SHL) { 2348 if (ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0))) { 2349 if (SHC->getAPIntValue().isPowerOf2()) { 2350 EVT ADDVT = N1.getOperand(1).getValueType(); 2351 SDValue Add = DAG.getNode(ISD::ADD, DL, ADDVT, 2352 N1.getOperand(1), 2353 DAG.getConstant(SHC->getAPIntValue() 2354 .logBase2(), 2355 DL, ADDVT)); 2356 AddToWorklist(Add.getNode()); 2357 return DAG.getNode(ISD::SRL, DL, VT, N0, Add); 2358 } 2359 } 2360 } 2361 2362 // fold (udiv x, c) -> alternate 2363 AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); 2364 if (N1C && !TLI.isIntDivCheap(N->getValueType(0), Attr)) 2365 if (SDValue Op = BuildUDIV(N)) 2366 return Op; 2367 2368 // sdiv, srem -> sdivrem 2369 // If the divisor is constant, then return DIVREM only if isIntDivCheap() is true. 2370 // Otherwise, we break the simplification logic in visitREM(). 2371 if (!N1C || TLI.isIntDivCheap(N->getValueType(0), Attr)) 2372 if (SDValue DivRem = useDivRem(N)) 2373 return DivRem; 2374 2375 // undef / X -> 0 2376 if (N0.isUndef()) 2377 return DAG.getConstant(0, DL, VT); 2378 // X / undef -> undef 2379 if (N1.isUndef()) 2380 return N1; 2381 2382 return SDValue(); 2383 } 2384 2385 // handles ISD::SREM and ISD::UREM 2386 SDValue DAGCombiner::visitREM(SDNode *N) { 2387 unsigned Opcode = N->getOpcode(); 2388 SDValue N0 = N->getOperand(0); 2389 SDValue N1 = N->getOperand(1); 2390 EVT VT = N->getValueType(0); 2391 bool isSigned = (Opcode == ISD::SREM); 2392 SDLoc DL(N); 2393 2394 // fold (rem c1, c2) -> c1%c2 2395 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2396 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2397 if (N0C && N1C) 2398 if (SDValue Folded = DAG.FoldConstantArithmetic(Opcode, DL, VT, N0C, N1C)) 2399 return Folded; 2400 2401 if (isSigned) { 2402 // If we know the sign bits of both operands are zero, strength reduce to a 2403 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 2404 if (!VT.isVector()) { 2405 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2406 return DAG.getNode(ISD::UREM, DL, VT, N0, N1); 2407 } 2408 } else { 2409 // fold (urem x, pow2) -> (and x, pow2-1) 2410 if (N1C && !N1C->isNullValue() && !N1C->isOpaque() && 2411 N1C->getAPIntValue().isPowerOf2()) { 2412 return DAG.getNode(ISD::AND, DL, VT, N0, 2413 DAG.getConstant(N1C->getAPIntValue() - 1, DL, VT)); 2414 } 2415 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) 2416 if (N1.getOpcode() == ISD::SHL) { 2417 ConstantSDNode *SHC = getAsNonOpaqueConstant(N1.getOperand(0)); 2418 if (SHC && SHC->getAPIntValue().isPowerOf2()) { 2419 APInt NegOne = APInt::getAllOnesValue(VT.getSizeInBits()); 2420 SDValue Add = 2421 DAG.getNode(ISD::ADD, DL, VT, N1, DAG.getConstant(NegOne, DL, VT)); 2422 AddToWorklist(Add.getNode()); 2423 return DAG.getNode(ISD::AND, DL, VT, N0, Add); 2424 } 2425 } 2426 } 2427 2428 AttributeSet Attr = DAG.getMachineFunction().getFunction()->getAttributes(); 2429 2430 // If X/C can be simplified by the division-by-constant logic, lower 2431 // X%C to the equivalent of X-X/C*C. 2432 // To avoid mangling nodes, this simplification requires that the combine() 2433 // call for the speculative DIV must not cause a DIVREM conversion. We guard 2434 // against this by skipping the simplification if isIntDivCheap(). When 2435 // div is not cheap, combine will not return a DIVREM. Regardless, 2436 // checking cheapness here makes sense since the simplification results in 2437 // fatter code. 2438 if (N1C && !N1C->isNullValue() && !TLI.isIntDivCheap(VT, Attr)) { 2439 unsigned DivOpcode = isSigned ? ISD::SDIV : ISD::UDIV; 2440 SDValue Div = DAG.getNode(DivOpcode, DL, VT, N0, N1); 2441 AddToWorklist(Div.getNode()); 2442 SDValue OptimizedDiv = combine(Div.getNode()); 2443 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2444 assert((OptimizedDiv.getOpcode() != ISD::UDIVREM) && 2445 (OptimizedDiv.getOpcode() != ISD::SDIVREM)); 2446 SDValue Mul = DAG.getNode(ISD::MUL, DL, VT, OptimizedDiv, N1); 2447 SDValue Sub = DAG.getNode(ISD::SUB, DL, VT, N0, Mul); 2448 AddToWorklist(Mul.getNode()); 2449 return Sub; 2450 } 2451 } 2452 2453 // sdiv, srem -> sdivrem 2454 if (SDValue DivRem = useDivRem(N)) 2455 return DivRem.getValue(1); 2456 2457 // undef % X -> 0 2458 if (N0.isUndef()) 2459 return DAG.getConstant(0, DL, VT); 2460 // X % undef -> undef 2461 if (N1.isUndef()) 2462 return N1; 2463 2464 return SDValue(); 2465 } 2466 2467 SDValue DAGCombiner::visitMULHS(SDNode *N) { 2468 SDValue N0 = N->getOperand(0); 2469 SDValue N1 = N->getOperand(1); 2470 EVT VT = N->getValueType(0); 2471 SDLoc DL(N); 2472 2473 // fold (mulhs x, 0) -> 0 2474 if (isNullConstant(N1)) 2475 return N1; 2476 // fold (mulhs x, 1) -> (sra x, size(x)-1) 2477 if (isOneConstant(N1)) { 2478 SDLoc DL(N); 2479 return DAG.getNode(ISD::SRA, DL, N0.getValueType(), N0, 2480 DAG.getConstant(N0.getValueType().getSizeInBits() - 1, 2481 DL, 2482 getShiftAmountTy(N0.getValueType()))); 2483 } 2484 // fold (mulhs x, undef) -> 0 2485 if (N0.isUndef() || N1.isUndef()) 2486 return DAG.getConstant(0, SDLoc(N), VT); 2487 2488 // If the type twice as wide is legal, transform the mulhs to a wider multiply 2489 // plus a shift. 2490 if (VT.isSimple() && !VT.isVector()) { 2491 MVT Simple = VT.getSimpleVT(); 2492 unsigned SimpleSize = Simple.getSizeInBits(); 2493 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2494 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2495 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 2496 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 2497 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2498 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2499 DAG.getConstant(SimpleSize, DL, 2500 getShiftAmountTy(N1.getValueType()))); 2501 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2502 } 2503 } 2504 2505 return SDValue(); 2506 } 2507 2508 SDValue DAGCombiner::visitMULHU(SDNode *N) { 2509 SDValue N0 = N->getOperand(0); 2510 SDValue N1 = N->getOperand(1); 2511 EVT VT = N->getValueType(0); 2512 SDLoc DL(N); 2513 2514 // fold (mulhu x, 0) -> 0 2515 if (isNullConstant(N1)) 2516 return N1; 2517 // fold (mulhu x, 1) -> 0 2518 if (isOneConstant(N1)) 2519 return DAG.getConstant(0, DL, N0.getValueType()); 2520 // fold (mulhu x, undef) -> 0 2521 if (N0.isUndef() || N1.isUndef()) 2522 return DAG.getConstant(0, DL, VT); 2523 2524 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2525 // plus a shift. 2526 if (VT.isSimple() && !VT.isVector()) { 2527 MVT Simple = VT.getSimpleVT(); 2528 unsigned SimpleSize = Simple.getSizeInBits(); 2529 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2530 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2531 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 2532 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 2533 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2534 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2535 DAG.getConstant(SimpleSize, DL, 2536 getShiftAmountTy(N1.getValueType()))); 2537 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2538 } 2539 } 2540 2541 return SDValue(); 2542 } 2543 2544 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp 2545 /// give the opcodes for the two computations that are being performed. Return 2546 /// true if a simplification was made. 2547 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2548 unsigned HiOp) { 2549 // If the high half is not needed, just compute the low half. 2550 bool HiExists = N->hasAnyUseOfValue(1); 2551 if (!HiExists && 2552 (!LegalOperations || 2553 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 2554 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2555 return CombineTo(N, Res, Res); 2556 } 2557 2558 // If the low half is not needed, just compute the high half. 2559 bool LoExists = N->hasAnyUseOfValue(0); 2560 if (!LoExists && 2561 (!LegalOperations || 2562 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2563 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2564 return CombineTo(N, Res, Res); 2565 } 2566 2567 // If both halves are used, return as it is. 2568 if (LoExists && HiExists) 2569 return SDValue(); 2570 2571 // If the two computed results can be simplified separately, separate them. 2572 if (LoExists) { 2573 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2574 AddToWorklist(Lo.getNode()); 2575 SDValue LoOpt = combine(Lo.getNode()); 2576 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2577 (!LegalOperations || 2578 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2579 return CombineTo(N, LoOpt, LoOpt); 2580 } 2581 2582 if (HiExists) { 2583 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2584 AddToWorklist(Hi.getNode()); 2585 SDValue HiOpt = combine(Hi.getNode()); 2586 if (HiOpt.getNode() && HiOpt != Hi && 2587 (!LegalOperations || 2588 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2589 return CombineTo(N, HiOpt, HiOpt); 2590 } 2591 2592 return SDValue(); 2593 } 2594 2595 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2596 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS)) 2597 return Res; 2598 2599 EVT VT = N->getValueType(0); 2600 SDLoc DL(N); 2601 2602 // If the type is twice as wide is legal, transform the mulhu to a wider 2603 // multiply plus a shift. 2604 if (VT.isSimple() && !VT.isVector()) { 2605 MVT Simple = VT.getSimpleVT(); 2606 unsigned SimpleSize = Simple.getSizeInBits(); 2607 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2608 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2609 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2610 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2611 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2612 // Compute the high part as N1. 2613 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2614 DAG.getConstant(SimpleSize, DL, 2615 getShiftAmountTy(Lo.getValueType()))); 2616 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2617 // Compute the low part as N0. 2618 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2619 return CombineTo(N, Lo, Hi); 2620 } 2621 } 2622 2623 return SDValue(); 2624 } 2625 2626 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2627 if (SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU)) 2628 return Res; 2629 2630 EVT VT = N->getValueType(0); 2631 SDLoc DL(N); 2632 2633 // If the type is twice as wide is legal, transform the mulhu to a wider 2634 // multiply plus a shift. 2635 if (VT.isSimple() && !VT.isVector()) { 2636 MVT Simple = VT.getSimpleVT(); 2637 unsigned SimpleSize = Simple.getSizeInBits(); 2638 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2639 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2640 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2641 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2642 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2643 // Compute the high part as N1. 2644 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2645 DAG.getConstant(SimpleSize, DL, 2646 getShiftAmountTy(Lo.getValueType()))); 2647 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2648 // Compute the low part as N0. 2649 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2650 return CombineTo(N, Lo, Hi); 2651 } 2652 } 2653 2654 return SDValue(); 2655 } 2656 2657 SDValue DAGCombiner::visitSMULO(SDNode *N) { 2658 // (smulo x, 2) -> (saddo x, x) 2659 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2660 if (C2->getAPIntValue() == 2) 2661 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 2662 N->getOperand(0), N->getOperand(0)); 2663 2664 return SDValue(); 2665 } 2666 2667 SDValue DAGCombiner::visitUMULO(SDNode *N) { 2668 // (umulo x, 2) -> (uaddo x, x) 2669 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2670 if (C2->getAPIntValue() == 2) 2671 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 2672 N->getOperand(0), N->getOperand(0)); 2673 2674 return SDValue(); 2675 } 2676 2677 SDValue DAGCombiner::visitIMINMAX(SDNode *N) { 2678 SDValue N0 = N->getOperand(0); 2679 SDValue N1 = N->getOperand(1); 2680 EVT VT = N0.getValueType(); 2681 2682 // fold vector ops 2683 if (VT.isVector()) 2684 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2685 return FoldedVOp; 2686 2687 // fold (add c1, c2) -> c1+c2 2688 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 2689 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 2690 if (N0C && N1C) 2691 return DAG.FoldConstantArithmetic(N->getOpcode(), SDLoc(N), VT, N0C, N1C); 2692 2693 // canonicalize constant to RHS 2694 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 2695 !DAG.isConstantIntBuildVectorOrConstantInt(N1)) 2696 return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N1, N0); 2697 2698 return SDValue(); 2699 } 2700 2701 /// If this is a binary operator with two operands of the same opcode, try to 2702 /// simplify it. 2703 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2704 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2705 EVT VT = N0.getValueType(); 2706 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2707 2708 // Bail early if none of these transforms apply. 2709 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2710 2711 // For each of OP in AND/OR/XOR: 2712 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2713 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2714 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2715 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) 2716 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2717 // 2718 // do not sink logical op inside of a vector extend, since it may combine 2719 // into a vsetcc. 2720 EVT Op0VT = N0.getOperand(0).getValueType(); 2721 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2722 N0.getOpcode() == ISD::SIGN_EXTEND || 2723 N0.getOpcode() == ISD::BSWAP || 2724 // Avoid infinite looping with PromoteIntBinOp. 2725 (N0.getOpcode() == ISD::ANY_EXTEND && 2726 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2727 (N0.getOpcode() == ISD::TRUNCATE && 2728 (!TLI.isZExtFree(VT, Op0VT) || 2729 !TLI.isTruncateFree(Op0VT, VT)) && 2730 TLI.isTypeLegal(Op0VT))) && 2731 !VT.isVector() && 2732 Op0VT == N1.getOperand(0).getValueType() && 2733 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2734 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2735 N0.getOperand(0).getValueType(), 2736 N0.getOperand(0), N1.getOperand(0)); 2737 AddToWorklist(ORNode.getNode()); 2738 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 2739 } 2740 2741 // For each of OP in SHL/SRL/SRA/AND... 2742 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2743 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2744 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2745 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2746 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2747 N0.getOperand(1) == N1.getOperand(1)) { 2748 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2749 N0.getOperand(0).getValueType(), 2750 N0.getOperand(0), N1.getOperand(0)); 2751 AddToWorklist(ORNode.getNode()); 2752 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 2753 ORNode, N0.getOperand(1)); 2754 } 2755 2756 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 2757 // Only perform this optimization up until type legalization, before 2758 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 2759 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 2760 // we don't want to undo this promotion. 2761 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 2762 // on scalars. 2763 if ((N0.getOpcode() == ISD::BITCAST || 2764 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 2765 Level <= AfterLegalizeTypes) { 2766 SDValue In0 = N0.getOperand(0); 2767 SDValue In1 = N1.getOperand(0); 2768 EVT In0Ty = In0.getValueType(); 2769 EVT In1Ty = In1.getValueType(); 2770 SDLoc DL(N); 2771 // If both incoming values are integers, and the original types are the 2772 // same. 2773 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 2774 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 2775 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 2776 AddToWorklist(Op.getNode()); 2777 return BC; 2778 } 2779 } 2780 2781 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 2782 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 2783 // If both shuffles use the same mask, and both shuffle within a single 2784 // vector, then it is worthwhile to move the swizzle after the operation. 2785 // The type-legalizer generates this pattern when loading illegal 2786 // vector types from memory. In many cases this allows additional shuffle 2787 // optimizations. 2788 // There are other cases where moving the shuffle after the xor/and/or 2789 // is profitable even if shuffles don't perform a swizzle. 2790 // If both shuffles use the same mask, and both shuffles have the same first 2791 // or second operand, then it might still be profitable to move the shuffle 2792 // after the xor/and/or operation. 2793 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 2794 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 2795 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 2796 2797 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && 2798 "Inputs to shuffles are not the same type"); 2799 2800 // Check that both shuffles use the same mask. The masks are known to be of 2801 // the same length because the result vector type is the same. 2802 // Check also that shuffles have only one use to avoid introducing extra 2803 // instructions. 2804 if (SVN0->hasOneUse() && SVN1->hasOneUse() && 2805 SVN0->getMask().equals(SVN1->getMask())) { 2806 SDValue ShOp = N0->getOperand(1); 2807 2808 // Don't try to fold this node if it requires introducing a 2809 // build vector of all zeros that might be illegal at this stage. 2810 if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { 2811 if (!LegalTypes) 2812 ShOp = DAG.getConstant(0, SDLoc(N), VT); 2813 else 2814 ShOp = SDValue(); 2815 } 2816 2817 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) 2818 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) 2819 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) 2820 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 2821 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2822 N0->getOperand(0), N1->getOperand(0)); 2823 AddToWorklist(NewNode.getNode()); 2824 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, 2825 SVN0->getMask()); 2826 } 2827 2828 // Don't try to fold this node if it requires introducing a 2829 // build vector of all zeros that might be illegal at this stage. 2830 ShOp = N0->getOperand(0); 2831 if (N->getOpcode() == ISD::XOR && !ShOp.isUndef()) { 2832 if (!LegalTypes) 2833 ShOp = DAG.getConstant(0, SDLoc(N), VT); 2834 else 2835 ShOp = SDValue(); 2836 } 2837 2838 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) 2839 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) 2840 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) 2841 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { 2842 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2843 N0->getOperand(1), N1->getOperand(1)); 2844 AddToWorklist(NewNode.getNode()); 2845 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, 2846 SVN0->getMask()); 2847 } 2848 } 2849 } 2850 2851 return SDValue(); 2852 } 2853 2854 /// This contains all DAGCombine rules which reduce two values combined by 2855 /// an And operation to a single value. This makes them reusable in the context 2856 /// of visitSELECT(). Rules involving constants are not included as 2857 /// visitSELECT() already handles those cases. 2858 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, 2859 SDNode *LocReference) { 2860 EVT VT = N1.getValueType(); 2861 2862 // fold (and x, undef) -> 0 2863 if (N0.isUndef() || N1.isUndef()) 2864 return DAG.getConstant(0, SDLoc(LocReference), VT); 2865 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2866 SDValue LL, LR, RL, RR, CC0, CC1; 2867 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2868 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2869 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2870 2871 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2872 LL.getValueType().isInteger()) { 2873 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2874 if (isNullConstant(LR) && Op1 == ISD::SETEQ) { 2875 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2876 LR.getValueType(), LL, RL); 2877 AddToWorklist(ORNode.getNode()); 2878 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2879 } 2880 if (isAllOnesConstant(LR)) { 2881 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2882 if (Op1 == ISD::SETEQ) { 2883 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2884 LR.getValueType(), LL, RL); 2885 AddToWorklist(ANDNode.getNode()); 2886 return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); 2887 } 2888 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2889 if (Op1 == ISD::SETGT) { 2890 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2891 LR.getValueType(), LL, RL); 2892 AddToWorklist(ORNode.getNode()); 2893 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2894 } 2895 } 2896 } 2897 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2898 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2899 Op0 == Op1 && LL.getValueType().isInteger() && 2900 Op0 == ISD::SETNE && ((isNullConstant(LR) && isAllOnesConstant(RR)) || 2901 (isAllOnesConstant(LR) && isNullConstant(RR)))) { 2902 SDLoc DL(N0); 2903 SDValue ADDNode = DAG.getNode(ISD::ADD, DL, LL.getValueType(), 2904 LL, DAG.getConstant(1, DL, 2905 LL.getValueType())); 2906 AddToWorklist(ADDNode.getNode()); 2907 return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, 2908 DAG.getConstant(2, DL, LL.getValueType()), 2909 ISD::SETUGE); 2910 } 2911 // canonicalize equivalent to ll == rl 2912 if (LL == RR && LR == RL) { 2913 Op1 = ISD::getSetCCSwappedOperands(Op1); 2914 std::swap(RL, RR); 2915 } 2916 if (LL == RL && LR == RR) { 2917 bool isInteger = LL.getValueType().isInteger(); 2918 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2919 if (Result != ISD::SETCC_INVALID && 2920 (!LegalOperations || 2921 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2922 TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { 2923 EVT CCVT = getSetCCResultType(LL.getValueType()); 2924 if (N0.getValueType() == CCVT || 2925 (!LegalOperations && N0.getValueType() == MVT::i1)) 2926 return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), 2927 LL, LR, Result); 2928 } 2929 } 2930 } 2931 2932 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 2933 VT.getSizeInBits() <= 64) { 2934 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2935 APInt ADDC = ADDI->getAPIntValue(); 2936 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2937 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 2938 // immediate for an add, but it is legal if its top c2 bits are set, 2939 // transform the ADD so the immediate doesn't need to be materialized 2940 // in a register. 2941 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 2942 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 2943 SRLI->getZExtValue()); 2944 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 2945 ADDC |= Mask; 2946 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2947 SDLoc DL(N0); 2948 SDValue NewAdd = 2949 DAG.getNode(ISD::ADD, DL, VT, 2950 N0.getOperand(0), DAG.getConstant(ADDC, DL, VT)); 2951 CombineTo(N0.getNode(), NewAdd); 2952 // Return N so it doesn't get rechecked! 2953 return SDValue(LocReference, 0); 2954 } 2955 } 2956 } 2957 } 2958 } 2959 } 2960 2961 // Reduce bit extract of low half of an integer to the narrower type. 2962 // (and (srl i64:x, K), KMask) -> 2963 // (i64 zero_extend (and (srl (i32 (trunc i64:x)), K)), KMask) 2964 if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 2965 if (ConstantSDNode *CAnd = dyn_cast<ConstantSDNode>(N1)) { 2966 if (ConstantSDNode *CShift = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2967 unsigned Size = VT.getSizeInBits(); 2968 const APInt &AndMask = CAnd->getAPIntValue(); 2969 unsigned ShiftBits = CShift->getZExtValue(); 2970 unsigned MaskBits = AndMask.countTrailingOnes(); 2971 EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), Size / 2); 2972 2973 if (APIntOps::isMask(AndMask) && 2974 // Required bits must not span the two halves of the integer and 2975 // must fit in the half size type. 2976 (ShiftBits + MaskBits <= Size / 2) && 2977 TLI.isNarrowingProfitable(VT, HalfVT) && 2978 TLI.isTypeDesirableForOp(ISD::AND, HalfVT) && 2979 TLI.isTypeDesirableForOp(ISD::SRL, HalfVT) && 2980 TLI.isTruncateFree(VT, HalfVT) && 2981 TLI.isZExtFree(HalfVT, VT)) { 2982 // The isNarrowingProfitable is to avoid regressions on PPC and 2983 // AArch64 which match a few 64-bit bit insert / bit extract patterns 2984 // on downstream users of this. Those patterns could probably be 2985 // extended to handle extensions mixed in. 2986 2987 SDValue SL(N0); 2988 assert(ShiftBits != 0 && MaskBits <= Size); 2989 2990 // Extracting the highest bit of the low half. 2991 EVT ShiftVT = TLI.getShiftAmountTy(HalfVT, DAG.getDataLayout()); 2992 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SL, HalfVT, 2993 N0.getOperand(0)); 2994 2995 SDValue NewMask = DAG.getConstant(AndMask.trunc(Size / 2), SL, HalfVT); 2996 SDValue ShiftK = DAG.getConstant(ShiftBits, SL, ShiftVT); 2997 SDValue Shift = DAG.getNode(ISD::SRL, SL, HalfVT, Trunc, ShiftK); 2998 SDValue And = DAG.getNode(ISD::AND, SL, HalfVT, Shift, NewMask); 2999 return DAG.getNode(ISD::ZERO_EXTEND, SL, VT, And); 3000 } 3001 } 3002 } 3003 } 3004 3005 return SDValue(); 3006 } 3007 3008 bool DAGCombiner::isAndLoadExtLoad(ConstantSDNode *AndC, LoadSDNode *LoadN, 3009 EVT LoadResultTy, EVT &ExtVT, EVT &LoadedVT, 3010 bool &NarrowLoad) { 3011 uint32_t ActiveBits = AndC->getAPIntValue().getActiveBits(); 3012 3013 if (ActiveBits == 0 || !APIntOps::isMask(ActiveBits, AndC->getAPIntValue())) 3014 return false; 3015 3016 ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 3017 LoadedVT = LoadN->getMemoryVT(); 3018 3019 if (ExtVT == LoadedVT && 3020 (!LegalOperations || 3021 TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT))) { 3022 // ZEXTLOAD will match without needing to change the size of the value being 3023 // loaded. 3024 NarrowLoad = false; 3025 return true; 3026 } 3027 3028 // Do not change the width of a volatile load. 3029 if (LoadN->isVolatile()) 3030 return false; 3031 3032 // Do not generate loads of non-round integer types since these can 3033 // be expensive (and would be wrong if the type is not byte sized). 3034 if (!LoadedVT.bitsGT(ExtVT) || !ExtVT.isRound()) 3035 return false; 3036 3037 if (LegalOperations && 3038 !TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, ExtVT)) 3039 return false; 3040 3041 if (!TLI.shouldReduceLoadWidth(LoadN, ISD::ZEXTLOAD, ExtVT)) 3042 return false; 3043 3044 NarrowLoad = true; 3045 return true; 3046 } 3047 3048 SDValue DAGCombiner::visitAND(SDNode *N) { 3049 SDValue N0 = N->getOperand(0); 3050 SDValue N1 = N->getOperand(1); 3051 EVT VT = N1.getValueType(); 3052 3053 // fold vector ops 3054 if (VT.isVector()) { 3055 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3056 return FoldedVOp; 3057 3058 // fold (and x, 0) -> 0, vector edition 3059 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3060 // do not return N0, because undef node may exist in N0 3061 return DAG.getConstant( 3062 APInt::getNullValue( 3063 N0.getValueType().getScalarType().getSizeInBits()), 3064 SDLoc(N), N0.getValueType()); 3065 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3066 // do not return N1, because undef node may exist in N1 3067 return DAG.getConstant( 3068 APInt::getNullValue( 3069 N1.getValueType().getScalarType().getSizeInBits()), 3070 SDLoc(N), N1.getValueType()); 3071 3072 // fold (and x, -1) -> x, vector edition 3073 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3074 return N1; 3075 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3076 return N0; 3077 } 3078 3079 // fold (and c1, c2) -> c1&c2 3080 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 3081 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3082 if (N0C && N1C && !N1C->isOpaque()) 3083 return DAG.FoldConstantArithmetic(ISD::AND, SDLoc(N), VT, N0C, N1C); 3084 // canonicalize constant to RHS 3085 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 3086 !DAG.isConstantIntBuildVectorOrConstantInt(N1)) 3087 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 3088 // fold (and x, -1) -> x 3089 if (isAllOnesConstant(N1)) 3090 return N0; 3091 // if (and x, c) is known to be zero, return 0 3092 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 3093 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 3094 APInt::getAllOnesValue(BitWidth))) 3095 return DAG.getConstant(0, SDLoc(N), VT); 3096 // reassociate and 3097 if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) 3098 return RAND; 3099 // fold (and (or x, C), D) -> D if (C & D) == D 3100 if (N1C && N0.getOpcode() == ISD::OR) 3101 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 3102 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 3103 return N1; 3104 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 3105 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 3106 SDValue N0Op0 = N0.getOperand(0); 3107 APInt Mask = ~N1C->getAPIntValue(); 3108 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 3109 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 3110 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 3111 N0.getValueType(), N0Op0); 3112 3113 // Replace uses of the AND with uses of the Zero extend node. 3114 CombineTo(N, Zext); 3115 3116 // We actually want to replace all uses of the any_extend with the 3117 // zero_extend, to avoid duplicating things. This will later cause this 3118 // AND to be folded. 3119 CombineTo(N0.getNode(), Zext); 3120 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3121 } 3122 } 3123 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 3124 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 3125 // already be zero by virtue of the width of the base type of the load. 3126 // 3127 // the 'X' node here can either be nothing or an extract_vector_elt to catch 3128 // more cases. 3129 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 3130 N0.getValueSizeInBits() == N0.getOperand(0).getScalarValueSizeInBits() && 3131 N0.getOperand(0).getOpcode() == ISD::LOAD && 3132 N0.getOperand(0).getResNo() == 0) || 3133 (N0.getOpcode() == ISD::LOAD && N0.getResNo() == 0)) { 3134 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 3135 N0 : N0.getOperand(0) ); 3136 3137 // Get the constant (if applicable) the zero'th operand is being ANDed with. 3138 // This can be a pure constant or a vector splat, in which case we treat the 3139 // vector as a scalar and use the splat value. 3140 APInt Constant = APInt::getNullValue(1); 3141 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 3142 Constant = C->getAPIntValue(); 3143 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 3144 APInt SplatValue, SplatUndef; 3145 unsigned SplatBitSize; 3146 bool HasAnyUndefs; 3147 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 3148 SplatBitSize, HasAnyUndefs); 3149 if (IsSplat) { 3150 // Undef bits can contribute to a possible optimisation if set, so 3151 // set them. 3152 SplatValue |= SplatUndef; 3153 3154 // The splat value may be something like "0x00FFFFFF", which means 0 for 3155 // the first vector value and FF for the rest, repeating. We need a mask 3156 // that will apply equally to all members of the vector, so AND all the 3157 // lanes of the constant together. 3158 EVT VT = Vector->getValueType(0); 3159 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 3160 3161 // If the splat value has been compressed to a bitlength lower 3162 // than the size of the vector lane, we need to re-expand it to 3163 // the lane size. 3164 if (BitWidth > SplatBitSize) 3165 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 3166 SplatBitSize < BitWidth; 3167 SplatBitSize = SplatBitSize * 2) 3168 SplatValue |= SplatValue.shl(SplatBitSize); 3169 3170 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a 3171 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. 3172 if (SplatBitSize % BitWidth == 0) { 3173 Constant = APInt::getAllOnesValue(BitWidth); 3174 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) 3175 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); 3176 } 3177 } 3178 } 3179 3180 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 3181 // actually legal and isn't going to get expanded, else this is a false 3182 // optimisation. 3183 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 3184 Load->getValueType(0), 3185 Load->getMemoryVT()); 3186 3187 // Resize the constant to the same size as the original memory access before 3188 // extension. If it is still the AllOnesValue then this AND is completely 3189 // unneeded. 3190 Constant = 3191 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 3192 3193 bool B; 3194 switch (Load->getExtensionType()) { 3195 default: B = false; break; 3196 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 3197 case ISD::ZEXTLOAD: 3198 case ISD::NON_EXTLOAD: B = true; break; 3199 } 3200 3201 if (B && Constant.isAllOnesValue()) { 3202 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 3203 // preserve semantics once we get rid of the AND. 3204 SDValue NewLoad(Load, 0); 3205 if (Load->getExtensionType() == ISD::EXTLOAD) { 3206 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 3207 Load->getValueType(0), SDLoc(Load), 3208 Load->getChain(), Load->getBasePtr(), 3209 Load->getOffset(), Load->getMemoryVT(), 3210 Load->getMemOperand()); 3211 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 3212 if (Load->getNumValues() == 3) { 3213 // PRE/POST_INC loads have 3 values. 3214 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 3215 NewLoad.getValue(2) }; 3216 CombineTo(Load, To, 3, true); 3217 } else { 3218 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 3219 } 3220 } 3221 3222 // Fold the AND away, taking care not to fold to the old load node if we 3223 // replaced it. 3224 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 3225 3226 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3227 } 3228 } 3229 3230 // fold (and (load x), 255) -> (zextload x, i8) 3231 // fold (and (extload x, i16), 255) -> (zextload x, i8) 3232 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 3233 if (N1C && (N0.getOpcode() == ISD::LOAD || 3234 (N0.getOpcode() == ISD::ANY_EXTEND && 3235 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 3236 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 3237 LoadSDNode *LN0 = HasAnyExt 3238 ? cast<LoadSDNode>(N0.getOperand(0)) 3239 : cast<LoadSDNode>(N0); 3240 if (LN0->getExtensionType() != ISD::SEXTLOAD && 3241 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { 3242 auto NarrowLoad = false; 3243 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 3244 EVT ExtVT, LoadedVT; 3245 if (isAndLoadExtLoad(N1C, LN0, LoadResultTy, ExtVT, LoadedVT, 3246 NarrowLoad)) { 3247 if (!NarrowLoad) { 3248 SDValue NewLoad = 3249 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3250 LN0->getChain(), LN0->getBasePtr(), ExtVT, 3251 LN0->getMemOperand()); 3252 AddToWorklist(N); 3253 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 3254 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3255 } else { 3256 EVT PtrType = LN0->getOperand(1).getValueType(); 3257 3258 unsigned Alignment = LN0->getAlignment(); 3259 SDValue NewPtr = LN0->getBasePtr(); 3260 3261 // For big endian targets, we need to add an offset to the pointer 3262 // to load the correct bytes. For little endian systems, we merely 3263 // need to read fewer bytes from the same pointer. 3264 if (DAG.getDataLayout().isBigEndian()) { 3265 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 3266 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 3267 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 3268 SDLoc DL(LN0); 3269 NewPtr = DAG.getNode(ISD::ADD, DL, PtrType, 3270 NewPtr, DAG.getConstant(PtrOff, DL, PtrType)); 3271 Alignment = MinAlign(Alignment, PtrOff); 3272 } 3273 3274 AddToWorklist(NewPtr.getNode()); 3275 3276 SDValue Load = 3277 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3278 LN0->getChain(), NewPtr, 3279 LN0->getPointerInfo(), 3280 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 3281 LN0->isInvariant(), Alignment, LN0->getAAInfo()); 3282 AddToWorklist(N); 3283 CombineTo(LN0, Load, Load.getValue(1)); 3284 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3285 } 3286 } 3287 } 3288 } 3289 3290 if (SDValue Combined = visitANDLike(N0, N1, N)) 3291 return Combined; 3292 3293 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 3294 if (N0.getOpcode() == N1.getOpcode()) 3295 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) 3296 return Tmp; 3297 3298 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 3299 // fold (and (sra)) -> (and (srl)) when possible. 3300 if (!VT.isVector() && 3301 SimplifyDemandedBits(SDValue(N, 0))) 3302 return SDValue(N, 0); 3303 3304 // fold (zext_inreg (extload x)) -> (zextload x) 3305 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 3306 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 3307 EVT MemVT = LN0->getMemoryVT(); 3308 // If we zero all the possible extended bits, then we can turn this into 3309 // a zextload if we are running before legalize or the operation is legal. 3310 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 3311 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 3312 BitWidth - MemVT.getScalarType().getSizeInBits())) && 3313 ((!LegalOperations && !LN0->isVolatile()) || 3314 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 3315 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 3316 LN0->getChain(), LN0->getBasePtr(), 3317 MemVT, LN0->getMemOperand()); 3318 AddToWorklist(N); 3319 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 3320 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3321 } 3322 } 3323 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 3324 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 3325 N0.hasOneUse()) { 3326 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 3327 EVT MemVT = LN0->getMemoryVT(); 3328 // If we zero all the possible extended bits, then we can turn this into 3329 // a zextload if we are running before legalize or the operation is legal. 3330 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 3331 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 3332 BitWidth - MemVT.getScalarType().getSizeInBits())) && 3333 ((!LegalOperations && !LN0->isVolatile()) || 3334 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 3335 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 3336 LN0->getChain(), LN0->getBasePtr(), 3337 MemVT, LN0->getMemOperand()); 3338 AddToWorklist(N); 3339 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 3340 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3341 } 3342 } 3343 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) 3344 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { 3345 if (SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 3346 N0.getOperand(1), false)) 3347 return BSwap; 3348 } 3349 3350 return SDValue(); 3351 } 3352 3353 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16. 3354 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 3355 bool DemandHighBits) { 3356 if (!LegalOperations) 3357 return SDValue(); 3358 3359 EVT VT = N->getValueType(0); 3360 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) 3361 return SDValue(); 3362 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3363 return SDValue(); 3364 3365 // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) 3366 bool LookPassAnd0 = false; 3367 bool LookPassAnd1 = false; 3368 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) 3369 std::swap(N0, N1); 3370 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) 3371 std::swap(N0, N1); 3372 if (N0.getOpcode() == ISD::AND) { 3373 if (!N0.getNode()->hasOneUse()) 3374 return SDValue(); 3375 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3376 if (!N01C || N01C->getZExtValue() != 0xFF00) 3377 return SDValue(); 3378 N0 = N0.getOperand(0); 3379 LookPassAnd0 = true; 3380 } 3381 3382 if (N1.getOpcode() == ISD::AND) { 3383 if (!N1.getNode()->hasOneUse()) 3384 return SDValue(); 3385 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3386 if (!N11C || N11C->getZExtValue() != 0xFF) 3387 return SDValue(); 3388 N1 = N1.getOperand(0); 3389 LookPassAnd1 = true; 3390 } 3391 3392 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) 3393 std::swap(N0, N1); 3394 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) 3395 return SDValue(); 3396 if (!N0.getNode()->hasOneUse() || 3397 !N1.getNode()->hasOneUse()) 3398 return SDValue(); 3399 3400 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3401 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3402 if (!N01C || !N11C) 3403 return SDValue(); 3404 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) 3405 return SDValue(); 3406 3407 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) 3408 SDValue N00 = N0->getOperand(0); 3409 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { 3410 if (!N00.getNode()->hasOneUse()) 3411 return SDValue(); 3412 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); 3413 if (!N001C || N001C->getZExtValue() != 0xFF) 3414 return SDValue(); 3415 N00 = N00.getOperand(0); 3416 LookPassAnd0 = true; 3417 } 3418 3419 SDValue N10 = N1->getOperand(0); 3420 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { 3421 if (!N10.getNode()->hasOneUse()) 3422 return SDValue(); 3423 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); 3424 if (!N101C || N101C->getZExtValue() != 0xFF00) 3425 return SDValue(); 3426 N10 = N10.getOperand(0); 3427 LookPassAnd1 = true; 3428 } 3429 3430 if (N00 != N10) 3431 return SDValue(); 3432 3433 // Make sure everything beyond the low halfword gets set to zero since the SRL 3434 // 16 will clear the top bits. 3435 unsigned OpSizeInBits = VT.getSizeInBits(); 3436 if (DemandHighBits && OpSizeInBits > 16) { 3437 // If the left-shift isn't masked out then the only way this is a bswap is 3438 // if all bits beyond the low 8 are 0. In that case the entire pattern 3439 // reduces to a left shift anyway: leave it for other parts of the combiner. 3440 if (!LookPassAnd0) 3441 return SDValue(); 3442 3443 // However, if the right shift isn't masked out then it might be because 3444 // it's not needed. See if we can spot that too. 3445 if (!LookPassAnd1 && 3446 !DAG.MaskedValueIsZero( 3447 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) 3448 return SDValue(); 3449 } 3450 3451 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); 3452 if (OpSizeInBits > 16) { 3453 SDLoc DL(N); 3454 Res = DAG.getNode(ISD::SRL, DL, VT, Res, 3455 DAG.getConstant(OpSizeInBits - 16, DL, 3456 getShiftAmountTy(VT))); 3457 } 3458 return Res; 3459 } 3460 3461 /// Return true if the specified node is an element that makes up a 32-bit 3462 /// packed halfword byteswap. 3463 /// ((x & 0x000000ff) << 8) | 3464 /// ((x & 0x0000ff00) >> 8) | 3465 /// ((x & 0x00ff0000) << 8) | 3466 /// ((x & 0xff000000) >> 8) 3467 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { 3468 if (!N.getNode()->hasOneUse()) 3469 return false; 3470 3471 unsigned Opc = N.getOpcode(); 3472 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 3473 return false; 3474 3475 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3476 if (!N1C) 3477 return false; 3478 3479 unsigned Num; 3480 switch (N1C->getZExtValue()) { 3481 default: 3482 return false; 3483 case 0xFF: Num = 0; break; 3484 case 0xFF00: Num = 1; break; 3485 case 0xFF0000: Num = 2; break; 3486 case 0xFF000000: Num = 3; break; 3487 } 3488 3489 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 3490 SDValue N0 = N.getOperand(0); 3491 if (Opc == ISD::AND) { 3492 if (Num == 0 || Num == 2) { 3493 // (x >> 8) & 0xff 3494 // (x >> 8) & 0xff0000 3495 if (N0.getOpcode() != ISD::SRL) 3496 return false; 3497 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3498 if (!C || C->getZExtValue() != 8) 3499 return false; 3500 } else { 3501 // (x << 8) & 0xff00 3502 // (x << 8) & 0xff000000 3503 if (N0.getOpcode() != ISD::SHL) 3504 return false; 3505 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3506 if (!C || C->getZExtValue() != 8) 3507 return false; 3508 } 3509 } else if (Opc == ISD::SHL) { 3510 // (x & 0xff) << 8 3511 // (x & 0xff0000) << 8 3512 if (Num != 0 && Num != 2) 3513 return false; 3514 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3515 if (!C || C->getZExtValue() != 8) 3516 return false; 3517 } else { // Opc == ISD::SRL 3518 // (x & 0xff00) >> 8 3519 // (x & 0xff000000) >> 8 3520 if (Num != 1 && Num != 3) 3521 return false; 3522 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3523 if (!C || C->getZExtValue() != 8) 3524 return false; 3525 } 3526 3527 if (Parts[Num]) 3528 return false; 3529 3530 Parts[Num] = N0.getOperand(0).getNode(); 3531 return true; 3532 } 3533 3534 /// Match a 32-bit packed halfword bswap. That is 3535 /// ((x & 0x000000ff) << 8) | 3536 /// ((x & 0x0000ff00) >> 8) | 3537 /// ((x & 0x00ff0000) << 8) | 3538 /// ((x & 0xff000000) >> 8) 3539 /// => (rotl (bswap x), 16) 3540 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 3541 if (!LegalOperations) 3542 return SDValue(); 3543 3544 EVT VT = N->getValueType(0); 3545 if (VT != MVT::i32) 3546 return SDValue(); 3547 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3548 return SDValue(); 3549 3550 // Look for either 3551 // (or (or (and), (and)), (or (and), (and))) 3552 // (or (or (or (and), (and)), (and)), (and)) 3553 if (N0.getOpcode() != ISD::OR) 3554 return SDValue(); 3555 SDValue N00 = N0.getOperand(0); 3556 SDValue N01 = N0.getOperand(1); 3557 SDNode *Parts[4] = {}; 3558 3559 if (N1.getOpcode() == ISD::OR && 3560 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { 3561 // (or (or (and), (and)), (or (and), (and))) 3562 SDValue N000 = N00.getOperand(0); 3563 if (!isBSwapHWordElement(N000, Parts)) 3564 return SDValue(); 3565 3566 SDValue N001 = N00.getOperand(1); 3567 if (!isBSwapHWordElement(N001, Parts)) 3568 return SDValue(); 3569 SDValue N010 = N01.getOperand(0); 3570 if (!isBSwapHWordElement(N010, Parts)) 3571 return SDValue(); 3572 SDValue N011 = N01.getOperand(1); 3573 if (!isBSwapHWordElement(N011, Parts)) 3574 return SDValue(); 3575 } else { 3576 // (or (or (or (and), (and)), (and)), (and)) 3577 if (!isBSwapHWordElement(N1, Parts)) 3578 return SDValue(); 3579 if (!isBSwapHWordElement(N01, Parts)) 3580 return SDValue(); 3581 if (N00.getOpcode() != ISD::OR) 3582 return SDValue(); 3583 SDValue N000 = N00.getOperand(0); 3584 if (!isBSwapHWordElement(N000, Parts)) 3585 return SDValue(); 3586 SDValue N001 = N00.getOperand(1); 3587 if (!isBSwapHWordElement(N001, Parts)) 3588 return SDValue(); 3589 } 3590 3591 // Make sure the parts are all coming from the same node. 3592 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) 3593 return SDValue(); 3594 3595 SDLoc DL(N); 3596 SDValue BSwap = DAG.getNode(ISD::BSWAP, DL, VT, 3597 SDValue(Parts[0], 0)); 3598 3599 // Result of the bswap should be rotated by 16. If it's not legal, then 3600 // do (x << 16) | (x >> 16). 3601 SDValue ShAmt = DAG.getConstant(16, DL, getShiftAmountTy(VT)); 3602 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 3603 return DAG.getNode(ISD::ROTL, DL, VT, BSwap, ShAmt); 3604 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) 3605 return DAG.getNode(ISD::ROTR, DL, VT, BSwap, ShAmt); 3606 return DAG.getNode(ISD::OR, DL, VT, 3607 DAG.getNode(ISD::SHL, DL, VT, BSwap, ShAmt), 3608 DAG.getNode(ISD::SRL, DL, VT, BSwap, ShAmt)); 3609 } 3610 3611 /// This contains all DAGCombine rules which reduce two values combined by 3612 /// an Or operation to a single value \see visitANDLike(). 3613 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { 3614 EVT VT = N1.getValueType(); 3615 // fold (or x, undef) -> -1 3616 if (!LegalOperations && 3617 (N0.isUndef() || N1.isUndef())) { 3618 EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; 3619 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), 3620 SDLoc(LocReference), VT); 3621 } 3622 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) 3623 SDValue LL, LR, RL, RR, CC0, CC1; 3624 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 3625 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 3626 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 3627 3628 if (LR == RR && Op0 == Op1 && LL.getValueType().isInteger()) { 3629 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) 3630 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) 3631 if (isNullConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { 3632 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), 3633 LR.getValueType(), LL, RL); 3634 AddToWorklist(ORNode.getNode()); 3635 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 3636 } 3637 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) 3638 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) 3639 if (isAllOnesConstant(LR) && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { 3640 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), 3641 LR.getValueType(), LL, RL); 3642 AddToWorklist(ANDNode.getNode()); 3643 return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); 3644 } 3645 } 3646 // canonicalize equivalent to ll == rl 3647 if (LL == RR && LR == RL) { 3648 Op1 = ISD::getSetCCSwappedOperands(Op1); 3649 std::swap(RL, RR); 3650 } 3651 if (LL == RL && LR == RR) { 3652 bool isInteger = LL.getValueType().isInteger(); 3653 ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); 3654 if (Result != ISD::SETCC_INVALID && 3655 (!LegalOperations || 3656 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 3657 TLI.isOperationLegal(ISD::SETCC, LL.getValueType())))) { 3658 EVT CCVT = getSetCCResultType(LL.getValueType()); 3659 if (N0.getValueType() == CCVT || 3660 (!LegalOperations && N0.getValueType() == MVT::i1)) 3661 return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), 3662 LL, LR, Result); 3663 } 3664 } 3665 } 3666 3667 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 3668 if (N0.getOpcode() == ISD::AND && N1.getOpcode() == ISD::AND && 3669 // Don't increase # computations. 3670 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3671 // We can only do this xform if we know that bits from X that are set in C2 3672 // but not in C1 are already zero. Likewise for Y. 3673 if (const ConstantSDNode *N0O1C = 3674 getAsNonOpaqueConstant(N0.getOperand(1))) { 3675 if (const ConstantSDNode *N1O1C = 3676 getAsNonOpaqueConstant(N1.getOperand(1))) { 3677 // We can only do this xform if we know that bits from X that are set in 3678 // C2 but not in C1 are already zero. Likewise for Y. 3679 const APInt &LHSMask = N0O1C->getAPIntValue(); 3680 const APInt &RHSMask = N1O1C->getAPIntValue(); 3681 3682 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && 3683 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { 3684 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3685 N0.getOperand(0), N1.getOperand(0)); 3686 SDLoc DL(LocReference); 3687 return DAG.getNode(ISD::AND, DL, VT, X, 3688 DAG.getConstant(LHSMask | RHSMask, DL, VT)); 3689 } 3690 } 3691 } 3692 } 3693 3694 // (or (and X, M), (and X, N)) -> (and X, (or M, N)) 3695 if (N0.getOpcode() == ISD::AND && 3696 N1.getOpcode() == ISD::AND && 3697 N0.getOperand(0) == N1.getOperand(0) && 3698 // Don't increase # computations. 3699 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3700 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3701 N0.getOperand(1), N1.getOperand(1)); 3702 return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); 3703 } 3704 3705 return SDValue(); 3706 } 3707 3708 SDValue DAGCombiner::visitOR(SDNode *N) { 3709 SDValue N0 = N->getOperand(0); 3710 SDValue N1 = N->getOperand(1); 3711 EVT VT = N1.getValueType(); 3712 3713 // fold vector ops 3714 if (VT.isVector()) { 3715 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3716 return FoldedVOp; 3717 3718 // fold (or x, 0) -> x, vector edition 3719 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3720 return N1; 3721 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3722 return N0; 3723 3724 // fold (or x, -1) -> -1, vector edition 3725 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3726 // do not return N0, because undef node may exist in N0 3727 return DAG.getConstant( 3728 APInt::getAllOnesValue( 3729 N0.getValueType().getScalarType().getSizeInBits()), 3730 SDLoc(N), N0.getValueType()); 3731 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3732 // do not return N1, because undef node may exist in N1 3733 return DAG.getConstant( 3734 APInt::getAllOnesValue( 3735 N1.getValueType().getScalarType().getSizeInBits()), 3736 SDLoc(N), N1.getValueType()); 3737 3738 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask) 3739 // Do this only if the resulting shuffle is legal. 3740 if (isa<ShuffleVectorSDNode>(N0) && 3741 isa<ShuffleVectorSDNode>(N1) && 3742 // Avoid folding a node with illegal type. 3743 TLI.isTypeLegal(VT)) { 3744 bool ZeroN00 = ISD::isBuildVectorAllZeros(N0.getOperand(0).getNode()); 3745 bool ZeroN01 = ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode()); 3746 bool ZeroN10 = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); 3747 bool ZeroN11 = ISD::isBuildVectorAllZeros(N1.getOperand(1).getNode()); 3748 // Ensure both shuffles have a zero input. 3749 if ((ZeroN00 || ZeroN01) && (ZeroN10 || ZeroN11)) { 3750 assert((!ZeroN00 || !ZeroN01) && "Both inputs zero!"); 3751 assert((!ZeroN10 || !ZeroN11) && "Both inputs zero!"); 3752 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); 3753 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); 3754 bool CanFold = true; 3755 int NumElts = VT.getVectorNumElements(); 3756 SmallVector<int, 4> Mask(NumElts); 3757 3758 for (int i = 0; i != NumElts; ++i) { 3759 int M0 = SV0->getMaskElt(i); 3760 int M1 = SV1->getMaskElt(i); 3761 3762 // Determine if either index is pointing to a zero vector. 3763 bool M0Zero = M0 < 0 || (ZeroN00 == (M0 < NumElts)); 3764 bool M1Zero = M1 < 0 || (ZeroN10 == (M1 < NumElts)); 3765 3766 // If one element is zero and the otherside is undef, keep undef. 3767 // This also handles the case that both are undef. 3768 if ((M0Zero && M1 < 0) || (M1Zero && M0 < 0)) { 3769 Mask[i] = -1; 3770 continue; 3771 } 3772 3773 // Make sure only one of the elements is zero. 3774 if (M0Zero == M1Zero) { 3775 CanFold = false; 3776 break; 3777 } 3778 3779 assert((M0 >= 0 || M1 >= 0) && "Undef index!"); 3780 3781 // We have a zero and non-zero element. If the non-zero came from 3782 // SV0 make the index a LHS index. If it came from SV1, make it 3783 // a RHS index. We need to mod by NumElts because we don't care 3784 // which operand it came from in the original shuffles. 3785 Mask[i] = M1Zero ? M0 % NumElts : (M1 % NumElts) + NumElts; 3786 } 3787 3788 if (CanFold) { 3789 SDValue NewLHS = ZeroN00 ? N0.getOperand(1) : N0.getOperand(0); 3790 SDValue NewRHS = ZeroN10 ? N1.getOperand(1) : N1.getOperand(0); 3791 3792 bool LegalMask = TLI.isShuffleMaskLegal(Mask, VT); 3793 if (!LegalMask) { 3794 std::swap(NewLHS, NewRHS); 3795 ShuffleVectorSDNode::commuteMask(Mask); 3796 LegalMask = TLI.isShuffleMaskLegal(Mask, VT); 3797 } 3798 3799 if (LegalMask) 3800 return DAG.getVectorShuffle(VT, SDLoc(N), NewLHS, NewRHS, Mask); 3801 } 3802 } 3803 } 3804 } 3805 3806 // fold (or c1, c2) -> c1|c2 3807 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 3808 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3809 if (N0C && N1C && !N1C->isOpaque()) 3810 return DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N), VT, N0C, N1C); 3811 // canonicalize constant to RHS 3812 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 3813 !DAG.isConstantIntBuildVectorOrConstantInt(N1)) 3814 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); 3815 // fold (or x, 0) -> x 3816 if (isNullConstant(N1)) 3817 return N0; 3818 // fold (or x, -1) -> -1 3819 if (isAllOnesConstant(N1)) 3820 return N1; 3821 // fold (or x, c) -> c iff (x & ~c) == 0 3822 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) 3823 return N1; 3824 3825 if (SDValue Combined = visitORLike(N0, N1, N)) 3826 return Combined; 3827 3828 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) 3829 if (SDValue BSwap = MatchBSwapHWord(N, N0, N1)) 3830 return BSwap; 3831 if (SDValue BSwap = MatchBSwapHWordLow(N, N0, N1)) 3832 return BSwap; 3833 3834 // reassociate or 3835 if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) 3836 return ROR; 3837 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) 3838 // iff (c1 & c2) == 0. 3839 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 3840 isa<ConstantSDNode>(N0.getOperand(1))) { 3841 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); 3842 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { 3843 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, SDLoc(N1), VT, 3844 N1C, C1)) 3845 return DAG.getNode( 3846 ISD::AND, SDLoc(N), VT, 3847 DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); 3848 return SDValue(); 3849 } 3850 } 3851 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 3852 if (N0.getOpcode() == N1.getOpcode()) 3853 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) 3854 return Tmp; 3855 3856 // See if this is some rotate idiom. 3857 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) 3858 return SDValue(Rot, 0); 3859 3860 // Simplify the operands using demanded-bits information. 3861 if (!VT.isVector() && 3862 SimplifyDemandedBits(SDValue(N, 0))) 3863 return SDValue(N, 0); 3864 3865 return SDValue(); 3866 } 3867 3868 /// Match "(X shl/srl V1) & V2" where V2 may not be present. 3869 bool DAGCombiner::MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 3870 if (Op.getOpcode() == ISD::AND) { 3871 if (DAG.isConstantIntBuildVectorOrConstantInt(Op.getOperand(1))) { 3872 Mask = Op.getOperand(1); 3873 Op = Op.getOperand(0); 3874 } else { 3875 return false; 3876 } 3877 } 3878 3879 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 3880 Shift = Op; 3881 return true; 3882 } 3883 3884 return false; 3885 } 3886 3887 // Return true if we can prove that, whenever Neg and Pos are both in the 3888 // range [0, EltSize), Neg == (Pos == 0 ? 0 : EltSize - Pos). This means that 3889 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: 3890 // 3891 // (or (shift1 X, Neg), (shift2 X, Pos)) 3892 // 3893 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate 3894 // in direction shift1 by Neg. The range [0, EltSize) means that we only need 3895 // to consider shift amounts with defined behavior. 3896 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { 3897 // If EltSize is a power of 2 then: 3898 // 3899 // (a) (Pos == 0 ? 0 : EltSize - Pos) == (EltSize - Pos) & (EltSize - 1) 3900 // (b) Neg == Neg & (EltSize - 1) whenever Neg is in [0, EltSize). 3901 // 3902 // So if EltSize is a power of 2 and Neg is (and Neg', EltSize-1), we check 3903 // for the stronger condition: 3904 // 3905 // Neg & (EltSize - 1) == (EltSize - Pos) & (EltSize - 1) [A] 3906 // 3907 // for all Neg and Pos. Since Neg & (EltSize - 1) == Neg' & (EltSize - 1) 3908 // we can just replace Neg with Neg' for the rest of the function. 3909 // 3910 // In other cases we check for the even stronger condition: 3911 // 3912 // Neg == EltSize - Pos [B] 3913 // 3914 // for all Neg and Pos. Note that the (or ...) then invokes undefined 3915 // behavior if Pos == 0 (and consequently Neg == EltSize). 3916 // 3917 // We could actually use [A] whenever EltSize is a power of 2, but the 3918 // only extra cases that it would match are those uninteresting ones 3919 // where Neg and Pos are never in range at the same time. E.g. for 3920 // EltSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) 3921 // as well as (sub 32, Pos), but: 3922 // 3923 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) 3924 // 3925 // always invokes undefined behavior for 32-bit X. 3926 // 3927 // Below, Mask == EltSize - 1 when using [A] and is all-ones otherwise. 3928 unsigned MaskLoBits = 0; 3929 if (Neg.getOpcode() == ISD::AND && isPowerOf2_64(EltSize)) { 3930 if (ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(1))) { 3931 if (NegC->getAPIntValue() == EltSize - 1) { 3932 Neg = Neg.getOperand(0); 3933 MaskLoBits = Log2_64(EltSize); 3934 } 3935 } 3936 } 3937 3938 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 3939 if (Neg.getOpcode() != ISD::SUB) 3940 return false; 3941 ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); 3942 if (!NegC) 3943 return false; 3944 SDValue NegOp1 = Neg.getOperand(1); 3945 3946 // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with 3947 // Pos'. The truncation is redundant for the purpose of the equality. 3948 if (MaskLoBits && Pos.getOpcode() == ISD::AND) 3949 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) 3950 if (PosC->getAPIntValue() == EltSize - 1) 3951 Pos = Pos.getOperand(0); 3952 3953 // The condition we need is now: 3954 // 3955 // (NegC - NegOp1) & Mask == (EltSize - Pos) & Mask 3956 // 3957 // If NegOp1 == Pos then we need: 3958 // 3959 // EltSize & Mask == NegC & Mask 3960 // 3961 // (because "x & Mask" is a truncation and distributes through subtraction). 3962 APInt Width; 3963 if (Pos == NegOp1) 3964 Width = NegC->getAPIntValue(); 3965 3966 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 3967 // Then the condition we want to prove becomes: 3968 // 3969 // (NegC - NegOp1) & Mask == (EltSize - (NegOp1 + PosC)) & Mask 3970 // 3971 // which, again because "x & Mask" is a truncation, becomes: 3972 // 3973 // NegC & Mask == (EltSize - PosC) & Mask 3974 // EltSize & Mask == (NegC + PosC) & Mask 3975 else if (Pos.getOpcode() == ISD::ADD && Pos.getOperand(0) == NegOp1) { 3976 if (ConstantSDNode *PosC = isConstOrConstSplat(Pos.getOperand(1))) 3977 Width = PosC->getAPIntValue() + NegC->getAPIntValue(); 3978 else 3979 return false; 3980 } else 3981 return false; 3982 3983 // Now we just need to check that EltSize & Mask == Width & Mask. 3984 if (MaskLoBits) 3985 // EltSize & Mask is 0 since Mask is EltSize - 1. 3986 return Width.getLoBits(MaskLoBits) == 0; 3987 return Width == EltSize; 3988 } 3989 3990 // A subroutine of MatchRotate used once we have found an OR of two opposite 3991 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces 3992 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 3993 // former being preferred if supported. InnerPos and InnerNeg are Pos and 3994 // Neg with outer conversions stripped away. 3995 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 3996 SDValue Neg, SDValue InnerPos, 3997 SDValue InnerNeg, unsigned PosOpcode, 3998 unsigned NegOpcode, const SDLoc &DL) { 3999 // fold (or (shl x, (*ext y)), 4000 // (srl x, (*ext (sub 32, y)))) -> 4001 // (rotl x, y) or (rotr x, (sub 32, y)) 4002 // 4003 // fold (or (shl x, (*ext (sub 32, y))), 4004 // (srl x, (*ext y))) -> 4005 // (rotr x, y) or (rotl x, (sub 32, y)) 4006 EVT VT = Shifted.getValueType(); 4007 if (matchRotateSub(InnerPos, InnerNeg, VT.getScalarSizeInBits())) { 4008 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 4009 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, 4010 HasPos ? Pos : Neg).getNode(); 4011 } 4012 4013 return nullptr; 4014 } 4015 4016 // MatchRotate - Handle an 'or' of two operands. If this is one of the many 4017 // idioms for rotate, and if the target supports rotation instructions, generate 4018 // a rot[lr]. 4019 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, const SDLoc &DL) { 4020 // Must be a legal type. Expanded 'n promoted things won't work with rotates. 4021 EVT VT = LHS.getValueType(); 4022 if (!TLI.isTypeLegal(VT)) return nullptr; 4023 4024 // The target must have at least one rotate flavor. 4025 bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); 4026 bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); 4027 if (!HasROTL && !HasROTR) return nullptr; 4028 4029 // Match "(X shl/srl V1) & V2" where V2 may not be present. 4030 SDValue LHSShift; // The shift. 4031 SDValue LHSMask; // AND value if any. 4032 if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) 4033 return nullptr; // Not part of a rotate. 4034 4035 SDValue RHSShift; // The shift. 4036 SDValue RHSMask; // AND value if any. 4037 if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) 4038 return nullptr; // Not part of a rotate. 4039 4040 if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) 4041 return nullptr; // Not shifting the same value. 4042 4043 if (LHSShift.getOpcode() == RHSShift.getOpcode()) 4044 return nullptr; // Shifts must disagree. 4045 4046 // Canonicalize shl to left side in a shl/srl pair. 4047 if (RHSShift.getOpcode() == ISD::SHL) { 4048 std::swap(LHS, RHS); 4049 std::swap(LHSShift, RHSShift); 4050 std::swap(LHSMask, RHSMask); 4051 } 4052 4053 unsigned EltSizeInBits = VT.getScalarSizeInBits(); 4054 SDValue LHSShiftArg = LHSShift.getOperand(0); 4055 SDValue LHSShiftAmt = LHSShift.getOperand(1); 4056 SDValue RHSShiftArg = RHSShift.getOperand(0); 4057 SDValue RHSShiftAmt = RHSShift.getOperand(1); 4058 4059 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) 4060 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) 4061 if (isConstOrConstSplat(LHSShiftAmt) && isConstOrConstSplat(RHSShiftAmt)) { 4062 uint64_t LShVal = isConstOrConstSplat(LHSShiftAmt)->getZExtValue(); 4063 uint64_t RShVal = isConstOrConstSplat(RHSShiftAmt)->getZExtValue(); 4064 if ((LShVal + RShVal) != EltSizeInBits) 4065 return nullptr; 4066 4067 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, 4068 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); 4069 4070 // If there is an AND of either shifted operand, apply it to the result. 4071 if (LHSMask.getNode() || RHSMask.getNode()) { 4072 APInt AllBits = APInt::getAllOnesValue(EltSizeInBits); 4073 SDValue Mask = DAG.getConstant(AllBits, DL, VT); 4074 4075 if (LHSMask.getNode()) { 4076 APInt RHSBits = APInt::getLowBitsSet(EltSizeInBits, LShVal); 4077 Mask = DAG.getNode(ISD::AND, DL, VT, Mask, 4078 DAG.getNode(ISD::OR, DL, VT, LHSMask, 4079 DAG.getConstant(RHSBits, DL, VT))); 4080 } 4081 if (RHSMask.getNode()) { 4082 APInt LHSBits = APInt::getHighBitsSet(EltSizeInBits, RShVal); 4083 Mask = DAG.getNode(ISD::AND, DL, VT, Mask, 4084 DAG.getNode(ISD::OR, DL, VT, RHSMask, 4085 DAG.getConstant(LHSBits, DL, VT))); 4086 } 4087 4088 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, Mask); 4089 } 4090 4091 return Rot.getNode(); 4092 } 4093 4094 // If there is a mask here, and we have a variable shift, we can't be sure 4095 // that we're masking out the right stuff. 4096 if (LHSMask.getNode() || RHSMask.getNode()) 4097 return nullptr; 4098 4099 // If the shift amount is sign/zext/any-extended just peel it off. 4100 SDValue LExtOp0 = LHSShiftAmt; 4101 SDValue RExtOp0 = RHSShiftAmt; 4102 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 4103 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 4104 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 4105 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && 4106 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 4107 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 4108 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 4109 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { 4110 LExtOp0 = LHSShiftAmt.getOperand(0); 4111 RExtOp0 = RHSShiftAmt.getOperand(0); 4112 } 4113 4114 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, 4115 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); 4116 if (TryL) 4117 return TryL; 4118 4119 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, 4120 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); 4121 if (TryR) 4122 return TryR; 4123 4124 return nullptr; 4125 } 4126 4127 SDValue DAGCombiner::visitXOR(SDNode *N) { 4128 SDValue N0 = N->getOperand(0); 4129 SDValue N1 = N->getOperand(1); 4130 EVT VT = N0.getValueType(); 4131 4132 // fold vector ops 4133 if (VT.isVector()) { 4134 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4135 return FoldedVOp; 4136 4137 // fold (xor x, 0) -> x, vector edition 4138 if (ISD::isBuildVectorAllZeros(N0.getNode())) 4139 return N1; 4140 if (ISD::isBuildVectorAllZeros(N1.getNode())) 4141 return N0; 4142 } 4143 4144 // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 4145 if (N0.isUndef() && N1.isUndef()) 4146 return DAG.getConstant(0, SDLoc(N), VT); 4147 // fold (xor x, undef) -> undef 4148 if (N0.isUndef()) 4149 return N0; 4150 if (N1.isUndef()) 4151 return N1; 4152 // fold (xor c1, c2) -> c1^c2 4153 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4154 ConstantSDNode *N1C = getAsNonOpaqueConstant(N1); 4155 if (N0C && N1C) 4156 return DAG.FoldConstantArithmetic(ISD::XOR, SDLoc(N), VT, N0C, N1C); 4157 // canonicalize constant to RHS 4158 if (DAG.isConstantIntBuildVectorOrConstantInt(N0) && 4159 !DAG.isConstantIntBuildVectorOrConstantInt(N1)) 4160 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); 4161 // fold (xor x, 0) -> x 4162 if (isNullConstant(N1)) 4163 return N0; 4164 // reassociate xor 4165 if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) 4166 return RXOR; 4167 4168 // fold !(x cc y) -> (x !cc y) 4169 SDValue LHS, RHS, CC; 4170 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { 4171 bool isInt = LHS.getValueType().isInteger(); 4172 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), 4173 isInt); 4174 4175 if (!LegalOperations || 4176 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { 4177 switch (N0.getOpcode()) { 4178 default: 4179 llvm_unreachable("Unhandled SetCC Equivalent!"); 4180 case ISD::SETCC: 4181 return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC); 4182 case ISD::SELECT_CC: 4183 return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2), 4184 N0.getOperand(3), NotCC); 4185 } 4186 } 4187 } 4188 4189 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) 4190 if (isOneConstant(N1) && N0.getOpcode() == ISD::ZERO_EXTEND && 4191 N0.getNode()->hasOneUse() && 4192 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ 4193 SDValue V = N0.getOperand(0); 4194 SDLoc DL(N0); 4195 V = DAG.getNode(ISD::XOR, DL, V.getValueType(), V, 4196 DAG.getConstant(1, DL, V.getValueType())); 4197 AddToWorklist(V.getNode()); 4198 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); 4199 } 4200 4201 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc 4202 if (isOneConstant(N1) && VT == MVT::i1 && 4203 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 4204 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 4205 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { 4206 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; 4207 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 4208 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 4209 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 4210 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); 4211 } 4212 } 4213 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants 4214 if (isAllOnesConstant(N1) && 4215 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 4216 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 4217 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { 4218 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; 4219 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 4220 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 4221 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 4222 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); 4223 } 4224 } 4225 // fold (xor (and x, y), y) -> (and (not x), y) 4226 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 4227 N0->getOperand(1) == N1) { 4228 SDValue X = N0->getOperand(0); 4229 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); 4230 AddToWorklist(NotX.getNode()); 4231 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); 4232 } 4233 // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) 4234 if (N1C && N0.getOpcode() == ISD::XOR) { 4235 if (const ConstantSDNode *N00C = getAsNonOpaqueConstant(N0.getOperand(0))) { 4236 SDLoc DL(N); 4237 return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(1), 4238 DAG.getConstant(N1C->getAPIntValue() ^ 4239 N00C->getAPIntValue(), DL, VT)); 4240 } 4241 if (const ConstantSDNode *N01C = getAsNonOpaqueConstant(N0.getOperand(1))) { 4242 SDLoc DL(N); 4243 return DAG.getNode(ISD::XOR, DL, VT, N0.getOperand(0), 4244 DAG.getConstant(N1C->getAPIntValue() ^ 4245 N01C->getAPIntValue(), DL, VT)); 4246 } 4247 } 4248 // fold (xor x, x) -> 0 4249 if (N0 == N1) 4250 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); 4251 4252 // fold (xor (shl 1, x), -1) -> (rotl ~1, x) 4253 // Here is a concrete example of this equivalence: 4254 // i16 x == 14 4255 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 4256 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 4257 // 4258 // => 4259 // 4260 // i16 ~1 == 0b1111111111111110 4261 // i16 rol(~1, 14) == 0b1011111111111111 4262 // 4263 // Some additional tips to help conceptualize this transform: 4264 // - Try to see the operation as placing a single zero in a value of all ones. 4265 // - There exists no value for x which would allow the result to contain zero. 4266 // - Values of x larger than the bitwidth are undefined and do not require a 4267 // consistent result. 4268 // - Pushing the zero left requires shifting one bits in from the right. 4269 // A rotate left of ~1 is a nice way of achieving the desired result. 4270 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT) && N0.getOpcode() == ISD::SHL 4271 && isAllOnesConstant(N1) && isOneConstant(N0.getOperand(0))) { 4272 SDLoc DL(N); 4273 return DAG.getNode(ISD::ROTL, DL, VT, DAG.getConstant(~1, DL, VT), 4274 N0.getOperand(1)); 4275 } 4276 4277 // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) 4278 if (N0.getOpcode() == N1.getOpcode()) 4279 if (SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N)) 4280 return Tmp; 4281 4282 // Simplify the expression using non-local knowledge. 4283 if (!VT.isVector() && 4284 SimplifyDemandedBits(SDValue(N, 0))) 4285 return SDValue(N, 0); 4286 4287 return SDValue(); 4288 } 4289 4290 /// Handle transforms common to the three shifts, when the shift amount is a 4291 /// constant. 4292 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { 4293 SDNode *LHS = N->getOperand(0).getNode(); 4294 if (!LHS->hasOneUse()) return SDValue(); 4295 4296 // We want to pull some binops through shifts, so that we have (and (shift)) 4297 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of 4298 // thing happens with address calculations, so it's important to canonicalize 4299 // it. 4300 bool HighBitSet = false; // Can we transform this if the high bit is set? 4301 4302 switch (LHS->getOpcode()) { 4303 default: return SDValue(); 4304 case ISD::OR: 4305 case ISD::XOR: 4306 HighBitSet = false; // We can only transform sra if the high bit is clear. 4307 break; 4308 case ISD::AND: 4309 HighBitSet = true; // We can only transform sra if the high bit is set. 4310 break; 4311 case ISD::ADD: 4312 if (N->getOpcode() != ISD::SHL) 4313 return SDValue(); // only shl(add) not sr[al](add). 4314 HighBitSet = false; // We can only transform sra if the high bit is clear. 4315 break; 4316 } 4317 4318 // We require the RHS of the binop to be a constant and not opaque as well. 4319 ConstantSDNode *BinOpCst = getAsNonOpaqueConstant(LHS->getOperand(1)); 4320 if (!BinOpCst) return SDValue(); 4321 4322 // FIXME: disable this unless the input to the binop is a shift by a constant. 4323 // If it is not a shift, it pessimizes some common cases like: 4324 // 4325 // void foo(int *X, int i) { X[i & 1235] = 1; } 4326 // int bar(int *X, int i) { return X[i & 255]; } 4327 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); 4328 if ((BinOpLHSVal->getOpcode() != ISD::SHL && 4329 BinOpLHSVal->getOpcode() != ISD::SRA && 4330 BinOpLHSVal->getOpcode() != ISD::SRL) || 4331 !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) 4332 return SDValue(); 4333 4334 EVT VT = N->getValueType(0); 4335 4336 // If this is a signed shift right, and the high bit is modified by the 4337 // logical operation, do not perform the transformation. The highBitSet 4338 // boolean indicates the value of the high bit of the constant which would 4339 // cause it to be modified for this operation. 4340 if (N->getOpcode() == ISD::SRA) { 4341 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); 4342 if (BinOpRHSSignSet != HighBitSet) 4343 return SDValue(); 4344 } 4345 4346 if (!TLI.isDesirableToCommuteWithShift(LHS)) 4347 return SDValue(); 4348 4349 // Fold the constants, shifting the binop RHS by the shift amount. 4350 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), 4351 N->getValueType(0), 4352 LHS->getOperand(1), N->getOperand(1)); 4353 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); 4354 4355 // Create the new shift. 4356 SDValue NewShift = DAG.getNode(N->getOpcode(), 4357 SDLoc(LHS->getOperand(0)), 4358 VT, LHS->getOperand(0), N->getOperand(1)); 4359 4360 // Create the new binop. 4361 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); 4362 } 4363 4364 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { 4365 assert(N->getOpcode() == ISD::TRUNCATE); 4366 assert(N->getOperand(0).getOpcode() == ISD::AND); 4367 4368 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) 4369 if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { 4370 SDValue N01 = N->getOperand(0).getOperand(1); 4371 4372 if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { 4373 if (!N01C->isOpaque()) { 4374 EVT TruncVT = N->getValueType(0); 4375 SDValue N00 = N->getOperand(0).getOperand(0); 4376 APInt TruncC = N01C->getAPIntValue(); 4377 TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); 4378 SDLoc DL(N); 4379 4380 return DAG.getNode(ISD::AND, DL, TruncVT, 4381 DAG.getNode(ISD::TRUNCATE, DL, TruncVT, N00), 4382 DAG.getConstant(TruncC, DL, TruncVT)); 4383 } 4384 } 4385 } 4386 4387 return SDValue(); 4388 } 4389 4390 SDValue DAGCombiner::visitRotate(SDNode *N) { 4391 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 4392 if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && 4393 N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { 4394 if (SDValue NewOp1 = 4395 distributeTruncateThroughAnd(N->getOperand(1).getNode())) 4396 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), 4397 N->getOperand(0), NewOp1); 4398 } 4399 return SDValue(); 4400 } 4401 4402 SDValue DAGCombiner::visitSHL(SDNode *N) { 4403 SDValue N0 = N->getOperand(0); 4404 SDValue N1 = N->getOperand(1); 4405 EVT VT = N0.getValueType(); 4406 unsigned OpSizeInBits = VT.getScalarSizeInBits(); 4407 4408 // fold vector ops 4409 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4410 if (VT.isVector()) { 4411 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4412 return FoldedVOp; 4413 4414 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); 4415 // If setcc produces all-one true value then: 4416 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) 4417 if (N1CV && N1CV->isConstant()) { 4418 if (N0.getOpcode() == ISD::AND) { 4419 SDValue N00 = N0->getOperand(0); 4420 SDValue N01 = N0->getOperand(1); 4421 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); 4422 4423 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && 4424 TLI.getBooleanContents(N00.getOperand(0).getValueType()) == 4425 TargetLowering::ZeroOrNegativeOneBooleanContent) { 4426 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, 4427 N01CV, N1CV)) 4428 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); 4429 } 4430 } else { 4431 N1C = isConstOrConstSplat(N1); 4432 } 4433 } 4434 } 4435 4436 // fold (shl c1, c2) -> c1<<c2 4437 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4438 if (N0C && N1C && !N1C->isOpaque()) 4439 return DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N), VT, N0C, N1C); 4440 // fold (shl 0, x) -> 0 4441 if (isNullConstant(N0)) 4442 return N0; 4443 // fold (shl x, c >= size(x)) -> undef 4444 if (N1C && N1C->getAPIntValue().uge(OpSizeInBits)) 4445 return DAG.getUNDEF(VT); 4446 // fold (shl x, 0) -> x 4447 if (N1C && N1C->isNullValue()) 4448 return N0; 4449 // fold (shl undef, x) -> 0 4450 if (N0.isUndef()) 4451 return DAG.getConstant(0, SDLoc(N), VT); 4452 // if (shl x, c) is known to be zero, return 0 4453 if (DAG.MaskedValueIsZero(SDValue(N, 0), 4454 APInt::getAllOnesValue(OpSizeInBits))) 4455 return DAG.getConstant(0, SDLoc(N), VT); 4456 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 4457 if (N1.getOpcode() == ISD::TRUNCATE && 4458 N1.getOperand(0).getOpcode() == ISD::AND) { 4459 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) 4460 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); 4461 } 4462 4463 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4464 return SDValue(N, 0); 4465 4466 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) 4467 if (N1C && N0.getOpcode() == ISD::SHL) { 4468 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4469 uint64_t c1 = N0C1->getZExtValue(); 4470 uint64_t c2 = N1C->getZExtValue(); 4471 SDLoc DL(N); 4472 if (c1 + c2 >= OpSizeInBits) 4473 return DAG.getConstant(0, DL, VT); 4474 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), 4475 DAG.getConstant(c1 + c2, DL, N1.getValueType())); 4476 } 4477 } 4478 4479 // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) 4480 // For this to be valid, the second form must not preserve any of the bits 4481 // that are shifted out by the inner shift in the first form. This means 4482 // the outer shift size must be >= the number of bits added by the ext. 4483 // As a corollary, we don't care what kind of ext it is. 4484 if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || 4485 N0.getOpcode() == ISD::ANY_EXTEND || 4486 N0.getOpcode() == ISD::SIGN_EXTEND) && 4487 N0.getOperand(0).getOpcode() == ISD::SHL) { 4488 SDValue N0Op0 = N0.getOperand(0); 4489 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { 4490 uint64_t c1 = N0Op0C1->getZExtValue(); 4491 uint64_t c2 = N1C->getZExtValue(); 4492 EVT InnerShiftVT = N0Op0.getValueType(); 4493 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); 4494 if (c2 >= OpSizeInBits - InnerShiftSize) { 4495 SDLoc DL(N0); 4496 if (c1 + c2 >= OpSizeInBits) 4497 return DAG.getConstant(0, DL, VT); 4498 return DAG.getNode(ISD::SHL, DL, VT, 4499 DAG.getNode(N0.getOpcode(), DL, VT, 4500 N0Op0->getOperand(0)), 4501 DAG.getConstant(c1 + c2, DL, N1.getValueType())); 4502 } 4503 } 4504 } 4505 4506 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) 4507 // Only fold this if the inner zext has no other uses to avoid increasing 4508 // the total number of instructions. 4509 if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && 4510 N0.getOperand(0).getOpcode() == ISD::SRL) { 4511 SDValue N0Op0 = N0.getOperand(0); 4512 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { 4513 uint64_t c1 = N0Op0C1->getZExtValue(); 4514 if (c1 < VT.getScalarSizeInBits()) { 4515 uint64_t c2 = N1C->getZExtValue(); 4516 if (c1 == c2) { 4517 SDValue NewOp0 = N0.getOperand(0); 4518 EVT CountVT = NewOp0.getOperand(1).getValueType(); 4519 SDLoc DL(N); 4520 SDValue NewSHL = DAG.getNode(ISD::SHL, DL, NewOp0.getValueType(), 4521 NewOp0, 4522 DAG.getConstant(c2, DL, CountVT)); 4523 AddToWorklist(NewSHL.getNode()); 4524 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); 4525 } 4526 } 4527 } 4528 } 4529 4530 // fold (shl (sr[la] exact X, C1), C2) -> (shl X, (C2-C1)) if C1 <= C2 4531 // fold (shl (sr[la] exact X, C1), C2) -> (sr[la] X, (C2-C1)) if C1 > C2 4532 if (N1C && (N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA) && 4533 cast<BinaryWithFlagsSDNode>(N0)->Flags.hasExact()) { 4534 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4535 uint64_t C1 = N0C1->getZExtValue(); 4536 uint64_t C2 = N1C->getZExtValue(); 4537 SDLoc DL(N); 4538 if (C1 <= C2) 4539 return DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), 4540 DAG.getConstant(C2 - C1, DL, N1.getValueType())); 4541 return DAG.getNode(N0.getOpcode(), DL, VT, N0.getOperand(0), 4542 DAG.getConstant(C1 - C2, DL, N1.getValueType())); 4543 } 4544 } 4545 4546 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or 4547 // (and (srl x, (sub c1, c2), MASK) 4548 // Only fold this if the inner shift has no other uses -- if it does, folding 4549 // this will increase the total number of instructions. 4550 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 4551 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4552 uint64_t c1 = N0C1->getZExtValue(); 4553 if (c1 < OpSizeInBits) { 4554 uint64_t c2 = N1C->getZExtValue(); 4555 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); 4556 SDValue Shift; 4557 if (c2 > c1) { 4558 Mask = Mask.shl(c2 - c1); 4559 SDLoc DL(N); 4560 Shift = DAG.getNode(ISD::SHL, DL, VT, N0.getOperand(0), 4561 DAG.getConstant(c2 - c1, DL, N1.getValueType())); 4562 } else { 4563 Mask = Mask.lshr(c1 - c2); 4564 SDLoc DL(N); 4565 Shift = DAG.getNode(ISD::SRL, DL, VT, N0.getOperand(0), 4566 DAG.getConstant(c1 - c2, DL, N1.getValueType())); 4567 } 4568 SDLoc DL(N0); 4569 return DAG.getNode(ISD::AND, DL, VT, Shift, 4570 DAG.getConstant(Mask, DL, VT)); 4571 } 4572 } 4573 } 4574 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) 4575 if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { 4576 unsigned BitSize = VT.getScalarSizeInBits(); 4577 SDLoc DL(N); 4578 SDValue HiBitsMask = 4579 DAG.getConstant(APInt::getHighBitsSet(BitSize, 4580 BitSize - N1C->getZExtValue()), 4581 DL, VT); 4582 return DAG.getNode(ISD::AND, DL, VT, N0.getOperand(0), 4583 HiBitsMask); 4584 } 4585 4586 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 4587 // Variant of version done on multiply, except mul by a power of 2 is turned 4588 // into a shift. 4589 APInt Val; 4590 if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && 4591 (isa<ConstantSDNode>(N0.getOperand(1)) || 4592 ISD::isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { 4593 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); 4594 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); 4595 return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); 4596 } 4597 4598 // fold (shl (mul x, c1), c2) -> (mul x, c1 << c2) 4599 if (N1C && N0.getOpcode() == ISD::MUL && N0.getNode()->hasOneUse()) { 4600 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4601 if (SDValue Folded = 4602 DAG.FoldConstantArithmetic(ISD::SHL, SDLoc(N1), VT, N0C1, N1C)) 4603 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), Folded); 4604 } 4605 } 4606 4607 if (N1C && !N1C->isOpaque()) 4608 if (SDValue NewSHL = visitShiftByConstant(N, N1C)) 4609 return NewSHL; 4610 4611 return SDValue(); 4612 } 4613 4614 SDValue DAGCombiner::visitSRA(SDNode *N) { 4615 SDValue N0 = N->getOperand(0); 4616 SDValue N1 = N->getOperand(1); 4617 EVT VT = N0.getValueType(); 4618 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 4619 4620 // fold vector ops 4621 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4622 if (VT.isVector()) { 4623 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4624 return FoldedVOp; 4625 4626 N1C = isConstOrConstSplat(N1); 4627 } 4628 4629 // fold (sra c1, c2) -> (sra c1, c2) 4630 ConstantSDNode *N0C = getAsNonOpaqueConstant(N0); 4631 if (N0C && N1C && !N1C->isOpaque()) 4632 return DAG.FoldConstantArithmetic(ISD::SRA, SDLoc(N), VT, N0C, N1C); 4633 // fold (sra 0, x) -> 0 4634 if (isNullConstant(N0)) 4635 return N0; 4636 // fold (sra -1, x) -> -1 4637 if (isAllOnesConstant(N0)) 4638 return N0; 4639 // fold (sra x, (setge c, size(x))) -> undef 4640 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 4641 return DAG.getUNDEF(VT); 4642 // fold (sra x, 0) -> x 4643 if (N1C && N1C->isNullValue()) 4644 return N0; 4645 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports 4646 // sext_inreg. 4647 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { 4648 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); 4649 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); 4650 if (VT.isVector()) 4651 ExtVT = EVT::getVectorVT(*DAG.getContext(), 4652 ExtVT, VT.getVectorNumElements()); 4653 if ((!LegalOperations || 4654 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) 4655 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 4656 N0.getOperand(0), DAG.getValueType(ExtVT)); 4657 } 4658 4659 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) 4660 if (N1C && N0.getOpcode() == ISD::SRA) { 4661 if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { 4662 unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); 4663 if (Sum >= OpSizeInBits) 4664 Sum = OpSizeInBits - 1; 4665 SDLoc DL(N); 4666 return DAG.getNode(ISD::SRA, DL, VT, N0.getOperand(0), 4667 DAG.getConstant(Sum, DL, N1.getValueType())); 4668 } 4669 } 4670 4671 // fold (sra (shl X, m), (sub result_size, n)) 4672 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for 4673 // result_size - n != m. 4674 // If truncate is free for the target sext(shl) is likely to result in better 4675 // code. 4676 if (N0.getOpcode() == ISD::SHL && N1C) { 4677 // Get the two constanst of the shifts, CN0 = m, CN = n. 4678 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); 4679 if (N01C) { 4680 LLVMContext &Ctx = *DAG.getContext(); 4681 // Determine what the truncate's result bitsize and type would be. 4682 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); 4683 4684 if (VT.isVector()) 4685 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); 4686 4687 // Determine the residual right-shift amount. 4688 int ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); 4689 4690 // If the shift is not a no-op (in which case this should be just a sign 4691 // extend already), the truncated to type is legal, sign_extend is legal 4692 // on that type, and the truncate to that type is both legal and free, 4693 // perform the transform. 4694 if ((ShiftAmt > 0) && 4695 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && 4696 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && 4697 TLI.isTruncateFree(VT, TruncVT)) { 4698 4699 SDLoc DL(N); 4700 SDValue Amt = DAG.getConstant(ShiftAmt, DL, 4701 getShiftAmountTy(N0.getOperand(0).getValueType())); 4702 SDValue Shift = DAG.getNode(ISD::SRL, DL, VT, 4703 N0.getOperand(0), Amt); 4704 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, DL, TruncVT, 4705 Shift); 4706 return DAG.getNode(ISD::SIGN_EXTEND, DL, 4707 N->getValueType(0), Trunc); 4708 } 4709 } 4710 } 4711 4712 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 4713 if (N1.getOpcode() == ISD::TRUNCATE && 4714 N1.getOperand(0).getOpcode() == ISD::AND) { 4715 if (SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode())) 4716 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); 4717 }