1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11 // both before and after the DAG is legalized. 12 // 13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14 // primarily intended to handle simplification opportunities that are implicit 15 // in the LLVM IR and exposed by the various codegen lowering phases. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/CodeGen/SelectionDAG.h" 20 #include "llvm/ADT/SetVector.h" 21 #include "llvm/ADT/SmallBitVector.h" 22 #include "llvm/ADT/SmallPtrSet.h" 23 #include "llvm/ADT/Statistic.h" 24 #include "llvm/Analysis/AliasAnalysis.h" 25 #include "llvm/CodeGen/MachineFrameInfo.h" 26 #include "llvm/CodeGen/MachineFunction.h" 27 #include "llvm/IR/DataLayout.h" 28 #include "llvm/IR/DerivedTypes.h" 29 #include "llvm/IR/Function.h" 30 #include "llvm/IR/LLVMContext.h" 31 #include "llvm/Support/CommandLine.h" 32 #include "llvm/Support/Debug.h" 33 #include "llvm/Support/ErrorHandling.h" 34 #include "llvm/Support/MathExtras.h" 35 #include "llvm/Support/raw_ostream.h" 36 #include "llvm/Target/TargetLowering.h" 37 #include "llvm/Target/TargetOptions.h" 38 #include "llvm/Target/TargetRegisterInfo.h" 39 #include "llvm/Target/TargetSubtargetInfo.h" 40 #include <algorithm> 41 using namespace llvm; 42 43 #define DEBUG_TYPE "dagcombine" 44 45 STATISTIC(NodesCombined , "Number of dag nodes combined"); 46 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 47 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 48 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); 49 STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); 50 STATISTIC(SlicedLoads, "Number of load sliced"); 51 52 namespace { 53 static cl::opt<bool> 54 CombinerAA("combiner-alias-analysis", cl::Hidden, 55 cl::desc("Enable DAG combiner alias-analysis heuristics")); 56 57 static cl::opt<bool> 58 CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, 59 cl::desc("Enable DAG combiner's use of IR alias analysis")); 60 61 static cl::opt<bool> 62 UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true), 63 cl::desc("Enable DAG combiner's use of TBAA")); 64 65 #ifndef NDEBUG 66 static cl::opt<std::string> 67 CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden, 68 cl::desc("Only use DAG-combiner alias analysis in this" 69 " function")); 70 #endif 71 72 /// Hidden option to stress test load slicing, i.e., when this option 73 /// is enabled, load slicing bypasses most of its profitability guards. 74 static cl::opt<bool> 75 StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, 76 cl::desc("Bypass the profitability model of load " 77 "slicing"), 78 cl::init(false)); 79 80 static cl::opt<bool> 81 MaySplitLoadIndex("combiner-split-load-index", cl::Hidden, cl::init(true), 82 cl::desc("DAG combiner may split indexing from loads")); 83 84 //------------------------------ DAGCombiner ---------------------------------// 85 86 class DAGCombiner { 87 SelectionDAG &DAG; 88 const TargetLowering &TLI; 89 CombineLevel Level; 90 CodeGenOpt::Level OptLevel; 91 bool LegalOperations; 92 bool LegalTypes; 93 bool ForCodeSize; 94 95 /// \brief Worklist of all of the nodes that need to be simplified. 96 /// 97 /// This must behave as a stack -- new nodes to process are pushed onto the 98 /// back and when processing we pop off of the back. 99 /// 100 /// The worklist will not contain duplicates but may contain null entries 101 /// due to nodes being deleted from the underlying DAG. 102 SmallVector<SDNode *, 64> Worklist; 103 104 /// \brief Mapping from an SDNode to its position on the worklist. 105 /// 106 /// This is used to find and remove nodes from the worklist (by nulling 107 /// them) when they are deleted from the underlying DAG. It relies on 108 /// stable indices of nodes within the worklist. 109 DenseMap<SDNode *, unsigned> WorklistMap; 110 111 /// \brief Set of nodes which have been combined (at least once). 112 /// 113 /// This is used to allow us to reliably add any operands of a DAG node 114 /// which have not yet been combined to the worklist. 115 SmallPtrSet<SDNode *, 64> CombinedNodes; 116 117 // AA - Used for DAG load/store alias analysis. 118 AliasAnalysis &AA; 119 120 /// When an instruction is simplified, add all users of the instruction to 121 /// the work lists because they might get more simplified now. 122 void AddUsersToWorklist(SDNode *N) { 123 for (SDNode *Node : N->uses()) 124 AddToWorklist(Node); 125 } 126 127 /// Call the node-specific routine that folds each particular type of node. 128 SDValue visit(SDNode *N); 129 130 public: 131 /// Add to the worklist making sure its instance is at the back (next to be 132 /// processed.) 133 void AddToWorklist(SDNode *N) { 134 // Skip handle nodes as they can't usefully be combined and confuse the 135 // zero-use deletion strategy. 136 if (N->getOpcode() == ISD::HANDLENODE) 137 return; 138 139 if (WorklistMap.insert(std::make_pair(N, Worklist.size())).second) 140 Worklist.push_back(N); 141 } 142 143 /// Remove all instances of N from the worklist. 144 void removeFromWorklist(SDNode *N) { 145 CombinedNodes.erase(N); 146 147 auto It = WorklistMap.find(N); 148 if (It == WorklistMap.end()) 149 return; // Not in the worklist. 150 151 // Null out the entry rather than erasing it to avoid a linear operation. 152 Worklist[It->second] = nullptr; 153 WorklistMap.erase(It); 154 } 155 156 void deleteAndRecombine(SDNode *N); 157 bool recursivelyDeleteUnusedNodes(SDNode *N); 158 159 SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 160 bool AddTo = true); 161 162 SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) { 163 return CombineTo(N, &Res, 1, AddTo); 164 } 165 166 SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1, 167 bool AddTo = true) { 168 SDValue To[] = { Res0, Res1 }; 169 return CombineTo(N, To, 2, AddTo); 170 } 171 172 void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO); 173 174 private: 175 176 /// Check the specified integer node value to see if it can be simplified or 177 /// if things it uses can be simplified by bit propagation. 178 /// If so, return true. 179 bool SimplifyDemandedBits(SDValue Op) { 180 unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); 181 APInt Demanded = APInt::getAllOnesValue(BitWidth); 182 return SimplifyDemandedBits(Op, Demanded); 183 } 184 185 bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded); 186 187 bool CombineToPreIndexedLoadStore(SDNode *N); 188 bool CombineToPostIndexedLoadStore(SDNode *N); 189 SDValue SplitIndexingFromLoad(LoadSDNode *LD); 190 bool SliceUpLoad(SDNode *N); 191 192 /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed 193 /// load. 194 /// 195 /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced. 196 /// \param InVecVT type of the input vector to EVE with bitcasts resolved. 197 /// \param EltNo index of the vector element to load. 198 /// \param OriginalLoad load that EVE came from to be replaced. 199 /// \returns EVE on success SDValue() on failure. 200 SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad( 201 SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad); 202 void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); 203 SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); 204 SDValue SExtPromoteOperand(SDValue Op, EVT PVT); 205 SDValue ZExtPromoteOperand(SDValue Op, EVT PVT); 206 SDValue PromoteIntBinOp(SDValue Op); 207 SDValue PromoteIntShiftOp(SDValue Op); 208 SDValue PromoteExtend(SDValue Op); 209 bool PromoteLoad(SDValue Op); 210 211 void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 212 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 213 ISD::NodeType ExtType); 214 215 /// Call the node-specific routine that knows how to fold each 216 /// particular type of node. If that doesn't do anything, try the 217 /// target-specific DAG combines. 218 SDValue combine(SDNode *N); 219 220 // Visitation implementation - Implement dag node combining for different 221 // node types. The semantics are as follows: 222 // Return Value: 223 // SDValue.getNode() == 0 - No change was made 224 // SDValue.getNode() == N - N was replaced, is dead and has been handled. 225 // otherwise - N should be replaced by the returned Operand. 226 // 227 SDValue visitTokenFactor(SDNode *N); 228 SDValue visitMERGE_VALUES(SDNode *N); 229 SDValue visitADD(SDNode *N); 230 SDValue visitSUB(SDNode *N); 231 SDValue visitADDC(SDNode *N); 232 SDValue visitSUBC(SDNode *N); 233 SDValue visitADDE(SDNode *N); 234 SDValue visitSUBE(SDNode *N); 235 SDValue visitMUL(SDNode *N); 236 SDValue visitSDIV(SDNode *N); 237 SDValue visitUDIV(SDNode *N); 238 SDValue visitSREM(SDNode *N); 239 SDValue visitUREM(SDNode *N); 240 SDValue visitMULHU(SDNode *N); 241 SDValue visitMULHS(SDNode *N); 242 SDValue visitSMUL_LOHI(SDNode *N); 243 SDValue visitUMUL_LOHI(SDNode *N); 244 SDValue visitSMULO(SDNode *N); 245 SDValue visitUMULO(SDNode *N); 246 SDValue visitSDIVREM(SDNode *N); 247 SDValue visitUDIVREM(SDNode *N); 248 SDValue visitAND(SDNode *N); 249 SDValue visitANDLike(SDValue N0, SDValue N1, SDNode *LocReference); 250 SDValue visitOR(SDNode *N); 251 SDValue visitORLike(SDValue N0, SDValue N1, SDNode *LocReference); 252 SDValue visitXOR(SDNode *N); 253 SDValue SimplifyVBinOp(SDNode *N); 254 SDValue visitSHL(SDNode *N); 255 SDValue visitSRA(SDNode *N); 256 SDValue visitSRL(SDNode *N); 257 SDValue visitRotate(SDNode *N); 258 SDValue visitCTLZ(SDNode *N); 259 SDValue visitCTLZ_ZERO_UNDEF(SDNode *N); 260 SDValue visitCTTZ(SDNode *N); 261 SDValue visitCTTZ_ZERO_UNDEF(SDNode *N); 262 SDValue visitCTPOP(SDNode *N); 263 SDValue visitSELECT(SDNode *N); 264 SDValue visitVSELECT(SDNode *N); 265 SDValue visitSELECT_CC(SDNode *N); 266 SDValue visitSETCC(SDNode *N); 267 SDValue visitSIGN_EXTEND(SDNode *N); 268 SDValue visitZERO_EXTEND(SDNode *N); 269 SDValue visitANY_EXTEND(SDNode *N); 270 SDValue visitSIGN_EXTEND_INREG(SDNode *N); 271 SDValue visitTRUNCATE(SDNode *N); 272 SDValue visitBITCAST(SDNode *N); 273 SDValue visitBUILD_PAIR(SDNode *N); 274 SDValue visitFADD(SDNode *N); 275 SDValue visitFSUB(SDNode *N); 276 SDValue visitFMUL(SDNode *N); 277 SDValue visitFMA(SDNode *N); 278 SDValue visitFDIV(SDNode *N); 279 SDValue visitFREM(SDNode *N); 280 SDValue visitFSQRT(SDNode *N); 281 SDValue visitFCOPYSIGN(SDNode *N); 282 SDValue visitSINT_TO_FP(SDNode *N); 283 SDValue visitUINT_TO_FP(SDNode *N); 284 SDValue visitFP_TO_SINT(SDNode *N); 285 SDValue visitFP_TO_UINT(SDNode *N); 286 SDValue visitFP_ROUND(SDNode *N); 287 SDValue visitFP_ROUND_INREG(SDNode *N); 288 SDValue visitFP_EXTEND(SDNode *N); 289 SDValue visitFNEG(SDNode *N); 290 SDValue visitFABS(SDNode *N); 291 SDValue visitFCEIL(SDNode *N); 292 SDValue visitFTRUNC(SDNode *N); 293 SDValue visitFFLOOR(SDNode *N); 294 SDValue visitFMINNUM(SDNode *N); 295 SDValue visitFMAXNUM(SDNode *N); 296 SDValue visitBRCOND(SDNode *N); 297 SDValue visitBR_CC(SDNode *N); 298 SDValue visitLOAD(SDNode *N); 299 SDValue visitSTORE(SDNode *N); 300 SDValue visitINSERT_VECTOR_ELT(SDNode *N); 301 SDValue visitEXTRACT_VECTOR_ELT(SDNode *N); 302 SDValue visitBUILD_VECTOR(SDNode *N); 303 SDValue visitCONCAT_VECTORS(SDNode *N); 304 SDValue visitEXTRACT_SUBVECTOR(SDNode *N); 305 SDValue visitVECTOR_SHUFFLE(SDNode *N); 306 SDValue visitSCALAR_TO_VECTOR(SDNode *N); 307 SDValue visitINSERT_SUBVECTOR(SDNode *N); 308 SDValue visitMLOAD(SDNode *N); 309 SDValue visitMSTORE(SDNode *N); 310 SDValue visitFP_TO_FP16(SDNode *N); 311 312 SDValue XformToShuffleWithZero(SDNode *N); 313 SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); 314 315 SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt); 316 317 bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); 318 SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); 319 SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); 320 SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, 321 SDValue N3, ISD::CondCode CC, 322 bool NotExtCompare = false); 323 SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, 324 SDLoc DL, bool foldBooleans = true); 325 326 bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 327 SDValue &CC) const; 328 bool isOneUseSetCC(SDValue N) const; 329 330 SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 331 unsigned HiOp); 332 SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); 333 SDValue CombineExtLoad(SDNode *N); 334 SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT); 335 SDValue BuildSDIV(SDNode *N); 336 SDValue BuildSDIVPow2(SDNode *N); 337 SDValue BuildUDIV(SDNode *N); 338 SDValue BuildReciprocalEstimate(SDValue Op); 339 SDValue BuildRsqrtEstimate(SDValue Op); 340 SDValue BuildRsqrtNROneConst(SDValue Op, SDValue Est, unsigned Iterations); 341 SDValue BuildRsqrtNRTwoConst(SDValue Op, SDValue Est, unsigned Iterations); 342 SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 343 bool DemandHighBits = true); 344 SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); 345 SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg, 346 SDValue InnerPos, SDValue InnerNeg, 347 unsigned PosOpcode, unsigned NegOpcode, 348 SDLoc DL); 349 SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); 350 SDValue ReduceLoadWidth(SDNode *N); 351 SDValue ReduceLoadOpStoreWidth(SDNode *N); 352 SDValue TransformFPLoadStorePair(SDNode *N); 353 SDValue reduceBuildVecExtToExtBuildVec(SDNode *N); 354 SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N); 355 356 SDValue GetDemandedBits(SDValue V, const APInt &Mask); 357 358 /// Walk up chain skipping non-aliasing memory nodes, 359 /// looking for aliasing nodes and adding them to the Aliases vector. 360 void GatherAllAliases(SDNode *N, SDValue OriginalChain, 361 SmallVectorImpl<SDValue> &Aliases); 362 363 /// Return true if there is any possibility that the two addresses overlap. 364 bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const; 365 366 /// Walk up chain skipping non-aliasing memory nodes, looking for a better 367 /// chain (aliasing node.) 368 SDValue FindBetterChain(SDNode *N, SDValue Chain); 369 370 /// Holds a pointer to an LSBaseSDNode as well as information on where it 371 /// is located in a sequence of memory operations connected by a chain. 372 struct MemOpLink { 373 MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq): 374 MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { } 375 // Ptr to the mem node. 376 LSBaseSDNode *MemNode; 377 // Offset from the base ptr. 378 int64_t OffsetFromBase; 379 // What is the sequence number of this mem node. 380 // Lowest mem operand in the DAG starts at zero. 381 unsigned SequenceNum; 382 }; 383 384 /// This is a helper function for MergeConsecutiveStores. When the source 385 /// elements of the consecutive stores are all constants or all extracted 386 /// vector elements, try to merge them into one larger store. 387 /// \return True if a merged store was created. 388 bool MergeStoresOfConstantsOrVecElts(SmallVectorImpl<MemOpLink> &StoreNodes, 389 EVT MemVT, unsigned NumElem, 390 bool IsConstantSrc, bool UseVector); 391 392 /// Merge consecutive store operations into a wide store. 393 /// This optimization uses wide integers or vectors when possible. 394 /// \return True if some memory operations were changed. 395 bool MergeConsecutiveStores(StoreSDNode *N); 396 397 /// \brief Try to transform a truncation where C is a constant: 398 /// (trunc (and X, C)) -> (and (trunc X), (trunc C)) 399 /// 400 /// \p N needs to be a truncation and its first operand an AND. Other 401 /// requirements are checked by the function (e.g. that trunc is 402 /// single-use) and if missed an empty SDValue is returned. 403 SDValue distributeTruncateThroughAnd(SDNode *N); 404 405 public: 406 DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) 407 : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), 408 OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { 409 auto *F = DAG.getMachineFunction().getFunction(); 410 ForCodeSize = F->hasFnAttribute(Attribute::OptimizeForSize) || 411 F->hasFnAttribute(Attribute::MinSize); 412 } 413 414 /// Runs the dag combiner on all nodes in the work list 415 void Run(CombineLevel AtLevel); 416 417 SelectionDAG &getDAG() const { return DAG; } 418 419 /// Returns a type large enough to hold any valid shift amount - before type 420 /// legalization these can be huge. 421 EVT getShiftAmountTy(EVT LHSTy) { 422 assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); 423 if (LHSTy.isVector()) 424 return LHSTy; 425 return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) 426 : TLI.getPointerTy(); 427 } 428 429 /// This method returns true if we are running before type legalization or 430 /// if the specified VT is legal. 431 bool isTypeLegal(const EVT &VT) { 432 if (!LegalTypes) return true; 433 return TLI.isTypeLegal(VT); 434 } 435 436 /// Convenience wrapper around TargetLowering::getSetCCResultType 437 EVT getSetCCResultType(EVT VT) const { 438 return TLI.getSetCCResultType(*DAG.getContext(), VT); 439 } 440 }; 441 } 442 443 444 namespace { 445 /// This class is a DAGUpdateListener that removes any deleted 446 /// nodes from the worklist. 447 class WorklistRemover : public SelectionDAG::DAGUpdateListener { 448 DAGCombiner &DC; 449 public: 450 explicit WorklistRemover(DAGCombiner &dc) 451 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 452 453 void NodeDeleted(SDNode *N, SDNode *E) override { 454 DC.removeFromWorklist(N); 455 } 456 }; 457 } 458 459 //===----------------------------------------------------------------------===// 460 // TargetLowering::DAGCombinerInfo implementation 461 //===----------------------------------------------------------------------===// 462 463 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 464 ((DAGCombiner*)DC)->AddToWorklist(N); 465 } 466 467 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { 468 ((DAGCombiner*)DC)->removeFromWorklist(N); 469 } 470 471 SDValue TargetLowering::DAGCombinerInfo:: 472 CombineTo(SDNode *N, ArrayRef<SDValue> To, bool AddTo) { 473 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 474 } 475 476 SDValue TargetLowering::DAGCombinerInfo:: 477 CombineTo(SDNode *N, SDValue Res, bool AddTo) { 478 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 479 } 480 481 482 SDValue TargetLowering::DAGCombinerInfo:: 483 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 484 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 485 } 486 487 void TargetLowering::DAGCombinerInfo:: 488 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 489 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 490 } 491 492 //===----------------------------------------------------------------------===// 493 // Helper Functions 494 //===----------------------------------------------------------------------===// 495 496 void DAGCombiner::deleteAndRecombine(SDNode *N) { 497 removeFromWorklist(N); 498 499 // If the operands of this node are only used by the node, they will now be 500 // dead. Make sure to re-visit them and recursively delete dead nodes. 501 for (const SDValue &Op : N->ops()) 502 // For an operand generating multiple values, one of the values may 503 // become dead allowing further simplification (e.g. split index 504 // arithmetic from an indexed load). 505 if (Op->hasOneUse() || Op->getNumValues() > 1) 506 AddToWorklist(Op.getNode()); 507 508 DAG.DeleteNode(N); 509 } 510 511 /// Return 1 if we can compute the negated form of the specified expression for 512 /// the same cost as the expression itself, or 2 if we can compute the negated 513 /// form more cheaply than the expression itself. 514 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 515 const TargetLowering &TLI, 516 const TargetOptions *Options, 517 unsigned Depth = 0) { 518 // fneg is removable even if it has multiple uses. 519 if (Op.getOpcode() == ISD::FNEG) return 2; 520 521 // Don't allow anything with multiple uses. 522 if (!Op.hasOneUse()) return 0; 523 524 // Don't recurse exponentially. 525 if (Depth > 6) return 0; 526 527 switch (Op.getOpcode()) { 528 default: return false; 529 case ISD::ConstantFP: 530 // Don't invert constant FP values after legalize. The negated constant 531 // isn't necessarily legal. 532 return LegalOperations ? 0 : 1; 533 case ISD::FADD: 534 // FIXME: determine better conditions for this xform. 535 if (!Options->UnsafeFPMath) return 0; 536 537 // After operation legalization, it might not be legal to create new FSUBs. 538 if (LegalOperations && 539 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 540 return 0; 541 542 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 543 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 544 Options, Depth + 1)) 545 return V; 546 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 547 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 548 Depth + 1); 549 case ISD::FSUB: 550 // We can't turn -(A-B) into B-A when we honor signed zeros. 551 if (!Options->UnsafeFPMath) return 0; 552 553 // fold (fneg (fsub A, B)) -> (fsub B, A) 554 return 1; 555 556 case ISD::FMUL: 557 case ISD::FDIV: 558 if (Options->HonorSignDependentRoundingFPMath()) return 0; 559 560 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 561 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 562 Options, Depth + 1)) 563 return V; 564 565 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 566 Depth + 1); 567 568 case ISD::FP_EXTEND: 569 case ISD::FP_ROUND: 570 case ISD::FSIN: 571 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 572 Depth + 1); 573 } 574 } 575 576 /// If isNegatibleForFree returns true, return the newly negated expression. 577 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 578 bool LegalOperations, unsigned Depth = 0) { 579 const TargetOptions &Options = DAG.getTarget().Options; 580 // fneg is removable even if it has multiple uses. 581 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 582 583 // Don't allow anything with multiple uses. 584 assert(Op.hasOneUse() && "Unknown reuse!"); 585 586 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 587 switch (Op.getOpcode()) { 588 default: llvm_unreachable("Unknown code"); 589 case ISD::ConstantFP: { 590 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 591 V.changeSign(); 592 return DAG.getConstantFP(V, Op.getValueType()); 593 } 594 case ISD::FADD: 595 // FIXME: determine better conditions for this xform. 596 assert(Options.UnsafeFPMath); 597 598 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 599 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 600 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 601 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 602 GetNegatedExpression(Op.getOperand(0), DAG, 603 LegalOperations, Depth+1), 604 Op.getOperand(1)); 605 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 606 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 607 GetNegatedExpression(Op.getOperand(1), DAG, 608 LegalOperations, Depth+1), 609 Op.getOperand(0)); 610 case ISD::FSUB: 611 // We can't turn -(A-B) into B-A when we honor signed zeros. 612 assert(Options.UnsafeFPMath); 613 614 // fold (fneg (fsub 0, B)) -> B 615 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 616 if (N0CFP->getValueAPF().isZero()) 617 return Op.getOperand(1); 618 619 // fold (fneg (fsub A, B)) -> (fsub B, A) 620 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 621 Op.getOperand(1), Op.getOperand(0)); 622 623 case ISD::FMUL: 624 case ISD::FDIV: 625 assert(!Options.HonorSignDependentRoundingFPMath()); 626 627 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 628 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 629 DAG.getTargetLoweringInfo(), &Options, Depth+1)) 630 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 631 GetNegatedExpression(Op.getOperand(0), DAG, 632 LegalOperations, Depth+1), 633 Op.getOperand(1)); 634 635 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 636 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 637 Op.getOperand(0), 638 GetNegatedExpression(Op.getOperand(1), DAG, 639 LegalOperations, Depth+1)); 640 641 case ISD::FP_EXTEND: 642 case ISD::FSIN: 643 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 644 GetNegatedExpression(Op.getOperand(0), DAG, 645 LegalOperations, Depth+1)); 646 case ISD::FP_ROUND: 647 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 648 GetNegatedExpression(Op.getOperand(0), DAG, 649 LegalOperations, Depth+1), 650 Op.getOperand(1)); 651 } 652 } 653 654 // Return true if this node is a setcc, or is a select_cc 655 // that selects between the target values used for true and false, making it 656 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to 657 // the appropriate nodes based on the type of node we are checking. This 658 // simplifies life a bit for the callers. 659 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 660 SDValue &CC) const { 661 if (N.getOpcode() == ISD::SETCC) { 662 LHS = N.getOperand(0); 663 RHS = N.getOperand(1); 664 CC = N.getOperand(2); 665 return true; 666 } 667 668 if (N.getOpcode() != ISD::SELECT_CC || 669 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 670 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 671 return false; 672 673 if (TLI.getBooleanContents(N.getValueType()) == 674 TargetLowering::UndefinedBooleanContent) 675 return false; 676 677 LHS = N.getOperand(0); 678 RHS = N.getOperand(1); 679 CC = N.getOperand(4); 680 return true; 681 } 682 683 /// Return true if this is a SetCC-equivalent operation with only one use. 684 /// If this is true, it allows the users to invert the operation for free when 685 /// it is profitable to do so. 686 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 687 SDValue N0, N1, N2; 688 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 689 return true; 690 return false; 691 } 692 693 /// Returns true if N is a BUILD_VECTOR node whose 694 /// elements are all the same constant or undefined. 695 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { 696 BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); 697 if (!C) 698 return false; 699 700 APInt SplatUndef; 701 unsigned SplatBitSize; 702 bool HasAnyUndefs; 703 EVT EltVT = N->getValueType(0).getVectorElementType(); 704 return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 705 HasAnyUndefs) && 706 EltVT.getSizeInBits() >= SplatBitSize); 707 } 708 709 // \brief Returns the SDNode if it is a constant integer BuildVector 710 // or constant integer. 711 static SDNode *isConstantIntBuildVectorOrConstantInt(SDValue N) { 712 if (isa<ConstantSDNode>(N)) 713 return N.getNode(); 714 if (ISD::isBuildVectorOfConstantSDNodes(N.getNode())) 715 return N.getNode(); 716 return nullptr; 717 } 718 719 // \brief Returns the SDNode if it is a constant float BuildVector 720 // or constant float. 721 static SDNode *isConstantFPBuildVectorOrConstantFP(SDValue N) { 722 if (isa<ConstantFPSDNode>(N)) 723 return N.getNode(); 724 if (ISD::isBuildVectorOfConstantFPSDNodes(N.getNode())) 725 return N.getNode(); 726 return nullptr; 727 } 728 729 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 730 // int. 731 static ConstantSDNode *isConstOrConstSplat(SDValue N) { 732 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) 733 return CN; 734 735 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 736 BitVector UndefElements; 737 ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); 738 739 // BuildVectors can truncate their operands. Ignore that case here. 740 // FIXME: We blindly ignore splats which include undef which is overly 741 // pessimistic. 742 if (CN && UndefElements.none() && 743 CN->getValueType(0) == N.getValueType().getScalarType()) 744 return CN; 745 } 746 747 return nullptr; 748 } 749 750 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 751 // float. 752 static ConstantFPSDNode *isConstOrConstSplatFP(SDValue N) { 753 if (ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N)) 754 return CN; 755 756 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 757 BitVector UndefElements; 758 ConstantFPSDNode *CN = BV->getConstantFPSplatNode(&UndefElements); 759 760 if (CN && UndefElements.none()) 761 return CN; 762 } 763 764 return nullptr; 765 } 766 767 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, 768 SDValue N0, SDValue N1) { 769 EVT VT = N0.getValueType(); 770 if (N0.getOpcode() == Opc) { 771 if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0.getOperand(1))) { 772 if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1)) { 773 // reassoc. (op (op x, c1), c2) -> (op x, (op c1, c2)) 774 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R)) 775 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 776 return SDValue(); 777 } 778 if (N0.hasOneUse()) { 779 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one 780 // use 781 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); 782 if (!OpNode.getNode()) 783 return SDValue(); 784 AddToWorklist(OpNode.getNode()); 785 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 786 } 787 } 788 } 789 790 if (N1.getOpcode() == Opc) { 791 if (SDNode *R = isConstantIntBuildVectorOrConstantInt(N1.getOperand(1))) { 792 if (SDNode *L = isConstantIntBuildVectorOrConstantInt(N0)) { 793 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) 794 if (SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L)) 795 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 796 return SDValue(); 797 } 798 if (N1.hasOneUse()) { 799 // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one 800 // use 801 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); 802 if (!OpNode.getNode()) 803 return SDValue(); 804 AddToWorklist(OpNode.getNode()); 805 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 806 } 807 } 808 } 809 810 return SDValue(); 811 } 812 813 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 814 bool AddTo) { 815 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 816 ++NodesCombined; 817 DEBUG(dbgs() << "\nReplacing.1 "; 818 N->dump(&DAG); 819 dbgs() << "\nWith: "; 820 To[0].getNode()->dump(&DAG); 821 dbgs() << " and " << NumTo-1 << " other values\n"); 822 for (unsigned i = 0, e = NumTo; i != e; ++i) 823 assert((!To[i].getNode() || 824 N->getValueType(i) == To[i].getValueType()) && 825 "Cannot combine value to value of different type!"); 826 827 WorklistRemover DeadNodes(*this); 828 DAG.ReplaceAllUsesWith(N, To); 829 if (AddTo) { 830 // Push the new nodes and any users onto the worklist 831 for (unsigned i = 0, e = NumTo; i != e; ++i) { 832 if (To[i].getNode()) { 833 AddToWorklist(To[i].getNode()); 834 AddUsersToWorklist(To[i].getNode()); 835 } 836 } 837 } 838 839 // Finally, if the node is now dead, remove it from the graph. The node 840 // may not be dead if the replacement process recursively simplified to 841 // something else needing this node. 842 if (N->use_empty()) 843 deleteAndRecombine(N); 844 return SDValue(N, 0); 845 } 846 847 void DAGCombiner:: 848 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 849 // Replace all uses. If any nodes become isomorphic to other nodes and 850 // are deleted, make sure to remove them from our worklist. 851 WorklistRemover DeadNodes(*this); 852 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); 853 854 // Push the new node and any (possibly new) users onto the worklist. 855 AddToWorklist(TLO.New.getNode()); 856 AddUsersToWorklist(TLO.New.getNode()); 857 858 // Finally, if the node is now dead, remove it from the graph. The node 859 // may not be dead if the replacement process recursively simplified to 860 // something else needing this node. 861 if (TLO.Old.getNode()->use_empty()) 862 deleteAndRecombine(TLO.Old.getNode()); 863 } 864 865 /// Check the specified integer node value to see if it can be simplified or if 866 /// things it uses can be simplified by bit propagation. If so, return true. 867 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 868 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 869 APInt KnownZero, KnownOne; 870 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 871 return false; 872 873 // Revisit the node. 874 AddToWorklist(Op.getNode()); 875 876 // Replace the old value with the new one. 877 ++NodesCombined; 878 DEBUG(dbgs() << "\nReplacing.2 "; 879 TLO.Old.getNode()->dump(&DAG); 880 dbgs() << "\nWith: "; 881 TLO.New.getNode()->dump(&DAG); 882 dbgs() << '\n'); 883 884 CommitTargetLoweringOpt(TLO); 885 return true; 886 } 887 888 void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { 889 SDLoc dl(Load); 890 EVT VT = Load->getValueType(0); 891 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); 892 893 DEBUG(dbgs() << "\nReplacing.9 "; 894 Load->dump(&DAG); 895 dbgs() << "\nWith: "; 896 Trunc.getNode()->dump(&DAG); 897 dbgs() << '\n'); 898 WorklistRemover DeadNodes(*this); 899 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc); 900 DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1)); 901 deleteAndRecombine(Load); 902 AddToWorklist(Trunc.getNode()); 903 } 904 905 SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { 906 Replace = false; 907 SDLoc dl(Op); 908 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) { 909 EVT MemVT = LD->getMemoryVT(); 910 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 911 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD 912 : ISD::EXTLOAD) 913 : LD->getExtensionType(); 914 Replace = true; 915 return DAG.getExtLoad(ExtType, dl, PVT, 916 LD->getChain(), LD->getBasePtr(), 917 MemVT, LD->getMemOperand()); 918 } 919 920 unsigned Opc = Op.getOpcode(); 921 switch (Opc) { 922 default: break; 923 case ISD::AssertSext: 924 return DAG.getNode(ISD::AssertSext, dl, PVT, 925 SExtPromoteOperand(Op.getOperand(0), PVT), 926 Op.getOperand(1)); 927 case ISD::AssertZext: 928 return DAG.getNode(ISD::AssertZext, dl, PVT, 929 ZExtPromoteOperand(Op.getOperand(0), PVT), 930 Op.getOperand(1)); 931 case ISD::Constant: { 932 unsigned ExtOpc = 933 Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; 934 return DAG.getNode(ExtOpc, dl, PVT, Op); 935 } 936 } 937 938 if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT)) 939 return SDValue(); 940 return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op); 941 } 942 943 SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { 944 if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) 945 return SDValue(); 946 EVT OldVT = Op.getValueType(); 947 SDLoc dl(Op); 948 bool Replace = false; 949 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 950 if (!NewOp.getNode()) 951 return SDValue(); 952 AddToWorklist(NewOp.getNode()); 953 954 if (Replace) 955 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 956 return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp, 957 DAG.getValueType(OldVT)); 958 } 959 960 SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { 961 EVT OldVT = Op.getValueType(); 962 SDLoc dl(Op); 963 bool Replace = false; 964 SDValue NewOp = PromoteOperand(Op, PVT, Replace); 965 if (!NewOp.getNode()) 966 return SDValue(); 967 AddToWorklist(NewOp.getNode()); 968 969 if (Replace) 970 ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode()); 971 return DAG.getZeroExtendInReg(NewOp, dl, OldVT); 972 } 973 974 /// Promote the specified integer binary operation if the target indicates it is 975 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to 976 /// i32 since i16 instructions are longer. 977 SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { 978 if (!LegalOperations) 979 return SDValue(); 980 981 EVT VT = Op.getValueType(); 982 if (VT.isVector() || !VT.isInteger()) 983 return SDValue(); 984 985 // If operation type is 'undesirable', e.g. i16 on x86, consider 986 // promoting it. 987 unsigned Opc = Op.getOpcode(); 988 if (TLI.isTypeDesirableForOp(Opc, VT)) 989 return SDValue(); 990 991 EVT PVT = VT; 992 // Consult target whether it is a good idea to promote this operation and 993 // what's the right type to promote it to. 994 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 995 assert(PVT != VT && "Don't know what type to promote to!"); 996 997 bool Replace0 = false; 998 SDValue N0 = Op.getOperand(0); 999 SDValue NN0 = PromoteOperand(N0, PVT, Replace0); 1000 if (!NN0.getNode()) 1001 return SDValue(); 1002 1003 bool Replace1 = false; 1004 SDValue N1 = Op.getOperand(1); 1005 SDValue NN1; 1006 if (N0 == N1) 1007 NN1 = NN0; 1008 else { 1009 NN1 = PromoteOperand(N1, PVT, Replace1); 1010 if (!NN1.getNode()) 1011 return SDValue(); 1012 } 1013 1014 AddToWorklist(NN0.getNode()); 1015 if (NN1.getNode()) 1016 AddToWorklist(NN1.getNode()); 1017 1018 if (Replace0) 1019 ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode()); 1020 if (Replace1) 1021 ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode()); 1022 1023 DEBUG(dbgs() << "\nPromoting "; 1024 Op.getNode()->dump(&DAG)); 1025 SDLoc dl(Op); 1026 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1027 DAG.getNode(Opc, dl, PVT, NN0, NN1)); 1028 } 1029 return SDValue(); 1030 } 1031 1032 /// Promote the specified integer shift operation if the target indicates it is 1033 /// beneficial. e.g. On x86, it's usually better to promote i16 operations to 1034 /// i32 since i16 instructions are longer. 1035 SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { 1036 if (!LegalOperations) 1037 return SDValue(); 1038 1039 EVT VT = Op.getValueType(); 1040 if (VT.isVector() || !VT.isInteger()) 1041 return SDValue(); 1042 1043 // If operation type is 'undesirable', e.g. i16 on x86, consider 1044 // promoting it. 1045 unsigned Opc = Op.getOpcode(); 1046 if (TLI.isTypeDesirableForOp(Opc, VT)) 1047 return SDValue(); 1048 1049 EVT PVT = VT; 1050 // Consult target whether it is a good idea to promote this operation and 1051 // what's the right type to promote it to. 1052 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1053 assert(PVT != VT && "Don't know what type to promote to!"); 1054 1055 bool Replace = false; 1056 SDValue N0 = Op.getOperand(0); 1057 if (Opc == ISD::SRA) 1058 N0 = SExtPromoteOperand(Op.getOperand(0), PVT); 1059 else if (Opc == ISD::SRL) 1060 N0 = ZExtPromoteOperand(Op.getOperand(0), PVT); 1061 else 1062 N0 = PromoteOperand(N0, PVT, Replace); 1063 if (!N0.getNode()) 1064 return SDValue(); 1065 1066 AddToWorklist(N0.getNode()); 1067 if (Replace) 1068 ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode()); 1069 1070 DEBUG(dbgs() << "\nPromoting "; 1071 Op.getNode()->dump(&DAG)); 1072 SDLoc dl(Op); 1073 return DAG.getNode(ISD::TRUNCATE, dl, VT, 1074 DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); 1075 } 1076 return SDValue(); 1077 } 1078 1079 SDValue DAGCombiner::PromoteExtend(SDValue Op) { 1080 if (!LegalOperations) 1081 return SDValue(); 1082 1083 EVT VT = Op.getValueType(); 1084 if (VT.isVector() || !VT.isInteger()) 1085 return SDValue(); 1086 1087 // If operation type is 'undesirable', e.g. i16 on x86, consider 1088 // promoting it. 1089 unsigned Opc = Op.getOpcode(); 1090 if (TLI.isTypeDesirableForOp(Opc, VT)) 1091 return SDValue(); 1092 1093 EVT PVT = VT; 1094 // Consult target whether it is a good idea to promote this operation and 1095 // what's the right type to promote it to. 1096 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1097 assert(PVT != VT && "Don't know what type to promote to!"); 1098 // fold (aext (aext x)) -> (aext x) 1099 // fold (aext (zext x)) -> (zext x) 1100 // fold (aext (sext x)) -> (sext x) 1101 DEBUG(dbgs() << "\nPromoting "; 1102 Op.getNode()->dump(&DAG)); 1103 return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); 1104 } 1105 return SDValue(); 1106 } 1107 1108 bool DAGCombiner::PromoteLoad(SDValue Op) { 1109 if (!LegalOperations) 1110 return false; 1111 1112 EVT VT = Op.getValueType(); 1113 if (VT.isVector() || !VT.isInteger()) 1114 return false; 1115 1116 // If operation type is 'undesirable', e.g. i16 on x86, consider 1117 // promoting it. 1118 unsigned Opc = Op.getOpcode(); 1119 if (TLI.isTypeDesirableForOp(Opc, VT)) 1120 return false; 1121 1122 EVT PVT = VT; 1123 // Consult target whether it is a good idea to promote this operation and 1124 // what's the right type to promote it to. 1125 if (TLI.IsDesirableToPromoteOp(Op, PVT)) { 1126 assert(PVT != VT && "Don't know what type to promote to!"); 1127 1128 SDLoc dl(Op); 1129 SDNode *N = Op.getNode(); 1130 LoadSDNode *LD = cast<LoadSDNode>(N); 1131 EVT MemVT = LD->getMemoryVT(); 1132 ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) 1133 ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, PVT, MemVT) ? ISD::ZEXTLOAD 1134 : ISD::EXTLOAD) 1135 : LD->getExtensionType(); 1136 SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, 1137 LD->getChain(), LD->getBasePtr(), 1138 MemVT, LD->getMemOperand()); 1139 SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); 1140 1141 DEBUG(dbgs() << "\nPromoting "; 1142 N->dump(&DAG); 1143 dbgs() << "\nTo: "; 1144 Result.getNode()->dump(&DAG); 1145 dbgs() << '\n'); 1146 WorklistRemover DeadNodes(*this); 1147 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result); 1148 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1)); 1149 deleteAndRecombine(N); 1150 AddToWorklist(Result.getNode()); 1151 return true; 1152 } 1153 return false; 1154 } 1155 1156 /// \brief Recursively delete a node which has no uses and any operands for 1157 /// which it is the only use. 1158 /// 1159 /// Note that this both deletes the nodes and removes them from the worklist. 1160 /// It also adds any nodes who have had a user deleted to the worklist as they 1161 /// may now have only one use and subject to other combines. 1162 bool DAGCombiner::recursivelyDeleteUnusedNodes(SDNode *N) { 1163 if (!N->use_empty()) 1164 return false; 1165 1166 SmallSetVector<SDNode *, 16> Nodes; 1167 Nodes.insert(N); 1168 do { 1169 N = Nodes.pop_back_val(); 1170 if (!N) 1171 continue; 1172 1173 if (N->use_empty()) { 1174 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1175 Nodes.insert(N->getOperand(i).getNode()); 1176 1177 removeFromWorklist(N); 1178 DAG.DeleteNode(N); 1179 } else { 1180 AddToWorklist(N); 1181 } 1182 } while (!Nodes.empty()); 1183 return true; 1184 } 1185 1186 //===----------------------------------------------------------------------===// 1187 // Main DAG Combiner implementation 1188 //===----------------------------------------------------------------------===// 1189 1190 void DAGCombiner::Run(CombineLevel AtLevel) { 1191 // set the instance variables, so that the various visit routines may use it. 1192 Level = AtLevel; 1193 LegalOperations = Level >= AfterLegalizeVectorOps; 1194 LegalTypes = Level >= AfterLegalizeTypes; 1195 1196 // Add all the dag nodes to the worklist. 1197 for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), 1198 E = DAG.allnodes_end(); I != E; ++I) 1199 AddToWorklist(I); 1200 1201 // Create a dummy node (which is not added to allnodes), that adds a reference 1202 // to the root node, preventing it from being deleted, and tracking any 1203 // changes of the root. 1204 HandleSDNode Dummy(DAG.getRoot()); 1205 1206 // while the worklist isn't empty, find a node and 1207 // try and combine it. 1208 while (!WorklistMap.empty()) { 1209 SDNode *N; 1210 // The Worklist holds the SDNodes in order, but it may contain null entries. 1211 do { 1212 N = Worklist.pop_back_val(); 1213 } while (!N); 1214 1215 bool GoodWorklistEntry = WorklistMap.erase(N); 1216 (void)GoodWorklistEntry; 1217 assert(GoodWorklistEntry && 1218 "Found a worklist entry without a corresponding map entry!"); 1219 1220 // If N has no uses, it is dead. Make sure to revisit all N's operands once 1221 // N is deleted from the DAG, since they too may now be dead or may have a 1222 // reduced number of uses, allowing other xforms. 1223 if (recursivelyDeleteUnusedNodes(N)) 1224 continue; 1225 1226 WorklistRemover DeadNodes(*this); 1227 1228 // If this combine is running after legalizing the DAG, re-legalize any 1229 // nodes pulled off the worklist. 1230 if (Level == AfterLegalizeDAG) { 1231 SmallSetVector<SDNode *, 16> UpdatedNodes; 1232 bool NIsValid = DAG.LegalizeOp(N, UpdatedNodes); 1233 1234 for (SDNode *LN : UpdatedNodes) { 1235 AddToWorklist(LN); 1236 AddUsersToWorklist(LN); 1237 } 1238 if (!NIsValid) 1239 continue; 1240 } 1241 1242 DEBUG(dbgs() << "\nCombining: "; N->dump(&DAG)); 1243 1244 // Add any operands of the new node which have not yet been combined to the 1245 // worklist as well. Because the worklist uniques things already, this 1246 // won't repeatedly process the same operand. 1247 CombinedNodes.insert(N); 1248 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1249 if (!CombinedNodes.count(N->getOperand(i).getNode())) 1250 AddToWorklist(N->getOperand(i).getNode()); 1251 1252 SDValue RV = combine(N); 1253 1254 if (!RV.getNode()) 1255 continue; 1256 1257 ++NodesCombined; 1258 1259 // If we get back the same node we passed in, rather than a new node or 1260 // zero, we know that the node must have defined multiple values and 1261 // CombineTo was used. Since CombineTo takes care of the worklist 1262 // mechanics for us, we have no work to do in this case. 1263 if (RV.getNode() == N) 1264 continue; 1265 1266 assert(N->getOpcode() != ISD::DELETED_NODE && 1267 RV.getNode()->getOpcode() != ISD::DELETED_NODE && 1268 "Node was deleted but visit returned new node!"); 1269 1270 DEBUG(dbgs() << " ... into: "; 1271 RV.getNode()->dump(&DAG)); 1272 1273 // Transfer debug value. 1274 DAG.TransferDbgValues(SDValue(N, 0), RV); 1275 if (N->getNumValues() == RV.getNode()->getNumValues()) 1276 DAG.ReplaceAllUsesWith(N, RV.getNode()); 1277 else { 1278 assert(N->getValueType(0) == RV.getValueType() && 1279 N->getNumValues() == 1 && "Type mismatch"); 1280 SDValue OpV = RV; 1281 DAG.ReplaceAllUsesWith(N, &OpV); 1282 } 1283 1284 // Push the new node and any users onto the worklist 1285 AddToWorklist(RV.getNode()); 1286 AddUsersToWorklist(RV.getNode()); 1287 1288 // Finally, if the node is now dead, remove it from the graph. The node 1289 // may not be dead if the replacement process recursively simplified to 1290 // something else needing this node. This will also take care of adding any 1291 // operands which have lost a user to the worklist. 1292 recursivelyDeleteUnusedNodes(N); 1293 } 1294 1295 // If the root changed (e.g. it was a dead load, update the root). 1296 DAG.setRoot(Dummy.getValue()); 1297 DAG.RemoveDeadNodes(); 1298 } 1299 1300 SDValue DAGCombiner::visit(SDNode *N) { 1301 switch (N->getOpcode()) { 1302 default: break; 1303 case ISD::TokenFactor: return visitTokenFactor(N); 1304 case ISD::MERGE_VALUES: return visitMERGE_VALUES(N); 1305 case ISD::ADD: return visitADD(N); 1306 case ISD::SUB: return visitSUB(N); 1307 case ISD::ADDC: return visitADDC(N); 1308 case ISD::SUBC: return visitSUBC(N); 1309 case ISD::ADDE: return visitADDE(N); 1310 case ISD::SUBE: return visitSUBE(N); 1311 case ISD::MUL: return visitMUL(N); 1312 case ISD::SDIV: return visitSDIV(N); 1313 case ISD::UDIV: return visitUDIV(N); 1314 case ISD::SREM: return visitSREM(N); 1315 case ISD::UREM: return visitUREM(N); 1316 case ISD::MULHU: return visitMULHU(N); 1317 case ISD::MULHS: return visitMULHS(N); 1318 case ISD::SMUL_LOHI: return visitSMUL_LOHI(N); 1319 case ISD::UMUL_LOHI: return visitUMUL_LOHI(N); 1320 case ISD::SMULO: return visitSMULO(N); 1321 case ISD::UMULO: return visitUMULO(N); 1322 case ISD::SDIVREM: return visitSDIVREM(N); 1323 case ISD::UDIVREM: return visitUDIVREM(N); 1324 case ISD::AND: return visitAND(N); 1325 case ISD::OR: return visitOR(N); 1326 case ISD::XOR: return visitXOR(N); 1327 case ISD::SHL: return visitSHL(N); 1328 case ISD::SRA: return visitSRA(N); 1329 case ISD::SRL: return visitSRL(N); 1330 case ISD::ROTR: 1331 case ISD::ROTL: return visitRotate(N); 1332 case ISD::CTLZ: return visitCTLZ(N); 1333 case ISD::CTLZ_ZERO_UNDEF: return visitCTLZ_ZERO_UNDEF(N); 1334 case ISD::CTTZ: return visitCTTZ(N); 1335 case ISD::CTTZ_ZERO_UNDEF: return visitCTTZ_ZERO_UNDEF(N); 1336 case ISD::CTPOP: return visitCTPOP(N); 1337 case ISD::SELECT: return visitSELECT(N); 1338 case ISD::VSELECT: return visitVSELECT(N); 1339 case ISD::SELECT_CC: return visitSELECT_CC(N); 1340 case ISD::SETCC: return visitSETCC(N); 1341 case ISD::SIGN_EXTEND: return visitSIGN_EXTEND(N); 1342 case ISD::ZERO_EXTEND: return visitZERO_EXTEND(N); 1343 case ISD::ANY_EXTEND: return visitANY_EXTEND(N); 1344 case ISD::SIGN_EXTEND_INREG: return visitSIGN_EXTEND_INREG(N); 1345 case ISD::TRUNCATE: return visitTRUNCATE(N); 1346 case ISD::BITCAST: return visitBITCAST(N); 1347 case ISD::BUILD_PAIR: return visitBUILD_PAIR(N); 1348 case ISD::FADD: return visitFADD(N); 1349 case ISD::FSUB: return visitFSUB(N); 1350 case ISD::FMUL: return visitFMUL(N); 1351 case ISD::FMA: return visitFMA(N); 1352 case ISD::FDIV: return visitFDIV(N); 1353 case ISD::FREM: return visitFREM(N); 1354 case ISD::FSQRT: return visitFSQRT(N); 1355 case ISD::FCOPYSIGN: return visitFCOPYSIGN(N); 1356 case ISD::SINT_TO_FP: return visitSINT_TO_FP(N); 1357 case ISD::UINT_TO_FP: return visitUINT_TO_FP(N); 1358 case ISD::FP_TO_SINT: return visitFP_TO_SINT(N); 1359 case ISD::FP_TO_UINT: return visitFP_TO_UINT(N); 1360 case ISD::FP_ROUND: return visitFP_ROUND(N); 1361 case ISD::FP_ROUND_INREG: return visitFP_ROUND_INREG(N); 1362 case ISD::FP_EXTEND: return visitFP_EXTEND(N); 1363 case ISD::FNEG: return visitFNEG(N); 1364 case ISD::FABS: return visitFABS(N); 1365 case ISD::FFLOOR: return visitFFLOOR(N); 1366 case ISD::FMINNUM: return visitFMINNUM(N); 1367 case ISD::FMAXNUM: return visitFMAXNUM(N); 1368 case ISD::FCEIL: return visitFCEIL(N); 1369 case ISD::FTRUNC: return visitFTRUNC(N); 1370 case ISD::BRCOND: return visitBRCOND(N); 1371 case ISD::BR_CC: return visitBR_CC(N); 1372 case ISD::LOAD: return visitLOAD(N); 1373 case ISD::STORE: return visitSTORE(N); 1374 case ISD::INSERT_VECTOR_ELT: return visitINSERT_VECTOR_ELT(N); 1375 case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N); 1376 case ISD::BUILD_VECTOR: return visitBUILD_VECTOR(N); 1377 case ISD::CONCAT_VECTORS: return visitCONCAT_VECTORS(N); 1378 case ISD::EXTRACT_SUBVECTOR: return visitEXTRACT_SUBVECTOR(N); 1379 case ISD::VECTOR_SHUFFLE: return visitVECTOR_SHUFFLE(N); 1380 case ISD::SCALAR_TO_VECTOR: return visitSCALAR_TO_VECTOR(N); 1381 case ISD::INSERT_SUBVECTOR: return visitINSERT_SUBVECTOR(N); 1382 case ISD::MLOAD: return visitMLOAD(N); 1383 case ISD::MSTORE: return visitMSTORE(N); 1384 case ISD::FP_TO_FP16: return visitFP_TO_FP16(N); 1385 } 1386 return SDValue(); 1387 } 1388 1389 SDValue DAGCombiner::combine(SDNode *N) { 1390 SDValue RV = visit(N); 1391 1392 // If nothing happened, try a target-specific DAG combine. 1393 if (!RV.getNode()) { 1394 assert(N->getOpcode() != ISD::DELETED_NODE && 1395 "Node was deleted but visit returned NULL!"); 1396 1397 if (N->getOpcode() >= ISD::BUILTIN_OP_END || 1398 TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) { 1399 1400 // Expose the DAG combiner to the target combiner impls. 1401 TargetLowering::DAGCombinerInfo 1402 DagCombineInfo(DAG, Level, false, this); 1403 1404 RV = TLI.PerformDAGCombine(N, DagCombineInfo); 1405 } 1406 } 1407 1408 // If nothing happened still, try promoting the operation. 1409 if (!RV.getNode()) { 1410 switch (N->getOpcode()) { 1411 default: break; 1412 case ISD::ADD: 1413 case ISD::SUB: 1414 case ISD::MUL: 1415 case ISD::AND: 1416 case ISD::OR: 1417 case ISD::XOR: 1418 RV = PromoteIntBinOp(SDValue(N, 0)); 1419 break; 1420 case ISD::SHL: 1421 case ISD::SRA: 1422 case ISD::SRL: 1423 RV = PromoteIntShiftOp(SDValue(N, 0)); 1424 break; 1425 case ISD::SIGN_EXTEND: 1426 case ISD::ZERO_EXTEND: 1427 case ISD::ANY_EXTEND: 1428 RV = PromoteExtend(SDValue(N, 0)); 1429 break; 1430 case ISD::LOAD: 1431 if (PromoteLoad(SDValue(N, 0))) 1432 RV = SDValue(N, 0); 1433 break; 1434 } 1435 } 1436 1437 // If N is a commutative binary node, try commuting it to enable more 1438 // sdisel CSE. 1439 if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) && 1440 N->getNumValues() == 1) { 1441 SDValue N0 = N->getOperand(0); 1442 SDValue N1 = N->getOperand(1); 1443 1444 // Constant operands are canonicalized to RHS. 1445 if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) { 1446 SDValue Ops[] = {N1, N0}; 1447 SDNode *CSENode; 1448 if (const BinaryWithFlagsSDNode *BinNode = 1449 dyn_cast<BinaryWithFlagsSDNode>(N)) { 1450 CSENode = DAG.getNodeIfExists( 1451 N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(), 1452 BinNode->hasNoSignedWrap(), BinNode->isExact()); 1453 } else { 1454 CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops); 1455 } 1456 if (CSENode) 1457 return SDValue(CSENode, 0); 1458 } 1459 } 1460 1461 return RV; 1462 } 1463 1464 /// Given a node, return its input chain if it has one, otherwise return a null 1465 /// sd operand. 1466 static SDValue getInputChainForNode(SDNode *N) { 1467 if (unsigned NumOps = N->getNumOperands()) { 1468 if (N->getOperand(0).getValueType() == MVT::Other) 1469 return N->getOperand(0); 1470 if (N->getOperand(NumOps-1).getValueType() == MVT::Other) 1471 return N->getOperand(NumOps-1); 1472 for (unsigned i = 1; i < NumOps-1; ++i) 1473 if (N->getOperand(i).getValueType() == MVT::Other) 1474 return N->getOperand(i); 1475 } 1476 return SDValue(); 1477 } 1478 1479 SDValue DAGCombiner::visitTokenFactor(SDNode *N) { 1480 // If N has two operands, where one has an input chain equal to the other, 1481 // the 'other' chain is redundant. 1482 if (N->getNumOperands() == 2) { 1483 if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1)) 1484 return N->getOperand(0); 1485 if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0)) 1486 return N->getOperand(1); 1487 } 1488 1489 SmallVector<SDNode *, 8> TFs; // List of token factors to visit. 1490 SmallVector<SDValue, 8> Ops; // Ops for replacing token factor. 1491 SmallPtrSet<SDNode*, 16> SeenOps; 1492 bool Changed = false; // If we should replace this token factor. 1493 1494 // Start out with this token factor. 1495 TFs.push_back(N); 1496 1497 // Iterate through token factors. The TFs grows when new token factors are 1498 // encountered. 1499 for (unsigned i = 0; i < TFs.size(); ++i) { 1500 SDNode *TF = TFs[i]; 1501 1502 // Check each of the operands. 1503 for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) { 1504 SDValue Op = TF->getOperand(i); 1505 1506 switch (Op.getOpcode()) { 1507 case ISD::EntryToken: 1508 // Entry tokens don't need to be added to the list. They are 1509 // redundant. 1510 Changed = true; 1511 break; 1512 1513 case ISD::TokenFactor: 1514 if (Op.hasOneUse() && 1515 std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) { 1516 // Queue up for processing. 1517 TFs.push_back(Op.getNode()); 1518 // Clean up in case the token factor is removed. 1519 AddToWorklist(Op.getNode()); 1520 Changed = true; 1521 break; 1522 } 1523 // Fall thru 1524 1525 default: 1526 // Only add if it isn't already in the list. 1527 if (SeenOps.insert(Op.getNode()).second) 1528 Ops.push_back(Op); 1529 else 1530 Changed = true; 1531 break; 1532 } 1533 } 1534 } 1535 1536 SDValue Result; 1537 1538 // If we've changed things around then replace token factor. 1539 if (Changed) { 1540 if (Ops.empty()) { 1541 // The entry token is the only possible outcome. 1542 Result = DAG.getEntryNode(); 1543 } else { 1544 // New and improved token factor. 1545 Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops); 1546 } 1547 1548 // Add users to worklist if AA is enabled, since it may introduce 1549 // a lot of new chained token factors while removing memory deps. 1550 bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA 1551 : DAG.getSubtarget().useAA(); 1552 return CombineTo(N, Result, UseAA /*add to worklist*/); 1553 } 1554 1555 return Result; 1556 } 1557 1558 /// MERGE_VALUES can always be eliminated. 1559 SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { 1560 WorklistRemover DeadNodes(*this); 1561 // Replacing results may cause a different MERGE_VALUES to suddenly 1562 // be CSE'd with N, and carry its uses with it. Iterate until no 1563 // uses remain, to ensure that the node can be safely deleted. 1564 // First add the users of this node to the work list so that they 1565 // can be tried again once they have new operands. 1566 AddUsersToWorklist(N); 1567 do { 1568 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) 1569 DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i)); 1570 } while (!N->use_empty()); 1571 deleteAndRecombine(N); 1572 return SDValue(N, 0); // Return N so it doesn't get rechecked! 1573 } 1574 1575 SDValue DAGCombiner::visitADD(SDNode *N) { 1576 SDValue N0 = N->getOperand(0); 1577 SDValue N1 = N->getOperand(1); 1578 EVT VT = N0.getValueType(); 1579 1580 // fold vector ops 1581 if (VT.isVector()) { 1582 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 1583 return FoldedVOp; 1584 1585 // fold (add x, 0) -> x, vector edition 1586 if (ISD::isBuildVectorAllZeros(N1.getNode())) 1587 return N0; 1588 if (ISD::isBuildVectorAllZeros(N0.getNode())) 1589 return N1; 1590 } 1591 1592 // fold (add x, undef) -> undef 1593 if (N0.getOpcode() == ISD::UNDEF) 1594 return N0; 1595 if (N1.getOpcode() == ISD::UNDEF) 1596 return N1; 1597 // fold (add c1, c2) -> c1+c2 1598 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1599 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1600 if (N0C && N1C) 1601 return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C); 1602 // canonicalize constant to RHS 1603 if (isConstantIntBuildVectorOrConstantInt(N0) && 1604 !isConstantIntBuildVectorOrConstantInt(N1)) 1605 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); 1606 // fold (add x, 0) -> x 1607 if (N1C && N1C->isNullValue()) 1608 return N0; 1609 // fold (add Sym, c) -> Sym+c 1610 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1611 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && 1612 GA->getOpcode() == ISD::GlobalAddress) 1613 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 1614 GA->getOffset() + 1615 (uint64_t)N1C->getSExtValue()); 1616 // fold ((c1-A)+c2) -> (c1+c2)-A 1617 if (N1C && N0.getOpcode() == ISD::SUB) 1618 if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0))) 1619 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1620 DAG.getConstant(N1C->getAPIntValue()+ 1621 N0C->getAPIntValue(), VT), 1622 N0.getOperand(1)); 1623 // reassociate add 1624 if (SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1)) 1625 return RADD; 1626 // fold ((0-A) + B) -> B-A 1627 if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) && 1628 cast<ConstantSDNode>(N0.getOperand(0))->isNullValue()) 1629 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); 1630 // fold (A + (0-B)) -> A-B 1631 if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) && 1632 cast<ConstantSDNode>(N1.getOperand(0))->isNullValue()) 1633 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); 1634 // fold (A+(B-A)) -> B 1635 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) 1636 return N1.getOperand(0); 1637 // fold ((B-A)+A) -> B 1638 if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1)) 1639 return N0.getOperand(0); 1640 // fold (A+(B-(A+C))) to (B-C) 1641 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1642 N0 == N1.getOperand(1).getOperand(0)) 1643 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1644 N1.getOperand(1).getOperand(1)); 1645 // fold (A+(B-(C+A))) to (B-C) 1646 if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && 1647 N0 == N1.getOperand(1).getOperand(1)) 1648 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), 1649 N1.getOperand(1).getOperand(0)); 1650 // fold (A+((B-A)+or-C)) to (B+or-C) 1651 if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && 1652 N1.getOperand(0).getOpcode() == ISD::SUB && 1653 N0 == N1.getOperand(0).getOperand(1)) 1654 return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, 1655 N1.getOperand(0).getOperand(0), N1.getOperand(1)); 1656 1657 // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant 1658 if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) { 1659 SDValue N00 = N0.getOperand(0); 1660 SDValue N01 = N0.getOperand(1); 1661 SDValue N10 = N1.getOperand(0); 1662 SDValue N11 = N1.getOperand(1); 1663 1664 if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10)) 1665 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1666 DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), 1667 DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); 1668 } 1669 1670 if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) 1671 return SDValue(N, 0); 1672 1673 // fold (a+b) -> (a|b) iff a and b share no bits. 1674 if (VT.isInteger() && !VT.isVector()) { 1675 APInt LHSZero, LHSOne; 1676 APInt RHSZero, RHSOne; 1677 DAG.computeKnownBits(N0, LHSZero, LHSOne); 1678 1679 if (LHSZero.getBoolValue()) { 1680 DAG.computeKnownBits(N1, RHSZero, RHSOne); 1681 1682 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 1683 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 1684 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){ 1685 if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT)) 1686 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); 1687 } 1688 } 1689 } 1690 1691 // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n)) 1692 if (N1.getOpcode() == ISD::SHL && 1693 N1.getOperand(0).getOpcode() == ISD::SUB) 1694 if (ConstantSDNode *C = 1695 dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0))) 1696 if (C->getAPIntValue() == 0) 1697 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, 1698 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1699 N1.getOperand(0).getOperand(1), 1700 N1.getOperand(1))); 1701 if (N0.getOpcode() == ISD::SHL && 1702 N0.getOperand(0).getOpcode() == ISD::SUB) 1703 if (ConstantSDNode *C = 1704 dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0))) 1705 if (C->getAPIntValue() == 0) 1706 return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, 1707 DAG.getNode(ISD::SHL, SDLoc(N), VT, 1708 N0.getOperand(0).getOperand(1), 1709 N0.getOperand(1))); 1710 1711 if (N1.getOpcode() == ISD::AND) { 1712 SDValue AndOp0 = N1.getOperand(0); 1713 ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1)); 1714 unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0); 1715 unsigned DestBits = VT.getScalarType().getSizeInBits(); 1716 1717 // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) 1718 // and similar xforms where the inner op is either ~0 or 0. 1719 if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { 1720 SDLoc DL(N); 1721 return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); 1722 } 1723 } 1724 1725 // add (sext i1), X -> sub X, (zext i1) 1726 if (N0.getOpcode() == ISD::SIGN_EXTEND && 1727 N0.getOperand(0).getValueType() == MVT::i1 && 1728 !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { 1729 SDLoc DL(N); 1730 SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); 1731 return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); 1732 } 1733 1734 // add X, (sextinreg Y i1) -> sub X, (and Y 1) 1735 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { 1736 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); 1737 if (TN->getVT() == MVT::i1) { 1738 SDLoc DL(N); 1739 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), 1740 DAG.getConstant(1, VT)); 1741 return DAG.getNode(ISD::SUB, DL, VT, N0, ZExt); 1742 } 1743 } 1744 1745 return SDValue(); 1746 } 1747 1748 SDValue DAGCombiner::visitADDC(SDNode *N) { 1749 SDValue N0 = N->getOperand(0); 1750 SDValue N1 = N->getOperand(1); 1751 EVT VT = N0.getValueType(); 1752 1753 // If the flag result is dead, turn this into an ADD. 1754 if (!N->hasAnyUseOfValue(1)) 1755 return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), 1756 DAG.getNode(ISD::CARRY_FALSE, 1757 SDLoc(N), MVT::Glue)); 1758 1759 // canonicalize constant to RHS. 1760 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1761 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1762 if (N0C && !N1C) 1763 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); 1764 1765 // fold (addc x, 0) -> x + no carry out 1766 if (N1C && N1C->isNullValue()) 1767 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, 1768 SDLoc(N), MVT::Glue)); 1769 1770 // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. 1771 APInt LHSZero, LHSOne; 1772 APInt RHSZero, RHSOne; 1773 DAG.computeKnownBits(N0, LHSZero, LHSOne); 1774 1775 if (LHSZero.getBoolValue()) { 1776 DAG.computeKnownBits(N1, RHSZero, RHSOne); 1777 1778 // If all possibly-set bits on the LHS are clear on the RHS, return an OR. 1779 // If all possibly-set bits on the RHS are clear on the LHS, return an OR. 1780 if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) 1781 return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), 1782 DAG.getNode(ISD::CARRY_FALSE, 1783 SDLoc(N), MVT::Glue)); 1784 } 1785 1786 return SDValue(); 1787 } 1788 1789 SDValue DAGCombiner::visitADDE(SDNode *N) { 1790 SDValue N0 = N->getOperand(0); 1791 SDValue N1 = N->getOperand(1); 1792 SDValue CarryIn = N->getOperand(2); 1793 1794 // canonicalize constant to RHS 1795 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1796 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1797 if (N0C && !N1C) 1798 return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), 1799 N1, N0, CarryIn); 1800 1801 // fold (adde x, y, false) -> (addc x, y) 1802 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 1803 return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); 1804 1805 return SDValue(); 1806 } 1807 1808 // Since it may not be valid to emit a fold to zero for vector initializers 1809 // check if we can before folding. 1810 static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, 1811 SelectionDAG &DAG, 1812 bool LegalOperations, bool LegalTypes) { 1813 if (!VT.isVector()) 1814 return DAG.getConstant(0, VT); 1815 if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) 1816 return DAG.getConstant(0, VT); 1817 return SDValue(); 1818 } 1819 1820 SDValue DAGCombiner::visitSUB(SDNode *N) { 1821 SDValue N0 = N->getOperand(0); 1822 SDValue N1 = N->getOperand(1); 1823 EVT VT = N0.getValueType(); 1824 1825 // fold vector ops 1826 if (VT.isVector()) { 1827 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 1828 return FoldedVOp; 1829 1830 // fold (sub x, 0) -> x, vector edition 1831 if (ISD::isBuildVectorAllZeros(N1.getNode())) 1832 return N0; 1833 } 1834 1835 // fold (sub x, x) -> 0 1836 // FIXME: Refactor this and xor and other similar operations together. 1837 if (N0 == N1) 1838 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); 1839 // fold (sub c1, c2) -> c1-c2 1840 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode()); 1841 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode()); 1842 if (N0C && N1C) 1843 return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); 1844 // fold (sub x, c) -> (add x, -c) 1845 if (N1C) 1846 return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, 1847 DAG.getConstant(-N1C->getAPIntValue(), VT)); 1848 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) 1849 if (N0C && N0C->isAllOnesValue()) 1850 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); 1851 // fold A-(A-B) -> B 1852 if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) 1853 return N1.getOperand(1); 1854 // fold (A+B)-A -> B 1855 if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1) 1856 return N0.getOperand(1); 1857 // fold (A+B)-B -> A 1858 if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1) 1859 return N0.getOperand(0); 1860 // fold C2-(A+C1) -> (C2-C1)-A 1861 ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ? nullptr : 1862 dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode()); 1863 if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { 1864 SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), 1865 VT); 1866 return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, 1867 N1.getOperand(0)); 1868 } 1869 // fold ((A+(B+or-C))-B) -> A+or-C 1870 if (N0.getOpcode() == ISD::ADD && 1871 (N0.getOperand(1).getOpcode() == ISD::SUB || 1872 N0.getOperand(1).getOpcode() == ISD::ADD) && 1873 N0.getOperand(1).getOperand(0) == N1) 1874 return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, 1875 N0.getOperand(0), N0.getOperand(1).getOperand(1)); 1876 // fold ((A+(C+B))-B) -> A+C 1877 if (N0.getOpcode() == ISD::ADD && 1878 N0.getOperand(1).getOpcode() == ISD::ADD && 1879 N0.getOperand(1).getOperand(1) == N1) 1880 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 1881 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1882 // fold ((A-(B-C))-C) -> A-B 1883 if (N0.getOpcode() == ISD::SUB && 1884 N0.getOperand(1).getOpcode() == ISD::SUB && 1885 N0.getOperand(1).getOperand(1) == N1) 1886 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1887 N0.getOperand(0), N0.getOperand(1).getOperand(0)); 1888 1889 // If either operand of a sub is undef, the result is undef 1890 if (N0.getOpcode() == ISD::UNDEF) 1891 return N0; 1892 if (N1.getOpcode() == ISD::UNDEF) 1893 return N1; 1894 1895 // If the relocation model supports it, consider symbol offsets. 1896 if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0)) 1897 if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { 1898 // fold (sub Sym, c) -> Sym-c 1899 if (N1C && GA->getOpcode() == ISD::GlobalAddress) 1900 return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, 1901 GA->getOffset() - 1902 (uint64_t)N1C->getSExtValue()); 1903 // fold (sub Sym+c1, Sym+c2) -> c1-c2 1904 if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1)) 1905 if (GA->getGlobal() == GB->getGlobal()) 1906 return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(), 1907 VT); 1908 } 1909 1910 // sub X, (sextinreg Y i1) -> add X, (and Y 1) 1911 if (N1.getOpcode() == ISD::SIGN_EXTEND_INREG) { 1912 VTSDNode *TN = cast<VTSDNode>(N1.getOperand(1)); 1913 if (TN->getVT() == MVT::i1) { 1914 SDLoc DL(N); 1915 SDValue ZExt = DAG.getNode(ISD::AND, DL, VT, N1.getOperand(0), 1916 DAG.getConstant(1, VT)); 1917 return DAG.getNode(ISD::ADD, DL, VT, N0, ZExt); 1918 } 1919 } 1920 1921 return SDValue(); 1922 } 1923 1924 SDValue DAGCombiner::visitSUBC(SDNode *N) { 1925 SDValue N0 = N->getOperand(0); 1926 SDValue N1 = N->getOperand(1); 1927 EVT VT = N0.getValueType(); 1928 1929 // If the flag result is dead, turn this into an SUB. 1930 if (!N->hasAnyUseOfValue(1)) 1931 return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), 1932 DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), 1933 MVT::Glue)); 1934 1935 // fold (subc x, x) -> 0 + no borrow 1936 if (N0 == N1) 1937 return CombineTo(N, DAG.getConstant(0, VT), 1938 DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), 1939 MVT::Glue)); 1940 1941 // fold (subc x, 0) -> x + no borrow 1942 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 1943 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 1944 if (N1C && N1C->isNullValue()) 1945 return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), 1946 MVT::Glue)); 1947 1948 // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow 1949 if (N0C && N0C->isAllOnesValue()) 1950 return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0), 1951 DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), 1952 MVT::Glue)); 1953 1954 return SDValue(); 1955 } 1956 1957 SDValue DAGCombiner::visitSUBE(SDNode *N) { 1958 SDValue N0 = N->getOperand(0); 1959 SDValue N1 = N->getOperand(1); 1960 SDValue CarryIn = N->getOperand(2); 1961 1962 // fold (sube x, y, false) -> (subc x, y) 1963 if (CarryIn.getOpcode() == ISD::CARRY_FALSE) 1964 return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1); 1965 1966 return SDValue(); 1967 } 1968 1969 SDValue DAGCombiner::visitMUL(SDNode *N) { 1970 SDValue N0 = N->getOperand(0); 1971 SDValue N1 = N->getOperand(1); 1972 EVT VT = N0.getValueType(); 1973 1974 // fold (mul x, undef) -> 0 1975 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 1976 return DAG.getConstant(0, VT); 1977 1978 bool N0IsConst = false; 1979 bool N1IsConst = false; 1980 APInt ConstValue0, ConstValue1; 1981 // fold vector ops 1982 if (VT.isVector()) { 1983 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 1984 return FoldedVOp; 1985 1986 N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0); 1987 N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1); 1988 } else { 1989 N0IsConst = isa<ConstantSDNode>(N0); 1990 if (N0IsConst) 1991 ConstValue0 = cast<ConstantSDNode>(N0)->getAPIntValue(); 1992 N1IsConst = isa<ConstantSDNode>(N1); 1993 if (N1IsConst) 1994 ConstValue1 = cast<ConstantSDNode>(N1)->getAPIntValue(); 1995 } 1996 1997 // fold (mul c1, c2) -> c1*c2 1998 if (N0IsConst && N1IsConst) 1999 return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); 2000 2001 // canonicalize constant to RHS (vector doesn't have to splat) 2002 if (isConstantIntBuildVectorOrConstantInt(N0) && 2003 !isConstantIntBuildVectorOrConstantInt(N1)) 2004 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); 2005 // fold (mul x, 0) -> 0 2006 if (N1IsConst && ConstValue1 == 0) 2007 return N1; 2008 // We require a splat of the entire scalar bit width for non-contiguous 2009 // bit patterns. 2010 bool IsFullSplat = 2011 ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); 2012 // fold (mul x, 1) -> x 2013 if (N1IsConst && ConstValue1 == 1 && IsFullSplat) 2014 return N0; 2015 // fold (mul x, -1) -> 0-x 2016 if (N1IsConst && ConstValue1.isAllOnesValue()) 2017 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 2018 DAG.getConstant(0, VT), N0); 2019 // fold (mul x, (1 << c)) -> x << c 2020 if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) 2021 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, 2022 DAG.getConstant(ConstValue1.logBase2(), 2023 getShiftAmountTy(N0.getValueType()))); 2024 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c 2025 if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { 2026 unsigned Log2Val = (-ConstValue1).logBase2(); 2027 // FIXME: If the input is something that is easily negated (e.g. a 2028 // single-use add), we should put the negate there. 2029 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 2030 DAG.getConstant(0, VT), 2031 DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, 2032 DAG.getConstant(Log2Val, 2033 getShiftAmountTy(N0.getValueType())))); 2034 } 2035 2036 APInt Val; 2037 // (mul (shl X, c1), c2) -> (mul X, c2 << c1) 2038 if (N1IsConst && N0.getOpcode() == ISD::SHL && 2039 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 2040 isa<ConstantSDNode>(N0.getOperand(1)))) { 2041 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, 2042 N1, N0.getOperand(1)); 2043 AddToWorklist(C3.getNode()); 2044 return DAG.getNode(ISD::MUL, SDLoc(N), VT, 2045 N0.getOperand(0), C3); 2046 } 2047 2048 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one 2049 // use. 2050 { 2051 SDValue Sh(nullptr,0), Y(nullptr,0); 2052 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 2053 if (N0.getOpcode() == ISD::SHL && 2054 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 2055 isa<ConstantSDNode>(N0.getOperand(1))) && 2056 N0.getNode()->hasOneUse()) { 2057 Sh = N0; Y = N1; 2058 } else if (N1.getOpcode() == ISD::SHL && 2059 isa<ConstantSDNode>(N1.getOperand(1)) && 2060 N1.getNode()->hasOneUse()) { 2061 Sh = N1; Y = N0; 2062 } 2063 2064 if (Sh.getNode()) { 2065 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2066 Sh.getOperand(0), Y); 2067 return DAG.getNode(ISD::SHL, SDLoc(N), VT, 2068 Mul, Sh.getOperand(1)); 2069 } 2070 } 2071 2072 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) 2073 if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && 2074 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 2075 isa<ConstantSDNode>(N0.getOperand(1)))) 2076 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 2077 DAG.getNode(ISD::MUL, SDLoc(N0), VT, 2078 N0.getOperand(0), N1), 2079 DAG.getNode(ISD::MUL, SDLoc(N1), VT, 2080 N0.getOperand(1), N1)); 2081 2082 // reassociate mul 2083 if (SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1)) 2084 return RMUL; 2085 2086 return SDValue(); 2087 } 2088 2089 SDValue DAGCombiner::visitSDIV(SDNode *N) { 2090 SDValue N0 = N->getOperand(0); 2091 SDValue N1 = N->getOperand(1); 2092 EVT VT = N->getValueType(0); 2093 2094 // fold vector ops 2095 if (VT.isVector()) 2096 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2097 return FoldedVOp; 2098 2099 // fold (sdiv c1, c2) -> c1/c2 2100 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2101 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2102 if (N0C && N1C && !N1C->isNullValue()) 2103 return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); 2104 // fold (sdiv X, 1) -> X 2105 if (N1C && N1C->getAPIntValue() == 1LL) 2106 return N0; 2107 // fold (sdiv X, -1) -> 0-X 2108 if (N1C && N1C->isAllOnesValue()) 2109 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 2110 DAG.getConstant(0, VT), N0); 2111 // If we know the sign bits of both operands are zero, strength reduce to a 2112 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 2113 if (!VT.isVector()) { 2114 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2115 return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), 2116 N0, N1); 2117 } 2118 2119 // fold (sdiv X, pow2) -> simple ops after legalize 2120 if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || 2121 (-N1C->getAPIntValue()).isPowerOf2())) { 2122 // If dividing by powers of two is cheap, then don't perform the following 2123 // fold. 2124 if (TLI.isPow2SDivCheap()) 2125 return SDValue(); 2126 2127 // Target-specific implementation of sdiv x, pow2. 2128 SDValue Res = BuildSDIVPow2(N); 2129 if (Res.getNode()) 2130 return Res; 2131 2132 unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); 2133 2134 // Splat the sign bit into the register 2135 SDValue SGN = 2136 DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, 2137 DAG.getConstant(VT.getScalarSizeInBits() - 1, 2138 getShiftAmountTy(N0.getValueType()))); 2139 AddToWorklist(SGN.getNode()); 2140 2141 // Add (N0 < 0) ? abs2 - 1 : 0; 2142 SDValue SRL = 2143 DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, 2144 DAG.getConstant(VT.getScalarSizeInBits() - lg2, 2145 getShiftAmountTy(SGN.getValueType()))); 2146 SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); 2147 AddToWorklist(SRL.getNode()); 2148 AddToWorklist(ADD.getNode()); // Divide by pow2 2149 SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, 2150 DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); 2151 2152 // If we're dividing by a positive value, we're done. Otherwise, we must 2153 // negate the result. 2154 if (N1C->getAPIntValue().isNonNegative()) 2155 return SRA; 2156 2157 AddToWorklist(SRA.getNode()); 2158 return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); 2159 } 2160 2161 // If integer divide is expensive and we satisfy the requirements, emit an 2162 // alternate sequence. 2163 if (N1C && !TLI.isIntDivCheap()) { 2164 SDValue Op = BuildSDIV(N); 2165 if (Op.getNode()) return Op; 2166 } 2167 2168 // undef / X -> 0 2169 if (N0.getOpcode() == ISD::UNDEF) 2170 return DAG.getConstant(0, VT); 2171 // X / undef -> undef 2172 if (N1.getOpcode() == ISD::UNDEF) 2173 return N1; 2174 2175 return SDValue(); 2176 } 2177 2178 SDValue DAGCombiner::visitUDIV(SDNode *N) { 2179 SDValue N0 = N->getOperand(0); 2180 SDValue N1 = N->getOperand(1); 2181 EVT VT = N->getValueType(0); 2182 2183 // fold vector ops 2184 if (VT.isVector()) 2185 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2186 return FoldedVOp; 2187 2188 // fold (udiv c1, c2) -> c1/c2 2189 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2190 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2191 if (N0C && N1C && !N1C->isNullValue()) 2192 return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); 2193 // fold (udiv x, (1 << c)) -> x >>u c 2194 if (N1C && N1C->getAPIntValue().isPowerOf2()) 2195 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, 2196 DAG.getConstant(N1C->getAPIntValue().logBase2(), 2197 getShiftAmountTy(N0.getValueType()))); 2198 // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 2199 if (N1.getOpcode() == ISD::SHL) { 2200 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 2201 if (SHC->getAPIntValue().isPowerOf2()) { 2202 EVT ADDVT = N1.getOperand(1).getValueType(); 2203 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, 2204 N1.getOperand(1), 2205 DAG.getConstant(SHC->getAPIntValue() 2206 .logBase2(), 2207 ADDVT)); 2208 AddToWorklist(Add.getNode()); 2209 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); 2210 } 2211 } 2212 } 2213 // fold (udiv x, c) -> alternate 2214 if (N1C && !TLI.isIntDivCheap()) { 2215 SDValue Op = BuildUDIV(N); 2216 if (Op.getNode()) return Op; 2217 } 2218 2219 // undef / X -> 0 2220 if (N0.getOpcode() == ISD::UNDEF) 2221 return DAG.getConstant(0, VT); 2222 // X / undef -> undef 2223 if (N1.getOpcode() == ISD::UNDEF) 2224 return N1; 2225 2226 return SDValue(); 2227 } 2228 2229 SDValue DAGCombiner::visitSREM(SDNode *N) { 2230 SDValue N0 = N->getOperand(0); 2231 SDValue N1 = N->getOperand(1); 2232 EVT VT = N->getValueType(0); 2233 2234 // fold (srem c1, c2) -> c1%c2 2235 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2236 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2237 if (N0C && N1C && !N1C->isNullValue()) 2238 return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C); 2239 // If we know the sign bits of both operands are zero, strength reduce to a 2240 // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 2241 if (!VT.isVector()) { 2242 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2243 return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); 2244 } 2245 2246 // If X/C can be simplified by the division-by-constant logic, lower 2247 // X%C to the equivalent of X-X/C*C. 2248 if (N1C && !N1C->isNullValue()) { 2249 SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); 2250 AddToWorklist(Div.getNode()); 2251 SDValue OptimizedDiv = combine(Div.getNode()); 2252 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2253 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2254 OptimizedDiv, N1); 2255 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 2256 AddToWorklist(Mul.getNode()); 2257 return Sub; 2258 } 2259 } 2260 2261 // undef % X -> 0 2262 if (N0.getOpcode() == ISD::UNDEF) 2263 return DAG.getConstant(0, VT); 2264 // X % undef -> undef 2265 if (N1.getOpcode() == ISD::UNDEF) 2266 return N1; 2267 2268 return SDValue(); 2269 } 2270 2271 SDValue DAGCombiner::visitUREM(SDNode *N) { 2272 SDValue N0 = N->getOperand(0); 2273 SDValue N1 = N->getOperand(1); 2274 EVT VT = N->getValueType(0); 2275 2276 // fold (urem c1, c2) -> c1%c2 2277 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2278 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2279 if (N0C && N1C && !N1C->isNullValue()) 2280 return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); 2281 // fold (urem x, pow2) -> (and x, pow2-1) 2282 if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) 2283 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, 2284 DAG.getConstant(N1C->getAPIntValue()-1,VT)); 2285 // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) 2286 if (N1.getOpcode() == ISD::SHL) { 2287 if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) { 2288 if (SHC->getAPIntValue().isPowerOf2()) { 2289 SDValue Add = 2290 DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, 2291 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), 2292 VT)); 2293 AddToWorklist(Add.getNode()); 2294 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); 2295 } 2296 } 2297 } 2298 2299 // If X/C can be simplified by the division-by-constant logic, lower 2300 // X%C to the equivalent of X-X/C*C. 2301 if (N1C && !N1C->isNullValue()) { 2302 SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); 2303 AddToWorklist(Div.getNode()); 2304 SDValue OptimizedDiv = combine(Div.getNode()); 2305 if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { 2306 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 2307 OptimizedDiv, N1); 2308 SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); 2309 AddToWorklist(Mul.getNode()); 2310 return Sub; 2311 } 2312 } 2313 2314 // undef % X -> 0 2315 if (N0.getOpcode() == ISD::UNDEF) 2316 return DAG.getConstant(0, VT); 2317 // X % undef -> undef 2318 if (N1.getOpcode() == ISD::UNDEF) 2319 return N1; 2320 2321 return SDValue(); 2322 } 2323 2324 SDValue DAGCombiner::visitMULHS(SDNode *N) { 2325 SDValue N0 = N->getOperand(0); 2326 SDValue N1 = N->getOperand(1); 2327 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2328 EVT VT = N->getValueType(0); 2329 SDLoc DL(N); 2330 2331 // fold (mulhs x, 0) -> 0 2332 if (N1C && N1C->isNullValue()) 2333 return N1; 2334 // fold (mulhs x, 1) -> (sra x, size(x)-1) 2335 if (N1C && N1C->getAPIntValue() == 1) 2336 return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, 2337 DAG.getConstant(N0.getValueType().getSizeInBits() - 1, 2338 getShiftAmountTy(N0.getValueType()))); 2339 // fold (mulhs x, undef) -> 0 2340 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2341 return DAG.getConstant(0, VT); 2342 2343 // If the type twice as wide is legal, transform the mulhs to a wider multiply 2344 // plus a shift. 2345 if (VT.isSimple() && !VT.isVector()) { 2346 MVT Simple = VT.getSimpleVT(); 2347 unsigned SimpleSize = Simple.getSizeInBits(); 2348 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2349 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2350 N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0); 2351 N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1); 2352 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2353 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2354 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2355 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2356 } 2357 } 2358 2359 return SDValue(); 2360 } 2361 2362 SDValue DAGCombiner::visitMULHU(SDNode *N) { 2363 SDValue N0 = N->getOperand(0); 2364 SDValue N1 = N->getOperand(1); 2365 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2366 EVT VT = N->getValueType(0); 2367 SDLoc DL(N); 2368 2369 // fold (mulhu x, 0) -> 0 2370 if (N1C && N1C->isNullValue()) 2371 return N1; 2372 // fold (mulhu x, 1) -> 0 2373 if (N1C && N1C->getAPIntValue() == 1) 2374 return DAG.getConstant(0, N0.getValueType()); 2375 // fold (mulhu x, undef) -> 0 2376 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2377 return DAG.getConstant(0, VT); 2378 2379 // If the type twice as wide is legal, transform the mulhu to a wider multiply 2380 // plus a shift. 2381 if (VT.isSimple() && !VT.isVector()) { 2382 MVT Simple = VT.getSimpleVT(); 2383 unsigned SimpleSize = Simple.getSizeInBits(); 2384 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2385 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2386 N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0); 2387 N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1); 2388 N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1); 2389 N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1, 2390 DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType()))); 2391 return DAG.getNode(ISD::TRUNCATE, DL, VT, N1); 2392 } 2393 } 2394 2395 return SDValue(); 2396 } 2397 2398 /// Perform optimizations common to nodes that compute two values. LoOp and HiOp 2399 /// give the opcodes for the two computations that are being performed. Return 2400 /// true if a simplification was made. 2401 SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, 2402 unsigned HiOp) { 2403 // If the high half is not needed, just compute the low half. 2404 bool HiExists = N->hasAnyUseOfValue(1); 2405 if (!HiExists && 2406 (!LegalOperations || 2407 TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) { 2408 SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2409 return CombineTo(N, Res, Res); 2410 } 2411 2412 // If the low half is not needed, just compute the high half. 2413 bool LoExists = N->hasAnyUseOfValue(0); 2414 if (!LoExists && 2415 (!LegalOperations || 2416 TLI.isOperationLegal(HiOp, N->getValueType(1)))) { 2417 SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2418 return CombineTo(N, Res, Res); 2419 } 2420 2421 // If both halves are used, return as it is. 2422 if (LoExists && HiExists) 2423 return SDValue(); 2424 2425 // If the two computed results can be simplified separately, separate them. 2426 if (LoExists) { 2427 SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->ops()); 2428 AddToWorklist(Lo.getNode()); 2429 SDValue LoOpt = combine(Lo.getNode()); 2430 if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() && 2431 (!LegalOperations || 2432 TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType()))) 2433 return CombineTo(N, LoOpt, LoOpt); 2434 } 2435 2436 if (HiExists) { 2437 SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->ops()); 2438 AddToWorklist(Hi.getNode()); 2439 SDValue HiOpt = combine(Hi.getNode()); 2440 if (HiOpt.getNode() && HiOpt != Hi && 2441 (!LegalOperations || 2442 TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType()))) 2443 return CombineTo(N, HiOpt, HiOpt); 2444 } 2445 2446 return SDValue(); 2447 } 2448 2449 SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { 2450 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS); 2451 if (Res.getNode()) return Res; 2452 2453 EVT VT = N->getValueType(0); 2454 SDLoc DL(N); 2455 2456 // If the type is twice as wide is legal, transform the mulhu to a wider 2457 // multiply plus a shift. 2458 if (VT.isSimple() && !VT.isVector()) { 2459 MVT Simple = VT.getSimpleVT(); 2460 unsigned SimpleSize = Simple.getSizeInBits(); 2461 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2462 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2463 SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0)); 2464 SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1)); 2465 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2466 // Compute the high part as N1. 2467 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2468 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2469 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2470 // Compute the low part as N0. 2471 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2472 return CombineTo(N, Lo, Hi); 2473 } 2474 } 2475 2476 return SDValue(); 2477 } 2478 2479 SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { 2480 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU); 2481 if (Res.getNode()) return Res; 2482 2483 EVT VT = N->getValueType(0); 2484 SDLoc DL(N); 2485 2486 // If the type is twice as wide is legal, transform the mulhu to a wider 2487 // multiply plus a shift. 2488 if (VT.isSimple() && !VT.isVector()) { 2489 MVT Simple = VT.getSimpleVT(); 2490 unsigned SimpleSize = Simple.getSizeInBits(); 2491 EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2); 2492 if (TLI.isOperationLegal(ISD::MUL, NewVT)) { 2493 SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0)); 2494 SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1)); 2495 Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi); 2496 // Compute the high part as N1. 2497 Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo, 2498 DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType()))); 2499 Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi); 2500 // Compute the low part as N0. 2501 Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo); 2502 return CombineTo(N, Lo, Hi); 2503 } 2504 } 2505 2506 return SDValue(); 2507 } 2508 2509 SDValue DAGCombiner::visitSMULO(SDNode *N) { 2510 // (smulo x, 2) -> (saddo x, x) 2511 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2512 if (C2->getAPIntValue() == 2) 2513 return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), 2514 N->getOperand(0), N->getOperand(0)); 2515 2516 return SDValue(); 2517 } 2518 2519 SDValue DAGCombiner::visitUMULO(SDNode *N) { 2520 // (umulo x, 2) -> (uaddo x, x) 2521 if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1))) 2522 if (C2->getAPIntValue() == 2) 2523 return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), 2524 N->getOperand(0), N->getOperand(0)); 2525 2526 return SDValue(); 2527 } 2528 2529 SDValue DAGCombiner::visitSDIVREM(SDNode *N) { 2530 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM); 2531 if (Res.getNode()) return Res; 2532 2533 return SDValue(); 2534 } 2535 2536 SDValue DAGCombiner::visitUDIVREM(SDNode *N) { 2537 SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM); 2538 if (Res.getNode()) return Res; 2539 2540 return SDValue(); 2541 } 2542 2543 /// If this is a binary operator with two operands of the same opcode, try to 2544 /// simplify it. 2545 SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { 2546 SDValue N0 = N->getOperand(0), N1 = N->getOperand(1); 2547 EVT VT = N0.getValueType(); 2548 assert(N0.getOpcode() == N1.getOpcode() && "Bad input!"); 2549 2550 // Bail early if none of these transforms apply. 2551 if (N0.getNode()->getNumOperands() == 0) return SDValue(); 2552 2553 // For each of OP in AND/OR/XOR: 2554 // fold (OP (zext x), (zext y)) -> (zext (OP x, y)) 2555 // fold (OP (sext x), (sext y)) -> (sext (OP x, y)) 2556 // fold (OP (aext x), (aext y)) -> (aext (OP x, y)) 2557 // fold (OP (bswap x), (bswap y)) -> (bswap (OP x, y)) 2558 // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free) 2559 // 2560 // do not sink logical op inside of a vector extend, since it may combine 2561 // into a vsetcc. 2562 EVT Op0VT = N0.getOperand(0).getValueType(); 2563 if ((N0.getOpcode() == ISD::ZERO_EXTEND || 2564 N0.getOpcode() == ISD::SIGN_EXTEND || 2565 N0.getOpcode() == ISD::BSWAP || 2566 // Avoid infinite looping with PromoteIntBinOp. 2567 (N0.getOpcode() == ISD::ANY_EXTEND && 2568 (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) || 2569 (N0.getOpcode() == ISD::TRUNCATE && 2570 (!TLI.isZExtFree(VT, Op0VT) || 2571 !TLI.isTruncateFree(Op0VT, VT)) && 2572 TLI.isTypeLegal(Op0VT))) && 2573 !VT.isVector() && 2574 Op0VT == N1.getOperand(0).getValueType() && 2575 (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { 2576 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2577 N0.getOperand(0).getValueType(), 2578 N0.getOperand(0), N1.getOperand(0)); 2579 AddToWorklist(ORNode.getNode()); 2580 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); 2581 } 2582 2583 // For each of OP in SHL/SRL/SRA/AND... 2584 // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z) 2585 // fold (or (OP x, z), (OP y, z)) -> (OP (or x, y), z) 2586 // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z) 2587 if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || 2588 N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && 2589 N0.getOperand(1) == N1.getOperand(1)) { 2590 SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), 2591 N0.getOperand(0).getValueType(), 2592 N0.getOperand(0), N1.getOperand(0)); 2593 AddToWorklist(ORNode.getNode()); 2594 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 2595 ORNode, N0.getOperand(1)); 2596 } 2597 2598 // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B)) 2599 // Only perform this optimization after type legalization and before 2600 // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by 2601 // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and 2602 // we don't want to undo this promotion. 2603 // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper 2604 // on scalars. 2605 if ((N0.getOpcode() == ISD::BITCAST || 2606 N0.getOpcode() == ISD::SCALAR_TO_VECTOR) && 2607 Level == AfterLegalizeTypes) { 2608 SDValue In0 = N0.getOperand(0); 2609 SDValue In1 = N1.getOperand(0); 2610 EVT In0Ty = In0.getValueType(); 2611 EVT In1Ty = In1.getValueType(); 2612 SDLoc DL(N); 2613 // If both incoming values are integers, and the original types are the 2614 // same. 2615 if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { 2616 SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1); 2617 SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op); 2618 AddToWorklist(Op.getNode()); 2619 return BC; 2620 } 2621 } 2622 2623 // Xor/and/or are indifferent to the swizzle operation (shuffle of one value). 2624 // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B)) 2625 // If both shuffles use the same mask, and both shuffle within a single 2626 // vector, then it is worthwhile to move the swizzle after the operation. 2627 // The type-legalizer generates this pattern when loading illegal 2628 // vector types from memory. In many cases this allows additional shuffle 2629 // optimizations. 2630 // There are other cases where moving the shuffle after the xor/and/or 2631 // is profitable even if shuffles don't perform a swizzle. 2632 // If both shuffles use the same mask, and both shuffles have the same first 2633 // or second operand, then it might still be profitable to move the shuffle 2634 // after the xor/and/or operation. 2635 if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) { 2636 ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0); 2637 ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1); 2638 2639 assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() && 2640 "Inputs to shuffles are not the same type"); 2641 2642 // Check that both shuffles use the same mask. The masks are known to be of 2643 // the same length because the result vector type is the same. 2644 // Check also that shuffles have only one use to avoid introducing extra 2645 // instructions. 2646 if (SVN0->hasOneUse() && SVN1->hasOneUse() && 2647 SVN0->getMask().equals(SVN1->getMask())) { 2648 SDValue ShOp = N0->getOperand(1); 2649 2650 // Don't try to fold this node if it requires introducing a 2651 // build vector of all zeros that might be illegal at this stage. 2652 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2653 if (!LegalTypes) 2654 ShOp = DAG.getConstant(0, VT); 2655 else 2656 ShOp = SDValue(); 2657 } 2658 2659 // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C) 2660 // (OR (shuf (A, C), shuf (B, C)) -> shuf (OR (A, B), C) 2661 // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0) 2662 if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) { 2663 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2664 N0->getOperand(0), N1->getOperand(0)); 2665 AddToWorklist(NewNode.getNode()); 2666 return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp, 2667 &SVN0->getMask()[0]); 2668 } 2669 2670 // Don't try to fold this node if it requires introducing a 2671 // build vector of all zeros that might be illegal at this stage. 2672 ShOp = N0->getOperand(0); 2673 if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) { 2674 if (!LegalTypes) 2675 ShOp = DAG.getConstant(0, VT); 2676 else 2677 ShOp = SDValue(); 2678 } 2679 2680 // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B)) 2681 // (OR (shuf (C, A), shuf (C, B)) -> shuf (C, OR (A, B)) 2682 // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B)) 2683 if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) { 2684 SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT, 2685 N0->getOperand(1), N1->getOperand(1)); 2686 AddToWorklist(NewNode.getNode()); 2687 return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode, 2688 &SVN0->getMask()[0]); 2689 } 2690 } 2691 } 2692 2693 return SDValue(); 2694 } 2695 2696 /// This contains all DAGCombine rules which reduce two values combined by 2697 /// an And operation to a single value. This makes them reusable in the context 2698 /// of visitSELECT(). Rules involving constants are not included as 2699 /// visitSELECT() already handles those cases. 2700 SDValue DAGCombiner::visitANDLike(SDValue N0, SDValue N1, 2701 SDNode *LocReference) { 2702 EVT VT = N1.getValueType(); 2703 2704 // fold (and x, undef) -> 0 2705 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2706 return DAG.getConstant(0, VT); 2707 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2708 SDValue LL, LR, RL, RR, CC0, CC1; 2709 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2710 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2711 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2712 2713 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2714 LL.getValueType().isInteger()) { 2715 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2716 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2717 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2718 LR.getValueType(), LL, RL); 2719 AddToWorklist(ORNode.getNode()); 2720 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2721 } 2722 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2723 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2724 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2725 LR.getValueType(), LL, RL); 2726 AddToWorklist(ANDNode.getNode()); 2727 return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); 2728 } 2729 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2730 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2731 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2732 LR.getValueType(), LL, RL); 2733 AddToWorklist(ORNode.getNode()); 2734 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 2735 } 2736 } 2737 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2738 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2739 Op0 == Op1 && LL.getValueType().isInteger() && 2740 Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && 2741 cast<ConstantSDNode>(RR)->isAllOnesValue()) || 2742 (cast<ConstantSDNode>(LR)->isAllOnesValue() && 2743 cast<ConstantSDNode>(RR)->isNullValue()))) { 2744 SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), 2745 LL, DAG.getConstant(1, LL.getValueType())); 2746 AddToWorklist(ADDNode.getNode()); 2747 return DAG.getSetCC(SDLoc(LocReference), VT, ADDNode, 2748 DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); 2749 } 2750 // canonicalize equivalent to ll == rl 2751 if (LL == RR && LR == RL) { 2752 Op1 = ISD::getSetCCSwappedOperands(Op1); 2753 std::swap(RL, RR); 2754 } 2755 if (LL == RL && LR == RR) { 2756 bool isInteger = LL.getValueType().isInteger(); 2757 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2758 if (Result != ISD::SETCC_INVALID && 2759 (!LegalOperations || 2760 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2761 TLI.isOperationLegal(ISD::SETCC, 2762 getSetCCResultType(N0.getSimpleValueType()))))) 2763 return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), 2764 LL, LR, Result); 2765 } 2766 } 2767 2768 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 2769 VT.getSizeInBits() <= 64) { 2770 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2771 APInt ADDC = ADDI->getAPIntValue(); 2772 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2773 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 2774 // immediate for an add, but it is legal if its top c2 bits are set, 2775 // transform the ADD so the immediate doesn't need to be materialized 2776 // in a register. 2777 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 2778 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 2779 SRLI->getZExtValue()); 2780 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 2781 ADDC |= Mask; 2782 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2783 SDValue NewAdd = 2784 DAG.getNode(ISD::ADD, SDLoc(N0), VT, 2785 N0.getOperand(0), DAG.getConstant(ADDC, VT)); 2786 CombineTo(N0.getNode(), NewAdd); 2787 // Return N so it doesn't get rechecked! 2788 return SDValue(LocReference, 0); 2789 } 2790 } 2791 } 2792 } 2793 } 2794 } 2795 2796 return SDValue(); 2797 } 2798 2799 SDValue DAGCombiner::visitAND(SDNode *N) { 2800 SDValue N0 = N->getOperand(0); 2801 SDValue N1 = N->getOperand(1); 2802 EVT VT = N1.getValueType(); 2803 2804 // fold vector ops 2805 if (VT.isVector()) { 2806 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 2807 return FoldedVOp; 2808 2809 // fold (and x, 0) -> 0, vector edition 2810 if (ISD::isBuildVectorAllZeros(N0.getNode())) 2811 // do not return N0, because undef node may exist in N0 2812 return DAG.getConstant( 2813 APInt::getNullValue( 2814 N0.getValueType().getScalarType().getSizeInBits()), 2815 N0.getValueType()); 2816 if (ISD::isBuildVectorAllZeros(N1.getNode())) 2817 // do not return N1, because undef node may exist in N1 2818 return DAG.getConstant( 2819 APInt::getNullValue( 2820 N1.getValueType().getScalarType().getSizeInBits()), 2821 N1.getValueType()); 2822 2823 // fold (and x, -1) -> x, vector edition 2824 if (ISD::isBuildVectorAllOnes(N0.getNode())) 2825 return N1; 2826 if (ISD::isBuildVectorAllOnes(N1.getNode())) 2827 return N0; 2828 } 2829 2830 // fold (and c1, c2) -> c1&c2 2831 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 2832 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 2833 if (N0C && N1C) 2834 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); 2835 // canonicalize constant to RHS 2836 if (isConstantIntBuildVectorOrConstantInt(N0) && 2837 !isConstantIntBuildVectorOrConstantInt(N1)) 2838 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 2839 // fold (and x, -1) -> x 2840 if (N1C && N1C->isAllOnesValue()) 2841 return N0; 2842 // if (and x, c) is known to be zero, return 0 2843 unsigned BitWidth = VT.getScalarType().getSizeInBits(); 2844 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 2845 APInt::getAllOnesValue(BitWidth))) 2846 return DAG.getConstant(0, VT); 2847 // reassociate and 2848 if (SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1)) 2849 return RAND; 2850 // fold (and (or x, C), D) -> D if (C & D) == D 2851 if (N1C && N0.getOpcode() == ISD::OR) 2852 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2853 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2854 return N1; 2855 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 2856 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2857 SDValue N0Op0 = N0.getOperand(0); 2858 APInt Mask = ~N1C->getAPIntValue(); 2859 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2860 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2861 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 2862 N0.getValueType(), N0Op0); 2863 2864 // Replace uses of the AND with uses of the Zero extend node. 2865 CombineTo(N, Zext); 2866 2867 // We actually want to replace all uses of the any_extend with the 2868 // zero_extend, to avoid duplicating things. This will later cause this 2869 // AND to be folded. 2870 CombineTo(N0.getNode(), Zext); 2871 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2872 } 2873 } 2874 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2875 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2876 // already be zero by virtue of the width of the base type of the load. 2877 // 2878 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2879 // more cases. 2880 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2881 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2882 N0.getOpcode() == ISD::LOAD) { 2883 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2884 N0 : N0.getOperand(0) ); 2885 2886 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2887 // This can be a pure constant or a vector splat, in which case we treat the 2888 // vector as a scalar and use the splat value. 2889 APInt Constant = APInt::getNullValue(1); 2890 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2891 Constant = C->getAPIntValue(); 2892 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2893 APInt SplatValue, SplatUndef; 2894 unsigned SplatBitSize; 2895 bool HasAnyUndefs; 2896 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2897 SplatBitSize, HasAnyUndefs); 2898 if (IsSplat) { 2899 // Undef bits can contribute to a possible optimisation if set, so 2900 // set them. 2901 SplatValue |= SplatUndef; 2902 2903 // The splat value may be something like "0x00FFFFFF", which means 0 for 2904 // the first vector value and FF for the rest, repeating. We need a mask 2905 // that will apply equally to all members of the vector, so AND all the 2906 // lanes of the constant together. 2907 EVT VT = Vector->getValueType(0); 2908 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2909 2910 // If the splat value has been compressed to a bitlength lower 2911 // than the size of the vector lane, we need to re-expand it to 2912 // the lane size. 2913 if (BitWidth > SplatBitSize) 2914 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 2915 SplatBitSize < BitWidth; 2916 SplatBitSize = SplatBitSize * 2) 2917 SplatValue |= SplatValue.shl(SplatBitSize); 2918 2919 // Make sure that variable 'Constant' is only set if 'SplatBitSize' is a 2920 // multiple of 'BitWidth'. Otherwise, we could propagate a wrong value. 2921 if (SplatBitSize % BitWidth == 0) { 2922 Constant = APInt::getAllOnesValue(BitWidth); 2923 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) 2924 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); 2925 } 2926 } 2927 } 2928 2929 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 2930 // actually legal and isn't going to get expanded, else this is a false 2931 // optimisation. 2932 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 2933 Load->getValueType(0), 2934 Load->getMemoryVT()); 2935 2936 // Resize the constant to the same size as the original memory access before 2937 // extension. If it is still the AllOnesValue then this AND is completely 2938 // unneeded. 2939 Constant = 2940 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 2941 2942 bool B; 2943 switch (Load->getExtensionType()) { 2944 default: B = false; break; 2945 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 2946 case ISD::ZEXTLOAD: 2947 case ISD::NON_EXTLOAD: B = true; break; 2948 } 2949 2950 if (B && Constant.isAllOnesValue()) { 2951 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 2952 // preserve semantics once we get rid of the AND. 2953 SDValue NewLoad(Load, 0); 2954 if (Load->getExtensionType() == ISD::EXTLOAD) { 2955 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 2956 Load->getValueType(0), SDLoc(Load), 2957 Load->getChain(), Load->getBasePtr(), 2958 Load->getOffset(), Load->getMemoryVT(), 2959 Load->getMemOperand()); 2960 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 2961 if (Load->getNumValues() == 3) { 2962 // PRE/POST_INC loads have 3 values. 2963 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 2964 NewLoad.getValue(2) }; 2965 CombineTo(Load, To, 3, true); 2966 } else { 2967 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 2968 } 2969 } 2970 2971 // Fold the AND away, taking care not to fold to the old load node if we 2972 // replaced it. 2973 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 2974 2975 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2976 } 2977 } 2978 2979 // fold (and (load x), 255) -> (zextload x, i8) 2980 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2981 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2982 if (N1C && (N0.getOpcode() == ISD::LOAD || 2983 (N0.getOpcode() == ISD::ANY_EXTEND && 2984 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2985 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2986 LoadSDNode *LN0 = HasAnyExt 2987 ? cast<LoadSDNode>(N0.getOperand(0)) 2988 : cast<LoadSDNode>(N0); 2989 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2990 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { 2991 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2992 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2993 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2994 EVT LoadedVT = LN0->getMemoryVT(); 2995 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2996 2997 if (ExtVT == LoadedVT && 2998 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 2999 ExtVT))) { 3000 3001 SDValue NewLoad = 3002 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3003 LN0->getChain(), LN0->getBasePtr(), ExtVT, 3004 LN0->getMemOperand()); 3005 AddToWorklist(N); 3006 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 3007 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3008 } 3009 3010 // Do not change the width of a volatile load. 3011 // Do not generate loads of non-round integer types since these can 3012 // be expensive (and would be wrong if the type is not byte sized). 3013 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 3014 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, LoadResultTy, 3015 ExtVT))) { 3016 EVT PtrType = LN0->getOperand(1).getValueType(); 3017 3018 unsigned Alignment = LN0->getAlignment(); 3019 SDValue NewPtr = LN0->getBasePtr(); 3020 3021 // For big endian targets, we need to add an offset to the pointer 3022 // to load the correct bytes. For little endian systems, we merely 3023 // need to read fewer bytes from the same pointer. 3024 if (TLI.isBigEndian()) { 3025 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 3026 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 3027 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 3028 NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, 3029 NewPtr, DAG.getConstant(PtrOff, PtrType)); 3030 Alignment = MinAlign(Alignment, PtrOff); 3031 } 3032 3033 AddToWorklist(NewPtr.getNode()); 3034 3035 SDValue Load = 3036 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 3037 LN0->getChain(), NewPtr, 3038 LN0->getPointerInfo(), 3039 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 3040 LN0->isInvariant(), Alignment, LN0->getAAInfo()); 3041 AddToWorklist(N); 3042 CombineTo(LN0, Load, Load.getValue(1)); 3043 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3044 } 3045 } 3046 } 3047 } 3048 3049 if (SDValue Combined = visitANDLike(N0, N1, N)) 3050 return Combined; 3051 3052 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 3053 if (N0.getOpcode() == N1.getOpcode()) { 3054 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 3055 if (Tmp.getNode()) return Tmp; 3056 } 3057 3058 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 3059 // fold (and (sra)) -> (and (srl)) when possible. 3060 if (!VT.isVector() && 3061 SimplifyDemandedBits(SDValue(N, 0))) 3062 return SDValue(N, 0); 3063 3064 // fold (zext_inreg (extload x)) -> (zextload x) 3065 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 3066 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 3067 EVT MemVT = LN0->getMemoryVT(); 3068 // If we zero all the possible extended bits, then we can turn this into 3069 // a zextload if we are running before legalize or the operation is legal. 3070 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 3071 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 3072 BitWidth - MemVT.getScalarType().getSizeInBits())) && 3073 ((!LegalOperations && !LN0->isVolatile()) || 3074 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 3075 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 3076 LN0->getChain(), LN0->getBasePtr(), 3077 MemVT, LN0->getMemOperand()); 3078 AddToWorklist(N); 3079 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 3080 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3081 } 3082 } 3083 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 3084 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 3085 N0.hasOneUse()) { 3086 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 3087 EVT MemVT = LN0->getMemoryVT(); 3088 // If we zero all the possible extended bits, then we can turn this into 3089 // a zextload if we are running before legalize or the operation is legal. 3090 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 3091 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 3092 BitWidth - MemVT.getScalarType().getSizeInBits())) && 3093 ((!LegalOperations && !LN0->isVolatile()) || 3094 TLI.isLoadExtLegal(ISD::ZEXTLOAD, VT, MemVT))) { 3095 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 3096 LN0->getChain(), LN0->getBasePtr(), 3097 MemVT, LN0->getMemOperand()); 3098 AddToWorklist(N); 3099 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 3100 return SDValue(N, 0); // Return N so it doesn't get rechecked! 3101 } 3102 } 3103 // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const) 3104 if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) { 3105 SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), 3106 N0.getOperand(1), false); 3107 if (BSwap.getNode()) 3108 return BSwap; 3109 } 3110 3111 return SDValue(); 3112 } 3113 3114 /// Match (a >> 8) | (a << 8) as (bswap a) >> 16. 3115 SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, 3116 bool DemandHighBits) { 3117 if (!LegalOperations) 3118 return SDValue(); 3119 3120 EVT VT = N->getValueType(0); 3121 if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16) 3122 return SDValue(); 3123 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3124 return SDValue(); 3125 3126 // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00) 3127 bool LookPassAnd0 = false; 3128 bool LookPassAnd1 = false; 3129 if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL) 3130 std::swap(N0, N1); 3131 if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL) 3132 std::swap(N0, N1); 3133 if (N0.getOpcode() == ISD::AND) { 3134 if (!N0.getNode()->hasOneUse()) 3135 return SDValue(); 3136 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3137 if (!N01C || N01C->getZExtValue() != 0xFF00) 3138 return SDValue(); 3139 N0 = N0.getOperand(0); 3140 LookPassAnd0 = true; 3141 } 3142 3143 if (N1.getOpcode() == ISD::AND) { 3144 if (!N1.getNode()->hasOneUse()) 3145 return SDValue(); 3146 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3147 if (!N11C || N11C->getZExtValue() != 0xFF) 3148 return SDValue(); 3149 N1 = N1.getOperand(0); 3150 LookPassAnd1 = true; 3151 } 3152 3153 if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL) 3154 std::swap(N0, N1); 3155 if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL) 3156 return SDValue(); 3157 if (!N0.getNode()->hasOneUse() || 3158 !N1.getNode()->hasOneUse()) 3159 return SDValue(); 3160 3161 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3162 ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1)); 3163 if (!N01C || !N11C) 3164 return SDValue(); 3165 if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8) 3166 return SDValue(); 3167 3168 // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8) 3169 SDValue N00 = N0->getOperand(0); 3170 if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) { 3171 if (!N00.getNode()->hasOneUse()) 3172 return SDValue(); 3173 ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1)); 3174 if (!N001C || N001C->getZExtValue() != 0xFF) 3175 return SDValue(); 3176 N00 = N00.getOperand(0); 3177 LookPassAnd0 = true; 3178 } 3179 3180 SDValue N10 = N1->getOperand(0); 3181 if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) { 3182 if (!N10.getNode()->hasOneUse()) 3183 return SDValue(); 3184 ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1)); 3185 if (!N101C || N101C->getZExtValue() != 0xFF00) 3186 return SDValue(); 3187 N10 = N10.getOperand(0); 3188 LookPassAnd1 = true; 3189 } 3190 3191 if (N00 != N10) 3192 return SDValue(); 3193 3194 // Make sure everything beyond the low halfword gets set to zero since the SRL 3195 // 16 will clear the top bits. 3196 unsigned OpSizeInBits = VT.getSizeInBits(); 3197 if (DemandHighBits && OpSizeInBits > 16) { 3198 // If the left-shift isn't masked out then the only way this is a bswap is 3199 // if all bits beyond the low 8 are 0. In that case the entire pattern 3200 // reduces to a left shift anyway: leave it for other parts of the combiner. 3201 if (!LookPassAnd0) 3202 return SDValue(); 3203 3204 // However, if the right shift isn't masked out then it might be because 3205 // it's not needed. See if we can spot that too. 3206 if (!LookPassAnd1 && 3207 !DAG.MaskedValueIsZero( 3208 N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16))) 3209 return SDValue(); 3210 } 3211 3212 SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00); 3213 if (OpSizeInBits > 16) 3214 Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res, 3215 DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT))); 3216 return Res; 3217 } 3218 3219 /// Return true if the specified node is an element that makes up a 32-bit 3220 /// packed halfword byteswap. 3221 /// ((x & 0x000000ff) << 8) | 3222 /// ((x & 0x0000ff00) >> 8) | 3223 /// ((x & 0x00ff0000) << 8) | 3224 /// ((x & 0xff000000) >> 8) 3225 static bool isBSwapHWordElement(SDValue N, MutableArrayRef<SDNode *> Parts) { 3226 if (!N.getNode()->hasOneUse()) 3227 return false; 3228 3229 unsigned Opc = N.getOpcode(); 3230 if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL) 3231 return false; 3232 3233 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3234 if (!N1C) 3235 return false; 3236 3237 unsigned Num; 3238 switch (N1C->getZExtValue()) { 3239 default: 3240 return false; 3241 case 0xFF: Num = 0; break; 3242 case 0xFF00: Num = 1; break; 3243 case 0xFF0000: Num = 2; break; 3244 case 0xFF000000: Num = 3; break; 3245 } 3246 3247 // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00). 3248 SDValue N0 = N.getOperand(0); 3249 if (Opc == ISD::AND) { 3250 if (Num == 0 || Num == 2) { 3251 // (x >> 8) & 0xff 3252 // (x >> 8) & 0xff0000 3253 if (N0.getOpcode() != ISD::SRL) 3254 return false; 3255 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3256 if (!C || C->getZExtValue() != 8) 3257 return false; 3258 } else { 3259 // (x << 8) & 0xff00 3260 // (x << 8) & 0xff000000 3261 if (N0.getOpcode() != ISD::SHL) 3262 return false; 3263 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3264 if (!C || C->getZExtValue() != 8) 3265 return false; 3266 } 3267 } else if (Opc == ISD::SHL) { 3268 // (x & 0xff) << 8 3269 // (x & 0xff0000) << 8 3270 if (Num != 0 && Num != 2) 3271 return false; 3272 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3273 if (!C || C->getZExtValue() != 8) 3274 return false; 3275 } else { // Opc == ISD::SRL 3276 // (x & 0xff00) >> 8 3277 // (x & 0xff000000) >> 8 3278 if (Num != 1 && Num != 3) 3279 return false; 3280 ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1)); 3281 if (!C || C->getZExtValue() != 8) 3282 return false; 3283 } 3284 3285 if (Parts[Num]) 3286 return false; 3287 3288 Parts[Num] = N0.getOperand(0).getNode(); 3289 return true; 3290 } 3291 3292 /// Match a 32-bit packed halfword bswap. That is 3293 /// ((x & 0x000000ff) << 8) | 3294 /// ((x & 0x0000ff00) >> 8) | 3295 /// ((x & 0x00ff0000) << 8) | 3296 /// ((x & 0xff000000) >> 8) 3297 /// => (rotl (bswap x), 16) 3298 SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { 3299 if (!LegalOperations) 3300 return SDValue(); 3301 3302 EVT VT = N->getValueType(0); 3303 if (VT != MVT::i32) 3304 return SDValue(); 3305 if (!TLI.isOperationLegal(ISD::BSWAP, VT)) 3306 return SDValue(); 3307 3308 // Look for either 3309 // (or (or (and), (and)), (or (and), (and))) 3310 // (or (or (or (and), (and)), (and)), (and)) 3311 if (N0.getOpcode() != ISD::OR) 3312 return SDValue(); 3313 SDValue N00 = N0.getOperand(0); 3314 SDValue N01 = N0.getOperand(1); 3315 SDNode *Parts[4] = {}; 3316 3317 if (N1.getOpcode() == ISD::OR && 3318 N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { 3319 // (or (or (and), (and)), (or (and), (and))) 3320 SDValue N000 = N00.getOperand(0); 3321 if (!isBSwapHWordElement(N000, Parts)) 3322 return SDValue(); 3323 3324 SDValue N001 = N00.getOperand(1); 3325 if (!isBSwapHWordElement(N001, Parts)) 3326 return SDValue(); 3327 SDValue N010 = N01.getOperand(0); 3328 if (!isBSwapHWordElement(N010, Parts)) 3329 return SDValue(); 3330 SDValue N011 = N01.getOperand(1); 3331 if (!isBSwapHWordElement(N011, Parts)) 3332 return SDValue(); 3333 } else { 3334 // (or (or (or (and), (and)), (and)), (and)) 3335 if (!isBSwapHWordElement(N1, Parts)) 3336 return SDValue(); 3337 if (!isBSwapHWordElement(N01, Parts)) 3338 return SDValue(); 3339 if (N00.getOpcode() != ISD::OR) 3340 return SDValue(); 3341 SDValue N000 = N00.getOperand(0); 3342 if (!isBSwapHWordElement(N000, Parts)) 3343 return SDValue(); 3344 SDValue N001 = N00.getOperand(1); 3345 if (!isBSwapHWordElement(N001, Parts)) 3346 return SDValue(); 3347 } 3348 3349 // Make sure the parts are all coming from the same node. 3350 if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3]) 3351 return SDValue(); 3352 3353 SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, 3354 SDValue(Parts[0],0)); 3355 3356 // Result of the bswap should be rotated by 16. If it's not legal, then 3357 // do (x << 16) | (x >> 16). 3358 SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); 3359 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 3360 return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); 3361 if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) 3362 return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); 3363 return DAG.getNode(ISD::OR, SDLoc(N), VT, 3364 DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), 3365 DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); 3366 } 3367 3368 /// This contains all DAGCombine rules which reduce two values combined by 3369 /// an Or operation to a single value \see visitANDLike(). 3370 SDValue DAGCombiner::visitORLike(SDValue N0, SDValue N1, SDNode *LocReference) { 3371 EVT VT = N1.getValueType(); 3372 // fold (or x, undef) -> -1 3373 if (!LegalOperations && 3374 (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) { 3375 EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT; 3376 return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); 3377 } 3378 // fold (or (setcc x), (setcc y)) -> (setcc (or x, y)) 3379 SDValue LL, LR, RL, RR, CC0, CC1; 3380 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 3381 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 3382 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 3383 3384 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 3385 LL.getValueType().isInteger()) { 3386 // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0) 3387 // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) 3388 if (cast<ConstantSDNode>(LR)->isNullValue() && 3389 (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { 3390 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), 3391 LR.getValueType(), LL, RL); 3392 AddToWorklist(ORNode.getNode()); 3393 return DAG.getSetCC(SDLoc(LocReference), VT, ORNode, LR, Op1); 3394 } 3395 // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) 3396 // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) 3397 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && 3398 (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { 3399 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), 3400 LR.getValueType(), LL, RL); 3401 AddToWorklist(ANDNode.getNode()); 3402 return DAG.getSetCC(SDLoc(LocReference), VT, ANDNode, LR, Op1); 3403 } 3404 } 3405 // canonicalize equivalent to ll == rl 3406 if (LL == RR && LR == RL) { 3407 Op1 = ISD::getSetCCSwappedOperands(Op1); 3408 std::swap(RL, RR); 3409 } 3410 if (LL == RL && LR == RR) { 3411 bool isInteger = LL.getValueType().isInteger(); 3412 ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); 3413 if (Result != ISD::SETCC_INVALID && 3414 (!LegalOperations || 3415 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 3416 TLI.isOperationLegal(ISD::SETCC, 3417 getSetCCResultType(N0.getValueType()))))) 3418 return DAG.getSetCC(SDLoc(LocReference), N0.getValueType(), 3419 LL, LR, Result); 3420 } 3421 } 3422 3423 // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible. 3424 if (N0.getOpcode() == ISD::AND && 3425 N1.getOpcode() == ISD::AND && 3426 N0.getOperand(1).getOpcode() == ISD::Constant && 3427 N1.getOperand(1).getOpcode() == ISD::Constant && 3428 // Don't increase # computations. 3429 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3430 // We can only do this xform if we know that bits from X that are set in C2 3431 // but not in C1 are already zero. Likewise for Y. 3432 const APInt &LHSMask = 3433 cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue(); 3434 const APInt &RHSMask = 3435 cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue(); 3436 3437 if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && 3438 DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { 3439 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3440 N0.getOperand(0), N1.getOperand(0)); 3441 return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, X, 3442 DAG.getConstant(LHSMask | RHSMask, VT)); 3443 } 3444 } 3445 3446 // (or (and X, M), (and X, N)) -> (and X, (or M, N)) 3447 if (N0.getOpcode() == ISD::AND && 3448 N1.getOpcode() == ISD::AND && 3449 N0.getOperand(0) == N1.getOperand(0) && 3450 // Don't increase # computations. 3451 (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) { 3452 SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, 3453 N0.getOperand(1), N1.getOperand(1)); 3454 return DAG.getNode(ISD::AND, SDLoc(LocReference), VT, N0.getOperand(0), X); 3455 } 3456 3457 return SDValue(); 3458 } 3459 3460 SDValue DAGCombiner::visitOR(SDNode *N) { 3461 SDValue N0 = N->getOperand(0); 3462 SDValue N1 = N->getOperand(1); 3463 EVT VT = N1.getValueType(); 3464 3465 // fold vector ops 3466 if (VT.isVector()) { 3467 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3468 return FoldedVOp; 3469 3470 // fold (or x, 0) -> x, vector edition 3471 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3472 return N1; 3473 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3474 return N0; 3475 3476 // fold (or x, -1) -> -1, vector edition 3477 if (ISD::isBuildVectorAllOnes(N0.getNode())) 3478 // do not return N0, because undef node may exist in N0 3479 return DAG.getConstant( 3480 APInt::getAllOnesValue( 3481 N0.getValueType().getScalarType().getSizeInBits()), 3482 N0.getValueType()); 3483 if (ISD::isBuildVectorAllOnes(N1.getNode())) 3484 // do not return N1, because undef node may exist in N1 3485 return DAG.getConstant( 3486 APInt::getAllOnesValue( 3487 N1.getValueType().getScalarType().getSizeInBits()), 3488 N1.getValueType()); 3489 3490 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1) 3491 // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2) 3492 // Do this only if the resulting shuffle is legal. 3493 if (isa<ShuffleVectorSDNode>(N0) && 3494 isa<ShuffleVectorSDNode>(N1) && 3495 // Avoid folding a node with illegal type. 3496 TLI.isTypeLegal(VT) && 3497 N0->getOperand(1) == N1->getOperand(1) && 3498 ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) { 3499 bool CanFold = true; 3500 unsigned NumElts = VT.getVectorNumElements(); 3501 const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0); 3502 const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1); 3503 // We construct two shuffle masks: 3504 // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand 3505 // and N1 as the second operand. 3506 // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand 3507 // and N0 as the second operand. 3508 // We do this because OR is commutable and therefore there might be 3509 // two ways to fold this node into a shuffle. 3510 SmallVector<int,4> Mask1; 3511 SmallVector<int,4> Mask2; 3512 3513 for (unsigned i = 0; i != NumElts && CanFold; ++i) { 3514 int M0 = SV0->getMaskElt(i); 3515 int M1 = SV1->getMaskElt(i); 3516 3517 // Both shuffle indexes are undef. Propagate Undef. 3518 if (M0 < 0 && M1 < 0) { 3519 Mask1.push_back(M0); 3520 Mask2.push_back(M0); 3521 continue; 3522 } 3523 3524 if (M0 < 0 || M1 < 0 || 3525 (M0 < (int)NumElts && M1 < (int)NumElts) || 3526 (M0 >= (int)NumElts && M1 >= (int)NumElts)) { 3527 CanFold = false; 3528 break; 3529 } 3530 3531 Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts); 3532 Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts); 3533 } 3534 3535 if (CanFold) { 3536 // Fold this sequence only if the resulting shuffle is 'legal'. 3537 if (TLI.isShuffleMaskLegal(Mask1, VT)) 3538 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), 3539 N1->getOperand(0), &Mask1[0]); 3540 if (TLI.isShuffleMaskLegal(Mask2, VT)) 3541 return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0), 3542 N0->getOperand(0), &Mask2[0]); 3543 } 3544 } 3545 } 3546 3547 // fold (or c1, c2) -> c1|c2 3548 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3549 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3550 if (N0C && N1C) 3551 return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); 3552 // canonicalize constant to RHS 3553 if (isConstantIntBuildVectorOrConstantInt(N0) && 3554 !isConstantIntBuildVectorOrConstantInt(N1)) 3555 return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); 3556 // fold (or x, 0) -> x 3557 if (N1C && N1C->isNullValue()) 3558 return N0; 3559 // fold (or x, -1) -> -1 3560 if (N1C && N1C->isAllOnesValue()) 3561 return N1; 3562 // fold (or x, c) -> c iff (x & ~c) == 0 3563 if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue())) 3564 return N1; 3565 3566 if (SDValue Combined = visitORLike(N0, N1, N)) 3567 return Combined; 3568 3569 // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16) 3570 SDValue BSwap = MatchBSwapHWord(N, N0, N1); 3571 if (BSwap.getNode()) 3572 return BSwap; 3573 BSwap = MatchBSwapHWordLow(N, N0, N1); 3574 if (BSwap.getNode()) 3575 return BSwap; 3576 3577 // reassociate or 3578 if (SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1)) 3579 return ROR; 3580 // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) 3581 // iff (c1 & c2) == 0. 3582 if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 3583 isa<ConstantSDNode>(N0.getOperand(1))) { 3584 ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1)); 3585 if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) { 3586 if (SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)) 3587 return DAG.getNode( 3588 ISD::AND, SDLoc(N), VT, 3589 DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), COR); 3590 return SDValue(); 3591 } 3592 } 3593 // Simplify: (or (op x...), (op y...)) -> (op (or x, y)) 3594 if (N0.getOpcode() == N1.getOpcode()) { 3595 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 3596 if (Tmp.getNode()) return Tmp; 3597 } 3598 3599 // See if this is some rotate idiom. 3600 if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) 3601 return SDValue(Rot, 0); 3602 3603 // Simplify the operands using demanded-bits information. 3604 if (!VT.isVector() && 3605 SimplifyDemandedBits(SDValue(N, 0))) 3606 return SDValue(N, 0); 3607 3608 return SDValue(); 3609 } 3610 3611 /// Match "(X shl/srl V1) & V2" where V2 may not be present. 3612 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 3613 if (Op.getOpcode() == ISD::AND) { 3614 if (isa<ConstantSDNode>(Op.getOperand(1))) { 3615 Mask = Op.getOperand(1); 3616 Op = Op.getOperand(0); 3617 } else { 3618 return false; 3619 } 3620 } 3621 3622 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 3623 Shift = Op; 3624 return true; 3625 } 3626 3627 return false; 3628 } 3629 3630 // Return true if we can prove that, whenever Neg and Pos are both in the 3631 // range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that 3632 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: 3633 // 3634 // (or (shift1 X, Neg), (shift2 X, Pos)) 3635 // 3636 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate 3637 // in direction shift1 by Neg. The range [0, OpSize) means that we only need 3638 // to consider shift amounts with defined behavior. 3639 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { 3640 // If OpSize is a power of 2 then: 3641 // 3642 // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) 3643 // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). 3644 // 3645 // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check 3646 // for the stronger condition: 3647 // 3648 // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] 3649 // 3650 // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) 3651 // we can just replace Neg with Neg' for the rest of the function. 3652 // 3653 // In other cases we check for the even stronger condition: 3654 // 3655 // Neg == OpSize - Pos [B] 3656 // 3657 // for all Neg and Pos. Note that the (or ...) then invokes undefined 3658 // behavior if Pos == 0 (and consequently Neg == OpSize). 3659 // 3660 // We could actually use [A] whenever OpSize is a power of 2, but the 3661 // only extra cases that it would match are those uninteresting ones 3662 // where Neg and Pos are never in range at the same time. E.g. for 3663 // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) 3664 // as well as (sub 32, Pos), but: 3665 // 3666 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) 3667 // 3668 // always invokes undefined behavior for 32-bit X. 3669 // 3670 // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. 3671 unsigned MaskLoBits = 0; 3672 if (Neg.getOpcode() == ISD::AND && 3673 isPowerOf2_64(OpSize) && 3674 Neg.getOperand(1).getOpcode() == ISD::Constant && 3675 cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { 3676 Neg = Neg.getOperand(0); 3677 MaskLoBits = Log2_64(OpSize); 3678 } 3679 3680 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 3681 if (Neg.getOpcode() != ISD::SUB) 3682 return 0; 3683 ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); 3684 if (!NegC) 3685 return 0; 3686 SDValue NegOp1 = Neg.getOperand(1); 3687 3688 // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with 3689 // Pos'. The truncation is redundant for the purpose of the equality. 3690 if (MaskLoBits && 3691 Pos.getOpcode() == ISD::AND && 3692 Pos.getOperand(1).getOpcode() == ISD::Constant && 3693 cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) 3694 Pos = Pos.getOperand(0); 3695 3696 // The condition we need is now: 3697 // 3698 // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask 3699 // 3700 // If NegOp1 == Pos then we need: 3701 // 3702 // OpSize & Mask == NegC & Mask 3703 // 3704 // (because "x & Mask" is a truncation and distributes through subtraction). 3705 APInt Width; 3706 if (Pos == NegOp1) 3707 Width = NegC->getAPIntValue(); 3708 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 3709 // Then the condition we want to prove becomes: 3710 // 3711 // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask 3712 // 3713 // which, again because "x & Mask" is a truncation, becomes: 3714 // 3715 // NegC & Mask == (OpSize - PosC) & Mask 3716 // OpSize & Mask == (NegC + PosC) & Mask 3717 else if (Pos.getOpcode() == ISD::ADD && 3718 Pos.getOperand(0) == NegOp1 && 3719 Pos.getOperand(1).getOpcode() == ISD::Constant) 3720 Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + 3721 NegC->getAPIntValue()); 3722 else 3723 return false; 3724 3725 // Now we just need to check that OpSize & Mask == Width & Mask. 3726 if (MaskLoBits) 3727 // Opsize & Mask is 0 since Mask is Opsize - 1. 3728 return Width.getLoBits(MaskLoBits) == 0; 3729 return Width == OpSize; 3730 } 3731 3732 // A subroutine of MatchRotate used once we have found an OR of two opposite 3733 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces 3734 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 3735 // former being preferred if supported. InnerPos and InnerNeg are Pos and 3736 // Neg with outer conversions stripped away. 3737 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 3738 SDValue Neg, SDValue InnerPos, 3739 SDValue InnerNeg, unsigned PosOpcode, 3740 unsigned NegOpcode, SDLoc DL) { 3741 // fold (or (shl x, (*ext y)), 3742 // (srl x, (*ext (sub 32, y)))) -> 3743 // (rotl x, y) or (rotr x, (sub 32, y)) 3744 // 3745 // fold (or (shl x, (*ext (sub 32, y))), 3746 // (srl x, (*ext y))) -> 3747 // (rotr x, y) or (rotl x, (sub 32, y)) 3748 EVT VT = Shifted.getValueType(); 3749 if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { 3750 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 3751 return DAG.getNode(HasPos ? PosOpcode : NegOpcode, DL, VT, Shifted, 3752 HasPos ? Pos : Neg).getNode(); 3753 } 3754 3755 return nullptr; 3756 } 3757 3758 // MatchRotate - Handle an 'or' of two operands. If this is one of the many 3759 // idioms for rotate, and if the target supports rotation instructions, generate 3760 // a rot[lr]. 3761 SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { 3762 // Must be a legal type. Expanded 'n promoted things won't work with rotates. 3763 EVT VT = LHS.getValueType(); 3764 if (!TLI.isTypeLegal(VT)) return nullptr; 3765 3766 // The target must have at least one rotate flavor. 3767 bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT); 3768 bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT); 3769 if (!HasROTL && !HasROTR) return nullptr; 3770 3771 // Match "(X shl/srl V1) & V2" where V2 may not be present. 3772 SDValue LHSShift; // The shift. 3773 SDValue LHSMask; // AND value if any. 3774 if (!MatchRotateHalf(LHS, LHSShift, LHSMask)) 3775 return nullptr; // Not part of a rotate. 3776 3777 SDValue RHSShift; // The shift. 3778 SDValue RHSMask; // AND value if any. 3779 if (!MatchRotateHalf(RHS, RHSShift, RHSMask)) 3780 return nullptr; // Not part of a rotate. 3781 3782 if (LHSShift.getOperand(0) != RHSShift.getOperand(0)) 3783 return nullptr; // Not shifting the same value. 3784 3785 if (LHSShift.getOpcode() == RHSShift.getOpcode()) 3786 return nullptr; // Shifts must disagree. 3787 3788 // Canonicalize shl to left side in a shl/srl pair. 3789 if (RHSShift.getOpcode() == ISD::SHL) { 3790 std::swap(LHS, RHS); 3791 std::swap(LHSShift, RHSShift); 3792 std::swap(LHSMask , RHSMask ); 3793 } 3794 3795 unsigned OpSizeInBits = VT.getSizeInBits(); 3796 SDValue LHSShiftArg = LHSShift.getOperand(0); 3797 SDValue LHSShiftAmt = LHSShift.getOperand(1); 3798 SDValue RHSShiftArg = RHSShift.getOperand(0); 3799 SDValue RHSShiftAmt = RHSShift.getOperand(1); 3800 3801 // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1) 3802 // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2) 3803 if (LHSShiftAmt.getOpcode() == ISD::Constant && 3804 RHSShiftAmt.getOpcode() == ISD::Constant) { 3805 uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue(); 3806 uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue(); 3807 if ((LShVal + RShVal) != OpSizeInBits) 3808 return nullptr; 3809 3810 SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, 3811 LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt); 3812 3813 // If there is an AND of either shifted operand, apply it to the result. 3814 if (LHSMask.getNode() || RHSMask.getNode()) { 3815 APInt Mask = APInt::getAllOnesValue(OpSizeInBits); 3816 3817 if (LHSMask.getNode()) { 3818 APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal); 3819 Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits; 3820 } 3821 if (RHSMask.getNode()) { 3822 APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal); 3823 Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits; 3824 } 3825 3826 Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT)); 3827 } 3828 3829 return Rot.getNode(); 3830 } 3831 3832 // If there is a mask here, and we have a variable shift, we can't be sure 3833 // that we're masking out the right stuff. 3834 if (LHSMask.getNode() || RHSMask.getNode()) 3835 return nullptr; 3836 3837 // If the shift amount is sign/zext/any-extended just peel it off. 3838 SDValue LExtOp0 = LHSShiftAmt; 3839 SDValue RExtOp0 = RHSShiftAmt; 3840 if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 3841 LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 3842 LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 3843 LHSShiftAmt.getOpcode() == ISD::TRUNCATE) && 3844 (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || 3845 RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || 3846 RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || 3847 RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { 3848 LExtOp0 = LHSShiftAmt.getOperand(0); 3849 RExtOp0 = RHSShiftAmt.getOperand(0); 3850 } 3851 3852 SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt, 3853 LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL); 3854 if (TryL) 3855 return TryL; 3856 3857 SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt, 3858 RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL); 3859 if (TryR) 3860 return TryR; 3861 3862 return nullptr; 3863 } 3864 3865 SDValue DAGCombiner::visitXOR(SDNode *N) { 3866 SDValue N0 = N->getOperand(0); 3867 SDValue N1 = N->getOperand(1); 3868 EVT VT = N0.getValueType(); 3869 3870 // fold vector ops 3871 if (VT.isVector()) { 3872 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 3873 return FoldedVOp; 3874 3875 // fold (xor x, 0) -> x, vector edition 3876 if (ISD::isBuildVectorAllZeros(N0.getNode())) 3877 return N1; 3878 if (ISD::isBuildVectorAllZeros(N1.getNode())) 3879 return N0; 3880 } 3881 3882 // fold (xor undef, undef) -> 0. This is a common idiom (misuse). 3883 if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF) 3884 return DAG.getConstant(0, VT); 3885 // fold (xor x, undef) -> undef 3886 if (N0.getOpcode() == ISD::UNDEF) 3887 return N0; 3888 if (N1.getOpcode() == ISD::UNDEF) 3889 return N1; 3890 // fold (xor c1, c2) -> c1^c2 3891 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 3892 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 3893 if (N0C && N1C) 3894 return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); 3895 // canonicalize constant to RHS 3896 if (isConstantIntBuildVectorOrConstantInt(N0) && 3897 !isConstantIntBuildVectorOrConstantInt(N1)) 3898 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); 3899 // fold (xor x, 0) -> x 3900 if (N1C && N1C->isNullValue()) 3901 return N0; 3902 // reassociate xor 3903 if (SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1)) 3904 return RXOR; 3905 3906 // fold !(x cc y) -> (x !cc y) 3907 SDValue LHS, RHS, CC; 3908 if (TLI.isConstTrueVal(N1.getNode()) && isSetCCEquivalent(N0, LHS, RHS, CC)) { 3909 bool isInt = LHS.getValueType().isInteger(); 3910 ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(), 3911 isInt); 3912 3913 if (!LegalOperations || 3914 TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { 3915 switch (N0.getOpcode()) { 3916 default: 3917 llvm_unreachable("Unhandled SetCC Equivalent!"); 3918 case ISD::SETCC: 3919 return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC); 3920 case ISD::SELECT_CC: 3921 return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2), 3922 N0.getOperand(3), NotCC); 3923 } 3924 } 3925 } 3926 3927 // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y))) 3928 if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND && 3929 N0.getNode()->hasOneUse() && 3930 isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ 3931 SDValue V = N0.getOperand(0); 3932 V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, 3933 DAG.getConstant(1, V.getValueType())); 3934 AddToWorklist(V.getNode()); 3935 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); 3936 } 3937 3938 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc 3939 if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 && 3940 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 3941 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 3942 if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { 3943 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; 3944 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 3945 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 3946 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 3947 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); 3948 } 3949 } 3950 // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants 3951 if (N1C && N1C->isAllOnesValue() && 3952 (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) { 3953 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 3954 if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) { 3955 unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; 3956 LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS 3957 RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS 3958 AddToWorklist(LHS.getNode()); AddToWorklist(RHS.getNode()); 3959 return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); 3960 } 3961 } 3962 // fold (xor (and x, y), y) -> (and (not x), y) 3963 if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && 3964 N0->getOperand(1) == N1) { 3965 SDValue X = N0->getOperand(0); 3966 SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); 3967 AddToWorklist(NotX.getNode()); 3968 return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); 3969 } 3970 // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) 3971 if (N1C && N0.getOpcode() == ISD::XOR) { 3972 ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0)); 3973 ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1)); 3974 if (N00C) 3975 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1), 3976 DAG.getConstant(N1C->getAPIntValue() ^ 3977 N00C->getAPIntValue(), VT)); 3978 if (N01C) 3979 return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0), 3980 DAG.getConstant(N1C->getAPIntValue() ^ 3981 N01C->getAPIntValue(), VT)); 3982 } 3983 // fold (xor x, x) -> 0 3984 if (N0 == N1) 3985 return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); 3986 3987 // fold (xor (shl 1, x), -1) -> (rotl ~1, x) 3988 // Here is a concrete example of this equivalence: 3989 // i16 x == 14 3990 // i16 shl == 1 << 14 == 16384 == 0b0100000000000000 3991 // i16 xor == ~(1 << 14) == 49151 == 0b1011111111111111 3992 // 3993 // => 3994 // 3995 // i16 ~1 == 0b1111111111111110 3996 // i16 rol(~1, 14) == 0b1011111111111111 3997 // 3998 // Some additional tips to help conceptualize this transform: 3999 // - Try to see the operation as placing a single zero in a value of all ones. 4000 // - There exists no value for x which would allow the result to contain zero. 4001 // - Values of x larger than the bitwidth are undefined and do not require a 4002 // consistent result. 4003 // - Pushing the zero left requires shifting one bits in from the right. 4004 // A rotate left of ~1 is a nice way of achieving the desired result. 4005 if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) 4006 if (auto *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) 4007 if (N0.getOpcode() == ISD::SHL) 4008 if (auto *ShlLHS = dyn_cast<ConstantSDNode>(N0.getOperand(0))) 4009 if (N1C->isAllOnesValue() && ShlLHS->isOne()) 4010 return DAG.getNode(ISD::ROTL, SDLoc(N), VT, DAG.getConstant(~1, VT), 4011 N0.getOperand(1)); 4012 4013 // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) 4014 if (N0.getOpcode() == N1.getOpcode()) { 4015 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 4016 if (Tmp.getNode()) return Tmp; 4017 } 4018 4019 // Simplify the expression using non-local knowledge. 4020 if (!VT.isVector() && 4021 SimplifyDemandedBits(SDValue(N, 0))) 4022 return SDValue(N, 0); 4023 4024 return SDValue(); 4025 } 4026 4027 /// Handle transforms common to the three shifts, when the shift amount is a 4028 /// constant. 4029 SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) { 4030 // We can't and shouldn't fold opaque constants. 4031 if (Amt->isOpaque()) 4032 return SDValue(); 4033 4034 SDNode *LHS = N->getOperand(0).getNode(); 4035 if (!LHS->hasOneUse()) return SDValue(); 4036 4037 // We want to pull some binops through shifts, so that we have (and (shift)) 4038 // instead of (shift (and)), likewise for add, or, xor, etc. This sort of 4039 // thing happens with address calculations, so it's important to canonicalize 4040 // it. 4041 bool HighBitSet = false; // Can we transform this if the high bit is set? 4042 4043 switch (LHS->getOpcode()) { 4044 default: return SDValue(); 4045 case ISD::OR: 4046 case ISD::XOR: 4047 HighBitSet = false; // We can only transform sra if the high bit is clear. 4048 break; 4049 case ISD::AND: 4050 HighBitSet = true; // We can only transform sra if the high bit is set. 4051 break; 4052 case ISD::ADD: 4053 if (N->getOpcode() != ISD::SHL) 4054 return SDValue(); // only shl(add) not sr[al](add). 4055 HighBitSet = false; // We can only transform sra if the high bit is clear. 4056 break; 4057 } 4058 4059 // We require the RHS of the binop to be a constant and not opaque as well. 4060 ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1)); 4061 if (!BinOpCst || BinOpCst->isOpaque()) return SDValue(); 4062 4063 // FIXME: disable this unless the input to the binop is a shift by a constant. 4064 // If it is not a shift, it pessimizes some common cases like: 4065 // 4066 // void foo(int *X, int i) { X[i & 1235] = 1; } 4067 // int bar(int *X, int i) { return X[i & 255]; } 4068 SDNode *BinOpLHSVal = LHS->getOperand(0).getNode(); 4069 if ((BinOpLHSVal->getOpcode() != ISD::SHL && 4070 BinOpLHSVal->getOpcode() != ISD::SRA && 4071 BinOpLHSVal->getOpcode() != ISD::SRL) || 4072 !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1))) 4073 return SDValue(); 4074 4075 EVT VT = N->getValueType(0); 4076 4077 // If this is a signed shift right, and the high bit is modified by the 4078 // logical operation, do not perform the transformation. The highBitSet 4079 // boolean indicates the value of the high bit of the constant which would 4080 // cause it to be modified for this operation. 4081 if (N->getOpcode() == ISD::SRA) { 4082 bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative(); 4083 if (BinOpRHSSignSet != HighBitSet) 4084 return SDValue(); 4085 } 4086 4087 if (!TLI.isDesirableToCommuteWithShift(LHS)) 4088 return SDValue(); 4089 4090 // Fold the constants, shifting the binop RHS by the shift amount. 4091 SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), 4092 N->getValueType(0), 4093 LHS->getOperand(1), N->getOperand(1)); 4094 assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!"); 4095 4096 // Create the new shift. 4097 SDValue NewShift = DAG.getNode(N->getOpcode(), 4098 SDLoc(LHS->getOperand(0)), 4099 VT, LHS->getOperand(0), N->getOperand(1)); 4100 4101 // Create the new binop. 4102 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); 4103 } 4104 4105 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { 4106 assert(N->getOpcode() == ISD::TRUNCATE); 4107 assert(N->getOperand(0).getOpcode() == ISD::AND); 4108 4109 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) 4110 if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { 4111 SDValue N01 = N->getOperand(0).getOperand(1); 4112 4113 if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { 4114 EVT TruncVT = N->getValueType(0); 4115 SDValue N00 = N->getOperand(0).getOperand(0); 4116 APInt TruncC = N01C->getAPIntValue(); 4117 TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); 4118 4119 return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, 4120 DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), 4121 DAG.getConstant(TruncC, TruncVT)); 4122 } 4123 } 4124 4125 return SDValue(); 4126 } 4127 4128 SDValue DAGCombiner::visitRotate(SDNode *N) { 4129 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 4130 if (N->getOperand(1).getOpcode() == ISD::TRUNCATE && 4131 N->getOperand(1).getOperand(0).getOpcode() == ISD::AND) { 4132 SDValue NewOp1 = distributeTruncateThroughAnd(N->getOperand(1).getNode()); 4133 if (NewOp1.getNode()) 4134 return DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0), 4135 N->getOperand(0), NewOp1); 4136 } 4137 return SDValue(); 4138 } 4139 4140 SDValue DAGCombiner::visitSHL(SDNode *N) { 4141 SDValue N0 = N->getOperand(0); 4142 SDValue N1 = N->getOperand(1); 4143 EVT VT = N0.getValueType(); 4144 unsigned OpSizeInBits = VT.getScalarSizeInBits(); 4145 4146 // fold vector ops 4147 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4148 if (VT.isVector()) { 4149 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4150 return FoldedVOp; 4151 4152 BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1); 4153 // If setcc produces all-one true value then: 4154 // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV) 4155 if (N1CV && N1CV->isConstant()) { 4156 if (N0.getOpcode() == ISD::AND) { 4157 SDValue N00 = N0->getOperand(0); 4158 SDValue N01 = N0->getOperand(1); 4159 BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01); 4160 4161 if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC && 4162 TLI.getBooleanContents(N00.getOperand(0).getValueType()) == 4163 TargetLowering::ZeroOrNegativeOneBooleanContent) { 4164 if (SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV)) 4165 return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C); 4166 } 4167 } else { 4168 N1C = isConstOrConstSplat(N1); 4169 } 4170 } 4171 } 4172 4173 // fold (shl c1, c2) -> c1<<c2 4174 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 4175 if (N0C && N1C) 4176 return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C); 4177 // fold (shl 0, x) -> 0 4178 if (N0C && N0C->isNullValue()) 4179 return N0; 4180 // fold (shl x, c >= size(x)) -> undef 4181 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 4182 return DAG.getUNDEF(VT); 4183 // fold (shl x, 0) -> x 4184 if (N1C && N1C->isNullValue()) 4185 return N0; 4186 // fold (shl undef, x) -> 0 4187 if (N0.getOpcode() == ISD::UNDEF) 4188 return DAG.getConstant(0, VT); 4189 // if (shl x, c) is known to be zero, return 0 4190 if (DAG.MaskedValueIsZero(SDValue(N, 0), 4191 APInt::getAllOnesValue(OpSizeInBits))) 4192 return DAG.getConstant(0, VT); 4193 // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))). 4194 if (N1.getOpcode() == ISD::TRUNCATE && 4195 N1.getOperand(0).getOpcode() == ISD::AND) { 4196 SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); 4197 if (NewOp1.getNode()) 4198 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1); 4199 } 4200 4201 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4202 return SDValue(N, 0); 4203 4204 // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2)) 4205 if (N1C && N0.getOpcode() == ISD::SHL) { 4206 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4207 uint64_t c1 = N0C1->getZExtValue(); 4208 uint64_t c2 = N1C->getZExtValue(); 4209 if (c1 + c2 >= OpSizeInBits) 4210 return DAG.getConstant(0, VT); 4211 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), 4212 DAG.getConstant(c1 + c2, N1.getValueType())); 4213 } 4214 } 4215 4216 // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2))) 4217 // For this to be valid, the second form must not preserve any of the bits 4218 // that are shifted out by the inner shift in the first form. This means 4219 // the outer shift size must be >= the number of bits added by the ext. 4220 // As a corollary, we don't care what kind of ext it is. 4221 if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND || 4222 N0.getOpcode() == ISD::ANY_EXTEND || 4223 N0.getOpcode() == ISD::SIGN_EXTEND) && 4224 N0.getOperand(0).getOpcode() == ISD::SHL) { 4225 SDValue N0Op0 = N0.getOperand(0); 4226 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { 4227 uint64_t c1 = N0Op0C1->getZExtValue(); 4228 uint64_t c2 = N1C->getZExtValue(); 4229 EVT InnerShiftVT = N0Op0.getValueType(); 4230 uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits(); 4231 if (c2 >= OpSizeInBits - InnerShiftSize) { 4232 if (c1 + c2 >= OpSizeInBits) 4233 return DAG.getConstant(0, VT); 4234 return DAG.getNode(ISD::SHL, SDLoc(N0), VT, 4235 DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, 4236 N0Op0->getOperand(0)), 4237 DAG.getConstant(c1 + c2, N1.getValueType())); 4238 } 4239 } 4240 } 4241 4242 // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) 4243 // Only fold this if the inner zext has no other uses to avoid increasing 4244 // the total number of instructions. 4245 if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && 4246 N0.getOperand(0).getOpcode() == ISD::SRL) { 4247 SDValue N0Op0 = N0.getOperand(0); 4248 if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) { 4249 uint64_t c1 = N0Op0C1->getZExtValue(); 4250 if (c1 < VT.getScalarSizeInBits()) { 4251 uint64_t c2 = N1C->getZExtValue(); 4252 if (c1 == c2) { 4253 SDValue NewOp0 = N0.getOperand(0); 4254 EVT CountVT = NewOp0.getOperand(1).getValueType(); 4255 SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), 4256 NewOp0, DAG.getConstant(c2, CountVT)); 4257 AddToWorklist(NewSHL.getNode()); 4258 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); 4259 } 4260 } 4261 } 4262 } 4263 4264 // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or 4265 // (and (srl x, (sub c1, c2), MASK) 4266 // Only fold this if the inner shift has no other uses -- if it does, folding 4267 // this will increase the total number of instructions. 4268 if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) { 4269 if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) { 4270 uint64_t c1 = N0C1->getZExtValue(); 4271 if (c1 < OpSizeInBits) { 4272 uint64_t c2 = N1C->getZExtValue(); 4273 APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1); 4274 SDValue Shift; 4275 if (c2 > c1) { 4276 Mask = Mask.shl(c2 - c1); 4277 Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), 4278 DAG.getConstant(c2 - c1, N1.getValueType())); 4279 } else { 4280 Mask = Mask.lshr(c1 - c2); 4281 Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), 4282 DAG.getConstant(c1 - c2, N1.getValueType())); 4283 } 4284 return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, 4285 DAG.getConstant(Mask, VT)); 4286 } 4287 } 4288 } 4289 // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1)) 4290 if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) { 4291 unsigned BitSize = VT.getScalarSizeInBits(); 4292 SDValue HiBitsMask = 4293 DAG.getConstant(APInt::getHighBitsSet(BitSize, 4294 BitSize - N1C->getZExtValue()), VT); 4295 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), 4296 HiBitsMask); 4297 } 4298 4299 // fold (shl (add x, c1), c2) -> (add (shl x, c2), c1 << c2) 4300 // Variant of version done on multiply, except mul by a power of 2 is turned 4301 // into a shift. 4302 APInt Val; 4303 if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && 4304 (isa<ConstantSDNode>(N0.getOperand(1)) || 4305 isConstantSplatVector(N0.getOperand(1).getNode(), Val))) { 4306 SDValue Shl0 = DAG.getNode(ISD::SHL, SDLoc(N0), VT, N0.getOperand(0), N1); 4307 SDValue Shl1 = DAG.getNode(ISD::SHL, SDLoc(N1), VT, N0.getOperand(1), N1); 4308 return DAG.getNode(ISD::ADD, SDLoc(N), VT, Shl0, Shl1); 4309 } 4310 4311 if (N1C) { 4312 SDValue NewSHL = visitShiftByConstant(N, N1C); 4313 if (NewSHL.getNode()) 4314 return NewSHL; 4315 } 4316 4317 return SDValue(); 4318 } 4319 4320 SDValue DAGCombiner::visitSRA(SDNode *N) { 4321 SDValue N0 = N->getOperand(0); 4322 SDValue N1 = N->getOperand(1); 4323 EVT VT = N0.getValueType(); 4324 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 4325 4326 // fold vector ops 4327 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4328 if (VT.isVector()) { 4329 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4330 return FoldedVOp; 4331 4332 N1C = isConstOrConstSplat(N1); 4333 } 4334 4335 // fold (sra c1, c2) -> (sra c1, c2) 4336 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 4337 if (N0C && N1C) 4338 return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); 4339 // fold (sra 0, x) -> 0 4340 if (N0C && N0C->isNullValue()) 4341 return N0; 4342 // fold (sra -1, x) -> -1 4343 if (N0C && N0C->isAllOnesValue()) 4344 return N0; 4345 // fold (sra x, (setge c, size(x))) -> undef 4346 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 4347 return DAG.getUNDEF(VT); 4348 // fold (sra x, 0) -> x 4349 if (N1C && N1C->isNullValue()) 4350 return N0; 4351 // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports 4352 // sext_inreg. 4353 if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) { 4354 unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue(); 4355 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits); 4356 if (VT.isVector()) 4357 ExtVT = EVT::getVectorVT(*DAG.getContext(), 4358 ExtVT, VT.getVectorNumElements()); 4359 if ((!LegalOperations || 4360 TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) 4361 return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, 4362 N0.getOperand(0), DAG.getValueType(ExtVT)); 4363 } 4364 4365 // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2)) 4366 if (N1C && N0.getOpcode() == ISD::SRA) { 4367 if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) { 4368 unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); 4369 if (Sum >= OpSizeInBits) 4370 Sum = OpSizeInBits - 1; 4371 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), 4372 DAG.getConstant(Sum, N1.getValueType())); 4373 } 4374 } 4375 4376 // fold (sra (shl X, m), (sub result_size, n)) 4377 // -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for 4378 // result_size - n != m. 4379 // If truncate is free for the target sext(shl) is likely to result in better 4380 // code. 4381 if (N0.getOpcode() == ISD::SHL && N1C) { 4382 // Get the two constanst of the shifts, CN0 = m, CN = n. 4383 const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1)); 4384 if (N01C) { 4385 LLVMContext &Ctx = *DAG.getContext(); 4386 // Determine what the truncate's result bitsize and type would be. 4387 EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue()); 4388 4389 if (VT.isVector()) 4390 TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements()); 4391 4392 // Determine the residual right-shift amount. 4393 signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue(); 4394 4395 // If the shift is not a no-op (in which case this should be just a sign 4396 // extend already), the truncated to type is legal, sign_extend is legal 4397 // on that type, and the truncate to that type is both legal and free, 4398 // perform the transform. 4399 if ((ShiftAmt > 0) && 4400 TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) && 4401 TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) && 4402 TLI.isTruncateFree(VT, TruncVT)) { 4403 4404 SDValue Amt = DAG.getConstant(ShiftAmt, 4405 getShiftAmountTy(N0.getOperand(0).getValueType())); 4406 SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, 4407 N0.getOperand(0), Amt); 4408 SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, 4409 Shift); 4410 return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), 4411 N->getValueType(0), Trunc); 4412 } 4413 } 4414 } 4415 4416 // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))). 4417 if (N1.getOpcode() == ISD::TRUNCATE && 4418 N1.getOperand(0).getOpcode() == ISD::AND) { 4419 SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); 4420 if (NewOp1.getNode()) 4421 return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1); 4422 } 4423 4424 // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2)) 4425 // if c1 is equal to the number of bits the trunc removes 4426 if (N0.getOpcode() == ISD::TRUNCATE && 4427 (N0.getOperand(0).getOpcode() == ISD::SRL || 4428 N0.getOperand(0).getOpcode() == ISD::SRA) && 4429 N0.getOperand(0).hasOneUse() && 4430 N0.getOperand(0).getOperand(1).hasOneUse() && 4431 N1C) { 4432 SDValue N0Op0 = N0.getOperand(0); 4433 if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) { 4434 unsigned LargeShiftVal = LargeShift->getZExtValue(); 4435 EVT LargeVT = N0Op0.getValueType(); 4436 4437 if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) { 4438 SDValue Amt = 4439 DAG.getConstant(LargeShiftVal + N1C->getZExtValue(), 4440 getShiftAmountTy(N0Op0.getOperand(0).getValueType())); 4441 SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, 4442 N0Op0.getOperand(0), Amt); 4443 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); 4444 } 4445 } 4446 } 4447 4448 // Simplify, based on bits shifted out of the LHS. 4449 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4450 return SDValue(N, 0); 4451 4452 4453 // If the sign bit is known to be zero, switch this to a SRL. 4454 if (DAG.SignBitIsZero(N0)) 4455 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); 4456 4457 if (N1C) { 4458 SDValue NewSRA = visitShiftByConstant(N, N1C); 4459 if (NewSRA.getNode()) 4460 return NewSRA; 4461 } 4462 4463 return SDValue(); 4464 } 4465 4466 SDValue DAGCombiner::visitSRL(SDNode *N) { 4467 SDValue N0 = N->getOperand(0); 4468 SDValue N1 = N->getOperand(1); 4469 EVT VT = N0.getValueType(); 4470 unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); 4471 4472 // fold vector ops 4473 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4474 if (VT.isVector()) { 4475 if (SDValue FoldedVOp = SimplifyVBinOp(N)) 4476 return FoldedVOp; 4477 4478 N1C = isConstOrConstSplat(N1); 4479 } 4480 4481 // fold (srl c1, c2) -> c1 >>u c2 4482 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 4483 if (N0C && N1C) 4484 return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); 4485 // fold (srl 0, x) -> 0 4486 if (N0C && N0C->isNullValue()) 4487 return N0; 4488 // fold (srl x, c >= size(x)) -> undef 4489 if (N1C && N1C->getZExtValue() >= OpSizeInBits) 4490 return DAG.getUNDEF(VT); 4491 // fold (srl x, 0) -> x 4492 if (N1C && N1C->isNullValue()) 4493 return N0; 4494 // if (srl x, c) is known to be zero, return 0 4495 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 4496 APInt::getAllOnesValue(OpSizeInBits))) 4497 return DAG.getConstant(0, VT); 4498 4499 // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2)) 4500 if (N1C && N0.getOpcode() == ISD::SRL) { 4501 if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) { 4502 uint64_t c1 = N01C->getZExtValue(); 4503 uint64_t c2 = N1C->getZExtValue(); 4504 if (c1 + c2 >= OpSizeInBits) 4505 return DAG.getConstant(0, VT); 4506 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), 4507 DAG.getConstant(c1 + c2, N1.getValueType())); 4508 } 4509 } 4510 4511 // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2))) 4512 if (N1C && N0.getOpcode() == ISD::TRUNCATE && 4513 N0.getOperand(0).getOpcode() == ISD::SRL && 4514 isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) { 4515 uint64_t c1 = 4516 cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue(); 4517 uint64_t c2 = N1C->getZExtValue(); 4518 EVT InnerShiftVT = N0.getOperand(0).getValueType(); 4519 EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType(); 4520 uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits(); 4521 // This is only valid if the OpSizeInBits + c1 = size of inner shift. 4522 if (c1 + OpSizeInBits == InnerShiftSize) { 4523 if (c1 + c2 >= InnerShiftSize) 4524 return DAG.getConstant(0, VT); 4525 return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, 4526 DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, 4527 N0.getOperand(0)->getOperand(0), 4528 DAG.getConstant(c1 + c2, ShiftCountVT))); 4529 } 4530 } 4531 4532 // fold (srl (shl x, c), c) -> (and x, cst2) 4533 if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) { 4534 unsigned BitSize = N0.getScalarValueSizeInBits(); 4535 if (BitSize <= 64) { 4536 uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize; 4537 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), 4538 DAG.getConstant(~0ULL >> ShAmt, VT)); 4539 } 4540 } 4541 4542 // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) 4543 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 4544 // Shifting in all undef bits? 4545 EVT SmallVT = N0.getOperand(0).getValueType(); 4546 unsigned BitSize = SmallVT.getScalarSizeInBits(); 4547 if (N1C->getZExtValue() >= BitSize) 4548 return DAG.getUNDEF(VT); 4549 4550 if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { 4551 uint64_t ShiftAmt = N1C->getZExtValue(); 4552 SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, 4553 N0.getOperand(0), 4554 DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); 4555 AddToWorklist(SmallShift.getNode()); 4556 APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt); 4557 return DAG.getNode(ISD::AND, SDLoc(N), VT, 4558 DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), 4559 DAG.getConstant(Mask, VT)); 4560 } 4561 } 4562 4563 // fold (srl (sra X, Y), 31) -> (srl X, 31). This srl only looks at the sign 4564 // bit, which is unmodified by sra. 4565 if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) { 4566 if (N0.getOpcode() == ISD::SRA) 4567 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); 4568 } 4569 4570 // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). 4571 if (N1C && N0.getOpcode() == ISD::CTLZ && 4572 N1C->getAPIntValue() == Log2_32(OpSizeInBits)) { 4573 APInt KnownZero, KnownOne; 4574 DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne); 4575 4576 // If any of the input bits are KnownOne, then the input couldn't be all 4577 // zeros, thus the result of the srl will always be zero. 4578 if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT); 4579 4580 // If all of the bits input the to ctlz node are known to be zero, then 4581 // the result of the ctlz is "32" and the result of the shift is one. 4582 APInt UnknownBits = ~KnownZero; 4583 if (UnknownBits == 0) return DAG.getConstant(1, VT); 4584 4585 // Otherwise, check to see if there is exactly one bit input to the ctlz. 4586 if ((UnknownBits & (UnknownBits - 1)) == 0) { 4587 // Okay, we know that only that the single bit specified by UnknownBits 4588 // could be set on input to the CTLZ node. If this bit is set, the SRL 4589 // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair 4590 // to an SRL/XOR pair, which is likely to simplify more. 4591 unsigned ShAmt = UnknownBits.countTrailingZeros(); 4592 SDValue Op = N0.getOperand(0); 4593 4594 if (ShAmt) { 4595 Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, 4596 DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); 4597 AddToWorklist(Op.getNode()); 4598 } 4599 4600 return DAG.getNode(ISD::XOR, SDLoc(N), VT, 4601 Op, DAG.getConstant(1, VT)); 4602 } 4603 } 4604 4605 // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))). 4606 if (N1.getOpcode() == ISD::TRUNCATE && 4607 N1.getOperand(0).getOpcode() == ISD::AND) { 4608 SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode()); 4609 if (NewOp1.getNode()) 4610 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1); 4611 } 4612 4613 // fold operands of srl based on knowledge that the low bits are not 4614 // demanded. 4615 if (N1C && SimplifyDemandedBits(SDValue(N, 0))) 4616 return SDValue(N, 0); 4617 4618 if (N1C) { 4619 SDValue NewSRL = visitShiftByConstant(N, N1C); 4620 if (NewSRL.getNode()) 4621 return NewSRL; 4622 } 4623 4624 // Attempt to convert a srl of a load into a narrower zero-extending load. 4625 SDValue NarrowLoad = ReduceLoadWidth(N); 4626 if (NarrowLoad.getNode()) 4627 return NarrowLoad; 4628 4629 // Here is a common situation. We want to optimize: 4630 // 4631 // %a = ... 4632 // %b = and i32 %a, 2 4633 // %c = srl i32 %b, 1 4634 // brcond i32 %c ... 4635 // 4636 // into 4637 // 4638 // %a = ... 4639 // %b = and %a, 2 4640 // %c = setcc eq %b, 0 4641 // brcond %c ... 4642 // 4643 // However when after the source operand of SRL is optimized into AND, the SRL 4644 // itself may not be optimized further. Look for it and add the BRCOND into 4645 // the worklist. 4646 if (N->hasOneUse()) { 4647 SDNode *Use = *N->use_begin(); 4648 if (Use->getOpcode() == ISD::BRCOND) 4649 AddToWorklist(Use); 4650 else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) { 4651 // Also look pass the truncate. 4652 Use = *Use->use_begin(); 4653 if (Use->getOpcode() == ISD::BRCOND) 4654 AddToWorklist(Use); 4655 } 4656 } 4657 4658 return SDValue(); 4659 } 4660 4661 SDValue DAGCombiner::visitCTLZ(SDNode *N) { 4662 SDValue N0 = N->getOperand(0); 4663 EVT VT = N->getValueType(0); 4664 4665 // fold (ctlz c1) -> c2 4666 if (isa<ConstantSDNode>(N0)) 4667 return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); 4668 return SDValue(); 4669 } 4670 4671 SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { 4672 SDValue N0 = N->getOperand(0); 4673 EVT VT = N->getValueType(0); 4674 4675 // fold (ctlz_zero_undef c1) -> c2 4676 if (isa<ConstantSDNode>(N0)) 4677 return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); 4678 return SDValue(); 4679 } 4680 4681 SDValue DAGCombiner::visitCTTZ(SDNode *N) { 4682 SDValue N0 = N->getOperand(0); 4683 EVT VT = N->getValueType(0); 4684 4685 // fold (cttz c1) -> c2 4686 if (isa<ConstantSDNode>(N0)) 4687 return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); 4688 return SDValue(); 4689 } 4690 4691 SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { 4692 SDValue N0 = N->getOperand(0); 4693 EVT VT = N->getValueType(0); 4694 4695 // fold (cttz_zero_undef c1) -> c2 4696 if (isa<ConstantSDNode>(N0)) 4697 return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); 4698 return SDValue(); 4699 } 4700 4701 SDValue DAGCombiner::visitCTPOP(SDNode *N) { 4702 SDValue N0 = N->getOperand(0); 4703 EVT VT = N->getValueType(0); 4704 4705 // fold (ctpop c1) -> c2 4706 if (isa<ConstantSDNode>(N0)) 4707 return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); 4708 return SDValue(); 4709 } 4710 4711 4712 /// \brief Generate Min/Max node 4713 static SDValue combineMinNumMaxNum(SDLoc DL, EVT VT, SDValue LHS, SDValue RHS, 4714 SDValue True, SDValue False, 4715 ISD::CondCode CC, const TargetLowering &TLI, 4716 SelectionDAG &DAG) { 4717 if (!(LHS == True && RHS == False) && !(LHS == False && RHS == True)) 4718 return SDValue(); 4719 4720 switch (CC) { 4721 case ISD::SETOLT: 4722 case ISD::SETOLE: 4723 case ISD::SETLT: 4724 case ISD::SETLE: 4725 case ISD::SETULT: 4726 case ISD::SETULE: { 4727 unsigned Opcode = (LHS == True) ? ISD::FMINNUM : ISD::FMAXNUM; 4728 if (TLI.isOperationLegal(Opcode, VT)) 4729 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 4730 return SDValue(); 4731 } 4732 case ISD::SETOGT: 4733 case ISD::SETOGE: 4734 case ISD::SETGT: 4735 case ISD::SETGE: 4736 case ISD::SETUGT: 4737 case ISD::SETUGE: { 4738 unsigned Opcode = (LHS == True) ? ISD::FMAXNUM : ISD::FMINNUM; 4739 if (TLI.isOperationLegal(Opcode, VT)) 4740 return DAG.getNode(Opcode, DL, VT, LHS, RHS); 4741 return SDValue(); 4742 } 4743 default: 4744 return SDValue(); 4745 } 4746 } 4747 4748 SDValue DAGCombiner::visitSELECT(SDNode *N) { 4749 SDValue N0 = N->getOperand(0); 4750 SDValue N1 = N->getOperand(1); 4751 SDValue N2 = N->getOperand(2); 4752 EVT VT = N->getValueType(0); 4753 EVT VT0 = N0.getValueType(); 4754 4755 // fold (select C, X, X) -> X 4756 if (N1 == N2) 4757 return N1; 4758 // fold (select true, X, Y) -> X 4759 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 4760 if (N0C && !N0C->isNullValue()) 4761 return N1; 4762 // fold (select false, X, Y) -> Y 4763 if (N0C && N0C->isNullValue()) 4764 return N2; 4765 // fold (select C, 1, X) -> (or C, X) 4766 ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1); 4767 if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) 4768 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); 4769 // fold (select C, 0, 1) -> (xor C, 1) 4770 // We can't do this reliably if integer based booleans have different contents 4771 // to floating point based booleans. This is because we can't tell whether we 4772 // have an integer-based boolean or a floating-point-based boolean unless we 4773 // can find the SETCC that produced it and inspect its operands. This is 4774 // fairly easy if C is the SETCC node, but it can potentially be 4775 // undiscoverable (or not reasonably discoverable). For example, it could be 4776 // in another basic block or it could require searching a complicated 4777 // expression. 4778 ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2); 4779 if (VT.isInteger() && 4780 (VT0 == MVT::i1 || (VT0.isInteger() && 4781 TLI.getBooleanContents(false, false) == 4782 TLI.getBooleanContents(false, true) && 4783 TLI.getBooleanContents(false, false) == 4784 TargetLowering::ZeroOrOneBooleanContent)) && 4785 N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { 4786 SDValue XORNode; 4787 if (VT == VT0) 4788 return DAG.getNode(ISD::XOR, SDLoc(N), VT0, 4789 N0, DAG.getConstant(1, VT0)); 4790 XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, 4791 N0, DAG.getConstant(1, VT0)); 4792 AddToWorklist(XORNode.getNode()); 4793 if (VT.bitsGT(VT0)) 4794 return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); 4795 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); 4796 } 4797 // fold (select C, 0, X) -> (and (not C), X) 4798 if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { 4799 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); 4800 AddToWorklist(NOTNode.getNode()); 4801 return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); 4802 } 4803 // fold (select C, X, 1) -> (or (not C), X) 4804 if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { 4805 SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); 4806 AddToWorklist(NOTNode.getNode()); 4807 return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); 4808 } 4809 // fold (select C, X, 0) -> (and C, X) 4810 if (VT == MVT::i1 && N2C && N2C->isNullValue()) 4811 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); 4812 // fold (select X, X, Y) -> (or X, Y) 4813 // fold (select X, 1, Y) -> (or X, Y) 4814 if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) 4815 return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); 4816 // fold (select X, Y, X) -> (and X, Y) 4817 // fold (select X, Y, 0) -> (and X, Y) 4818 if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) 4819 return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); 4820 4821 // If we can fold this based on the true/false value, do so. 4822 if (SimplifySelectOps(N, N1, N2)) 4823 return SDValue(N, 0); // Don't revisit N. 4824 4825 // fold selects based on a setcc into other things, such as min/max/abs 4826 if (N0.getOpcode() == ISD::SETCC) { 4827 // select x, y (fcmp lt x, y) -> fminnum x, y 4828 // select x, y (fcmp gt x, y) -> fmaxnum x, y 4829 // 4830 // This is OK if we don't care about what happens if either operand is a 4831 // NaN. 4832 // 4833 4834 // FIXME: Instead of testing for UnsafeFPMath, this should be checking for 4835 // no signed zeros as well as no nans. 4836 const TargetOptions &Options = DAG.getTarget().Options; 4837 if (Options.UnsafeFPMath && 4838 VT.isFloatingPoint() && N0.hasOneUse() && 4839 DAG.isKnownNeverNaN(N1) && DAG.isKnownNeverNaN(N2)) { 4840 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 4841 4842 SDValue FMinMax = 4843 combineMinNumMaxNum(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), 4844 N1, N2, CC, TLI, DAG); 4845 if (FMinMax) 4846 return FMinMax; 4847 } 4848 4849 if ((!LegalOperations && 4850 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || 4851 TLI.isOperationLegal(ISD::SELECT_CC, VT)) 4852 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, 4853 N0.getOperand(0), N0.getOperand(1), 4854 N1, N2, N0.getOperand(2)); 4855 return SimplifySelect(SDLoc(N), N0, N1, N2); 4856 } 4857 4858 if (VT0 == MVT::i1) { 4859 if (TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { 4860 // select (and Cond0, Cond1), X, Y 4861 // -> select Cond0, (select Cond1, X, Y), Y 4862 if (N0->getOpcode() == ISD::AND && N0->hasOneUse()) { 4863 SDValue Cond0 = N0->getOperand(0); 4864 SDValue Cond1 = N0->getOperand(1); 4865 SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), 4866 N1.getValueType(), Cond1, N1, N2); 4867 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, 4868 InnerSelect, N2); 4869 } 4870 // select (or Cond0, Cond1), X, Y -> select Cond0, X, (select Cond1, X, Y) 4871 if (N0->getOpcode() == ISD::OR && N0->hasOneUse()) { 4872 SDValue Cond0 = N0->getOperand(0); 4873 SDValue Cond1 = N0->getOperand(1); 4874 SDValue InnerSelect = DAG.getNode(ISD::SELECT, SDLoc(N), 4875 N1.getValueType(), Cond1, N1, N2); 4876 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Cond0, N1, 4877 InnerSelect); 4878 } 4879 } 4880 4881 // select Cond0, (select Cond1, X, Y), Y -> select (and Cond0, Cond1), X, Y 4882 if (N1->getOpcode() == ISD::SELECT) { 4883 SDValue N1_0 = N1->getOperand(0); 4884 SDValue N1_1 = N1->getOperand(1); 4885 SDValue N1_2 = N1->getOperand(2); 4886 if (N1_2 == N2 && N0.getValueType() == N1_0.getValueType()) { 4887 // Create the actual and node if we can generate good code for it. 4888 if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { 4889 SDValue And = DAG.getNode(ISD::AND, SDLoc(N), N0.getValueType(), 4890 N0, N1_0); 4891 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), And, 4892 N1_1, N2); 4893 } 4894 // Otherwise see if we can optimize the "and" to a better pattern. 4895 if (SDValue Combined = visitANDLike(N0, N1_0, N)) 4896 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, 4897 N1_1, N2); 4898 } 4899 } 4900 // select Cond0, X, (select Cond1, X, Y) -> select (or Cond0, Cond1), X, Y 4901 if (N2->getOpcode() == ISD::SELECT) { 4902 SDValue N2_0 = N2->getOperand(0); 4903 SDValue N2_1 = N2->getOperand(1); 4904 SDValue N2_2 = N2->getOperand(2); 4905 if (N2_1 == N1 && N0.getValueType() == N2_0.getValueType()) { 4906 // Create the actual or node if we can generate good code for it. 4907 if (!TLI.shouldNormalizeToSelectSequence(*DAG.getContext(), VT)) { 4908 SDValue Or = DAG.getNode(ISD::OR, SDLoc(N), N0.getValueType(), 4909 N0, N2_0); 4910 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Or, 4911 N1, N2_2); 4912 } 4913 // Otherwise see if we can optimize to a better pattern. 4914 if (SDValue Combined = visitORLike(N0, N2_0, N)) 4915 return DAG.getNode(ISD::SELECT, SDLoc(N), N1.getValueType(), Combined, 4916 N1, N2_2); 4917 } 4918 } 4919 } 4920 4921 return SDValue(); 4922 } 4923 4924 static 4925 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { 4926 SDLoc DL(N); 4927 EVT LoVT, HiVT; 4928 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 4929 4930 // Split the inputs. 4931 SDValue Lo, Hi, LL, LH, RL, RH; 4932 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); 4933 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); 4934 4935 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); 4936 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); 4937 4938 return std::make_pair(Lo, Hi); 4939 } 4940 4941 // This function assumes all the vselect's arguments are CONCAT_VECTOR 4942 // nodes and that the condition is a BV of ConstantSDNodes (or undefs). 4943 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { 4944 SDLoc dl(N); 4945 SDValue Cond = N->getOperand(0); 4946 SDValue LHS = N->getOperand(1); 4947 SDValue RHS = N->getOperand(2); 4948 EVT VT = N->getValueType(0); 4949 int NumElems = VT.getVectorNumElements(); 4950 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && 4951 RHS.getOpcode() == ISD::CONCAT_VECTORS && 4952 Cond.getOpcode() == ISD::BUILD_VECTOR); 4953 4954 // CONCAT_VECTOR can take an arbitrary number of arguments. We only care about 4955 // binary ones here. 4956 if (LHS->getNumOperands() != 2 || RHS->getNumOperands() != 2) 4957 return SDValue(); 4958 4959 // We're sure we have an even number of elements due to the 4960 // concat_vectors we have as arguments to vselect. 4961 // Skip BV elements until we find one that's not an UNDEF 4962 // After we find an UNDEF element, keep looping until we get to half the 4963 // length of the BV and see if all the non-undef nodes are the same. 4964 ConstantSDNode *BottomHalf = nullptr; 4965 for (int i = 0; i < NumElems / 2; ++i) { 4966 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4967 continue; 4968 4969 if (BottomHalf == nullptr) 4970 BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4971 else if (Cond->getOperand(i).getNode() != BottomHalf) 4972 return SDValue(); 4973 } 4974 4975 // Do the same for the second half of the BuildVector 4976 ConstantSDNode *TopHalf = nullptr; 4977 for (int i = NumElems / 2; i < NumElems; ++i) { 4978 if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF) 4979 continue; 4980 4981 if (TopHalf == nullptr) 4982 TopHalf = cast<ConstantSDNode>(Cond.getOperand(i)); 4983 else if (Cond->getOperand(i).getNode() != TopHalf) 4984 return SDValue(); 4985 } 4986 4987 assert(TopHalf && BottomHalf && 4988 "One half of the selector was all UNDEFs and the other was all the " 4989 "same value. This should have been addressed before this function."); 4990 return DAG.getNode( 4991 ISD::CONCAT_VECTORS, dl, VT, 4992 BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0), 4993 TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1)); 4994 } 4995 4996 SDValue DAGCombiner::visitMSTORE(SDNode *N) { 4997 4998 if (Level >= AfterLegalizeTypes) 4999 return SDValue(); 5000 5001 MaskedStoreSDNode *MST = dyn_cast<MaskedStoreSDNode>(N); 5002 SDValue Mask = MST->getMask(); 5003 SDValue Data = MST->getValue(); 5004 SDLoc DL(N); 5005 5006 // If the MSTORE data type requires splitting and the mask is provided by a 5007 // SETCC, then split both nodes and its operands before legalization. This 5008 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5009 // and enables future optimizations (e.g. min/max pattern matching on X86). 5010 if (Mask.getOpcode() == ISD::SETCC) { 5011 5012 // Check if any splitting is required. 5013 if (TLI.getTypeAction(*DAG.getContext(), Data.getValueType()) != 5014 TargetLowering::TypeSplitVector) 5015 return SDValue(); 5016 5017 SDValue MaskLo, MaskHi, Lo, Hi; 5018 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5019 5020 EVT LoVT, HiVT; 5021 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MST->getValueType(0)); 5022 5023 SDValue Chain = MST->getChain(); 5024 SDValue Ptr = MST->getBasePtr(); 5025 5026 EVT MemoryVT = MST->getMemoryVT(); 5027 unsigned Alignment = MST->getOriginalAlignment(); 5028 5029 // if Alignment is equal to the vector size, 5030 // take the half of it for the second part 5031 unsigned SecondHalfAlignment = 5032 (Alignment == Data->getValueType(0).getSizeInBits()/8) ? 5033 Alignment/2 : Alignment; 5034 5035 EVT LoMemVT, HiMemVT; 5036 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5037 5038 SDValue DataLo, DataHi; 5039 std::tie(DataLo, DataHi) = DAG.SplitVector(Data, DL); 5040 5041 MachineMemOperand *MMO = DAG.getMachineFunction(). 5042 getMachineMemOperand(MST->getPointerInfo(), 5043 MachineMemOperand::MOStore, LoMemVT.getStoreSize(), 5044 Alignment, MST->getAAInfo(), MST->getRanges()); 5045 5046 Lo = DAG.getMaskedStore(Chain, DL, DataLo, Ptr, MaskLo, LoMemVT, MMO, 5047 MST->isTruncatingStore()); 5048 5049 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 5050 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 5051 DAG.getConstant(IncrementSize, Ptr.getValueType())); 5052 5053 MMO = DAG.getMachineFunction(). 5054 getMachineMemOperand(MST->getPointerInfo(), 5055 MachineMemOperand::MOStore, HiMemVT.getStoreSize(), 5056 SecondHalfAlignment, MST->getAAInfo(), 5057 MST->getRanges()); 5058 5059 Hi = DAG.getMaskedStore(Chain, DL, DataHi, Ptr, MaskHi, HiMemVT, MMO, 5060 MST->isTruncatingStore()); 5061 5062 AddToWorklist(Lo.getNode()); 5063 AddToWorklist(Hi.getNode()); 5064 5065 return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); 5066 } 5067 return SDValue(); 5068 } 5069 5070 SDValue DAGCombiner::visitMLOAD(SDNode *N) { 5071 5072 if (Level >= AfterLegalizeTypes) 5073 return SDValue(); 5074 5075 MaskedLoadSDNode *MLD = dyn_cast<MaskedLoadSDNode>(N); 5076 SDValue Mask = MLD->getMask(); 5077 SDLoc DL(N); 5078 5079 // If the MLOAD result requires splitting and the mask is provided by a 5080 // SETCC, then split both nodes and its operands before legalization. This 5081 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5082 // and enables future optimizations (e.g. min/max pattern matching on X86). 5083 5084 if (Mask.getOpcode() == ISD::SETCC) { 5085 EVT VT = N->getValueType(0); 5086 5087 // Check if any splitting is required. 5088 if (TLI.getTypeAction(*DAG.getContext(), VT) != 5089 TargetLowering::TypeSplitVector) 5090 return SDValue(); 5091 5092 SDValue MaskLo, MaskHi, Lo, Hi; 5093 std::tie(MaskLo, MaskHi) = SplitVSETCC(Mask.getNode(), DAG); 5094 5095 SDValue Src0 = MLD->getSrc0(); 5096 SDValue Src0Lo, Src0Hi; 5097 std::tie(Src0Lo, Src0Hi) = DAG.SplitVector(Src0, DL); 5098 5099 EVT LoVT, HiVT; 5100 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(MLD->getValueType(0)); 5101 5102 SDValue Chain = MLD->getChain(); 5103 SDValue Ptr = MLD->getBasePtr(); 5104 EVT MemoryVT = MLD->getMemoryVT(); 5105 unsigned Alignment = MLD->getOriginalAlignment(); 5106 5107 // if Alignment is equal to the vector size, 5108 // take the half of it for the second part 5109 unsigned SecondHalfAlignment = 5110 (Alignment == MLD->getValueType(0).getSizeInBits()/8) ? 5111 Alignment/2 : Alignment; 5112 5113 EVT LoMemVT, HiMemVT; 5114 std::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); 5115 5116 MachineMemOperand *MMO = DAG.getMachineFunction(). 5117 getMachineMemOperand(MLD->getPointerInfo(), 5118 MachineMemOperand::MOLoad, LoMemVT.getStoreSize(), 5119 Alignment, MLD->getAAInfo(), MLD->getRanges()); 5120 5121 Lo = DAG.getMaskedLoad(LoVT, DL, Chain, Ptr, MaskLo, Src0Lo, LoMemVT, MMO, 5122 ISD::NON_EXTLOAD); 5123 5124 unsigned IncrementSize = LoMemVT.getSizeInBits()/8; 5125 Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, 5126 DAG.getConstant(IncrementSize, Ptr.getValueType())); 5127 5128 MMO = DAG.getMachineFunction(). 5129 getMachineMemOperand(MLD->getPointerInfo(), 5130 MachineMemOperand::MOLoad, HiMemVT.getStoreSize(), 5131 SecondHalfAlignment, MLD->getAAInfo(), MLD->getRanges()); 5132 5133 Hi = DAG.getMaskedLoad(HiVT, DL, Chain, Ptr, MaskHi, Src0Hi, HiMemVT, MMO, 5134 ISD::NON_EXTLOAD); 5135 5136 AddToWorklist(Lo.getNode()); 5137 AddToWorklist(Hi.getNode()); 5138 5139 // Build a factor node to remember that this load is independent of the 5140 // other one. 5141 Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo.getValue(1), 5142 Hi.getValue(1)); 5143 5144 // Legalized the chain result - switch anything that used the old chain to 5145 // use the new one. 5146 DAG.ReplaceAllUsesOfValueWith(SDValue(MLD, 1), Chain); 5147 5148 SDValue LoadRes = DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi); 5149 5150 SDValue RetOps[] = { LoadRes, Chain }; 5151 return DAG.getMergeValues(RetOps, DL); 5152 } 5153 return SDValue(); 5154 } 5155 5156 SDValue DAGCombiner::visitVSELECT(SDNode *N) { 5157 SDValue N0 = N->getOperand(0); 5158 SDValue N1 = N->getOperand(1); 5159 SDValue N2 = N->getOperand(2); 5160 SDLoc DL(N); 5161 5162 // Canonicalize integer abs. 5163 // vselect (setg[te] X, 0), X, -X -> 5164 // vselect (setgt X, -1), X, -X -> 5165 // vselect (setl[te] X, 0), -X, X -> 5166 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 5167 if (N0.getOpcode() == ISD::SETCC) { 5168 SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); 5169 ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get(); 5170 bool isAbs = false; 5171 bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode()); 5172 5173 if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) || 5174 (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) && 5175 N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1)) 5176 isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode()); 5177 else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) && 5178 N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1)) 5179 isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode()); 5180 5181 if (isAbs) { 5182 EVT VT = LHS.getValueType(); 5183 SDValue Shift = DAG.getNode( 5184 ISD::SRA, DL, VT, LHS, 5185 DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT)); 5186 SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift); 5187 AddToWorklist(Shift.getNode()); 5188 AddToWorklist(Add.getNode()); 5189 return DAG.getNode(ISD::XOR, DL, VT, Add, Shift); 5190 } 5191 } 5192 5193 // If the VSELECT result requires splitting and the mask is provided by a 5194 // SETCC, then split both nodes and its operands before legalization. This 5195 // prevents the type legalizer from unrolling SETCC into scalar comparisons 5196 // and enables future optimizations (e.g. min/max pattern matching on X86). 5197 if (N0.getOpcode() == ISD::SETCC) { 5198 EVT VT = N->getValueType(0); 5199 5200 // Check if any splitting is required. 5201 if (TLI.getTypeAction(*DAG.getContext(), VT) != 5202 TargetLowering::TypeSplitVector) 5203 return SDValue(); 5204 5205 SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH; 5206 std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG); 5207 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1); 5208 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2); 5209 5210 Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL); 5211 Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH); 5212 5213 // Add the new VSELECT nodes to the work list in case they need to be split 5214 // again. 5215 AddToWorklist(Lo.getNode()); 5216 AddToWorklist(Hi.getNode()); 5217 5218 return DAG.getNode(ISD::