1 //===-- DAGCombiner.cpp - Implement a DAG node combiner -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This pass combines dag nodes to form fewer, simpler DAG nodes. It can be run 11 // both before and after the DAG is legalized. 12 // 13 // This pass is not a substitute for the LLVM IR instcombine pass. This pass is 14 // primarily intended to handle simplification opportunities that are implicit 15 // in the LLVM IR and exposed by the various codegen lowering phases. 16 // 17 //===----------------------------------------------------------------------===// 18 19 #include "llvm/CodeGen/SelectionDAG.h" 20 #include "llvm/ADT/SmallPtrSet.h" 21 #include "llvm/ADT/Statistic.h" 22 #include "llvm/Analysis/AliasAnalysis.h" 23 #include "llvm/CodeGen/MachineFrameInfo.h" 24 #include "llvm/CodeGen/MachineFunction.h" 25 #include "llvm/IR/DataLayout.h" 26 #include "llvm/IR/DerivedTypes.h" 27 #include "llvm/IR/Function.h" 28 #include "llvm/IR/LLVMContext.h" 29 #include "llvm/Support/CommandLine.h" 30 #include "llvm/Support/Debug.h" 31 #include "llvm/Support/ErrorHandling.h" 32 #include "llvm/Support/MathExtras.h" 33 #include "llvm/Support/raw_ostream.h" 34 #include "llvm/Target/TargetLowering.h" 35 #include "llvm/Target/TargetMachine.h" 36 #include "llvm/Target/TargetOptions.h" 37 #include "llvm/Target/TargetRegisterInfo.h" 38 #include "llvm/Target/TargetSubtargetInfo.h" 39 #include <algorithm> 40 using namespace llvm; 41 42 #define DEBUG_TYPE "dagcombine" 43 44 STATISTIC(NodesCombined , "Number of dag nodes combined"); 45 STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); 46 STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); 47 STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); 48 
STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int");
STATISTIC(SlicedLoads, "Number of load sliced");

namespace {
  static cl::opt<bool>
    CombinerAA("combiner-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner alias-analysis heuristics"));

  static cl::opt<bool>
    CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden,
               cl::desc("Enable DAG combiner's use of IR alias analysis"));

  static cl::opt<bool>
    UseTBAA("combiner-use-tbaa", cl::Hidden, cl::init(true),
            cl::desc("Enable DAG combiner's use of TBAA"));

#ifndef NDEBUG
  static cl::opt<std::string>
    CombinerAAOnlyFunc("combiner-aa-only-func", cl::Hidden,
               cl::desc("Only use DAG-combiner alias analysis in this"
                        " function"));
#endif

  /// Hidden option to stress test load slicing, i.e., when this option
  /// is enabled, load slicing bypasses most of its profitability guards.
  static cl::opt<bool>
  StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden,
                    cl::desc("Bypass the profitability model of load "
                             "slicing"),
                    cl::init(false));

//------------------------------ DAGCombiner ---------------------------------//

  class DAGCombiner {
    SelectionDAG &DAG;
    const TargetLowering &TLI;
    CombineLevel Level;              // Which combining phase we are running in.
    CodeGenOpt::Level OptLevel;
    bool LegalOperations;            // True once operations have been legalized.
    bool LegalTypes;                 // True once types have been legalized.
    bool ForCodeSize;                // True if the function is optsize/minsize.

    // Worklist of all of the nodes that need to be simplified.
    //
    // This has the semantics that when adding to the worklist,
    // the item added must be next to be processed. It should
    // also only appear once. The naive approach to this takes
    // linear time.
    //
    // To reduce the insert/remove time to logarithmic, we use
    // a set and a vector to maintain our worklist.
    //
    // The set contains the items on the worklist, but does not
    // maintain the order they should be visited.
    //
    // The vector maintains the order nodes should be visited, but may
    // contain duplicate or removed nodes. When choosing a node to
    // visit, we pop off the order stack until we find an item that is
    // also in the contents set. All operations are O(log N).
    SmallPtrSet<SDNode*, 64> WorkListContents;
    SmallVector<SDNode*, 64> WorkListOrder;

    // AA - Used for DAG load/store alias analysis.
    AliasAnalysis &AA;

    /// AddUsersToWorkList - When an instruction is simplified, add all users of
    /// the instruction to the work lists because they might get more simplified
    /// now.
    ///
    void AddUsersToWorkList(SDNode *N) {
      for (SDNode *Node : N->uses())
        AddToWorkList(Node);
    }

    /// visit - call the node-specific routine that knows how to fold each
    /// particular type of node.
    SDValue visit(SDNode *N);

  public:
    /// AddToWorkList - Add to the work list making sure its instance is at the
    /// back (next to be processed.)
    void AddToWorkList(SDNode *N) {
      WorkListContents.insert(N);
      WorkListOrder.push_back(N);
    }

    /// removeFromWorkList - remove all instances of N from the worklist.
    ///
    void removeFromWorkList(SDNode *N) {
      // Only the set needs updating: stale entries left in WorkListOrder are
      // skipped because they no longer appear in WorkListContents.
      WorkListContents.erase(N);
    }

    SDValue CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
                      bool AddTo = true);

    SDValue CombineTo(SDNode *N, SDValue Res, bool AddTo = true) {
      return CombineTo(N, &Res, 1, AddTo);
    }

    SDValue CombineTo(SDNode *N, SDValue Res0, SDValue Res1,
                      bool AddTo = true) {
      SDValue To[] = { Res0, Res1 };
      return CombineTo(N, To, 2, AddTo);
    }

    void CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO);

  private:

    /// SimplifyDemandedBits - Check the specified integer node value to see if
    /// it can be simplified or if things it uses can be simplified by bit
    /// propagation. If so, return true.
    bool SimplifyDemandedBits(SDValue Op) {
      unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
      APInt Demanded = APInt::getAllOnesValue(BitWidth);
      return SimplifyDemandedBits(Op, Demanded);
    }

    bool SimplifyDemandedBits(SDValue Op, const APInt &Demanded);

    bool CombineToPreIndexedLoadStore(SDNode *N);
    bool CombineToPostIndexedLoadStore(SDNode *N);
    bool SliceUpLoad(SDNode *N);

    /// \brief Replace an ISD::EXTRACT_VECTOR_ELT of a load with a narrowed
    ///   load.
    ///
    /// \param EVE ISD::EXTRACT_VECTOR_ELT to be replaced.
    /// \param InVecVT type of the input vector to EVE with bitcasts resolved.
    /// \param EltNo index of the vector element to load.
    /// \param OriginalLoad load that EVE came from to be replaced.
    /// \returns EVE on success SDValue() on failure.
    SDValue ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
        SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad);
    void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad);
    SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace);
    SDValue SExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue ZExtPromoteOperand(SDValue Op, EVT PVT);
    SDValue PromoteIntBinOp(SDValue Op);
    SDValue PromoteIntShiftOp(SDValue Op);
    SDValue PromoteExtend(SDValue Op);
    bool PromoteLoad(SDValue Op);

    void ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs,
                         SDValue Trunc, SDValue ExtLoad, SDLoc DL,
                         ISD::NodeType ExtType);

    /// combine - call the node-specific routine that knows how to fold each
    /// particular type of node. If that doesn't do anything, try the
    /// target-specific DAG combines.
    SDValue combine(SDNode *N);

    // Visitation implementation - Implement dag node combining for different
    // node types.  The semantics are as follows:
    // Return Value:
    //   SDValue.getNode() == 0 - No change was made
    //   SDValue.getNode() == N - N was replaced, is dead and has been handled.
    //   otherwise              - N should be replaced by the returned Operand.
    //
    SDValue visitTokenFactor(SDNode *N);
    SDValue visitMERGE_VALUES(SDNode *N);
    SDValue visitADD(SDNode *N);
    SDValue visitSUB(SDNode *N);
    SDValue visitADDC(SDNode *N);
    SDValue visitSUBC(SDNode *N);
    SDValue visitADDE(SDNode *N);
    SDValue visitSUBE(SDNode *N);
    SDValue visitMUL(SDNode *N);
    SDValue visitSDIV(SDNode *N);
    SDValue visitUDIV(SDNode *N);
    SDValue visitSREM(SDNode *N);
    SDValue visitUREM(SDNode *N);
    SDValue visitMULHU(SDNode *N);
    SDValue visitMULHS(SDNode *N);
    SDValue visitSMUL_LOHI(SDNode *N);
    SDValue visitUMUL_LOHI(SDNode *N);
    SDValue visitSMULO(SDNode *N);
    SDValue visitUMULO(SDNode *N);
    SDValue visitSDIVREM(SDNode *N);
    SDValue visitUDIVREM(SDNode *N);
    SDValue visitAND(SDNode *N);
    SDValue visitOR(SDNode *N);
    SDValue visitXOR(SDNode *N);
    SDValue SimplifyVBinOp(SDNode *N);
    SDValue SimplifyVUnaryOp(SDNode *N);
    SDValue visitSHL(SDNode *N);
    SDValue visitSRA(SDNode *N);
    SDValue visitSRL(SDNode *N);
    SDValue visitRotate(SDNode *N);
    SDValue visitCTLZ(SDNode *N);
    SDValue visitCTLZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTTZ(SDNode *N);
    SDValue visitCTTZ_ZERO_UNDEF(SDNode *N);
    SDValue visitCTPOP(SDNode *N);
    SDValue visitSELECT(SDNode *N);
    SDValue visitVSELECT(SDNode *N);
    SDValue visitSELECT_CC(SDNode *N);
    SDValue visitSETCC(SDNode *N);
    SDValue visitSIGN_EXTEND(SDNode *N);
    SDValue visitZERO_EXTEND(SDNode *N);
    SDValue visitANY_EXTEND(SDNode *N);
    SDValue visitSIGN_EXTEND_INREG(SDNode *N);
    SDValue visitTRUNCATE(SDNode *N);
    SDValue visitBITCAST(SDNode *N);
    SDValue visitBUILD_PAIR(SDNode *N);
    SDValue visitFADD(SDNode *N);
    SDValue visitFSUB(SDNode *N);
    SDValue visitFMUL(SDNode *N);
    SDValue visitFMA(SDNode *N);
    SDValue visitFDIV(SDNode *N);
    SDValue visitFREM(SDNode *N);
    SDValue visitFCOPYSIGN(SDNode *N);
    SDValue visitSINT_TO_FP(SDNode *N);
    SDValue visitUINT_TO_FP(SDNode *N);
    SDValue visitFP_TO_SINT(SDNode *N);
    SDValue visitFP_TO_UINT(SDNode *N);
    SDValue visitFP_ROUND(SDNode *N);
    SDValue visitFP_ROUND_INREG(SDNode *N);
    SDValue visitFP_EXTEND(SDNode *N);
    SDValue visitFNEG(SDNode *N);
    SDValue visitFABS(SDNode *N);
    SDValue visitFCEIL(SDNode *N);
    SDValue visitFTRUNC(SDNode *N);
    SDValue visitFFLOOR(SDNode *N);
    SDValue visitBRCOND(SDNode *N);
    SDValue visitBR_CC(SDNode *N);
    SDValue visitLOAD(SDNode *N);
    SDValue visitSTORE(SDNode *N);
    SDValue visitINSERT_VECTOR_ELT(SDNode *N);
    SDValue visitEXTRACT_VECTOR_ELT(SDNode *N);
    SDValue visitBUILD_VECTOR(SDNode *N);
    SDValue visitCONCAT_VECTORS(SDNode *N);
    SDValue visitEXTRACT_SUBVECTOR(SDNode *N);
    SDValue visitVECTOR_SHUFFLE(SDNode *N);
    SDValue visitINSERT_SUBVECTOR(SDNode *N);

    SDValue XformToShuffleWithZero(SDNode *N);
    SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS);

    SDValue visitShiftByConstant(SDNode *N, ConstantSDNode *Amt);

    bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS);
    SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N);
    SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2);
    SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2,
                             SDValue N3, ISD::CondCode CC,
                             bool NotExtCompare = false);
    SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond,
                          SDLoc DL, bool foldBooleans = true);

    bool isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS,
                           SDValue &CC) const;
    bool isOneUseSetCC(SDValue N) const;

    SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                       unsigned HiOp);
    SDValue CombineConsecutiveLoads(SDNode *N, EVT VT);
    SDValue ConstantFoldBITCASTofBUILD_VECTOR(SDNode *, EVT);
    SDValue BuildSDIV(SDNode *N);
    SDValue BuildUDIV(SDNode *N);
    SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                               bool DemandHighBits = true);
    SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1);
    SDNode *MatchRotatePosNeg(SDValue Shifted, SDValue Pos, SDValue Neg,
                              SDValue InnerPos, SDValue InnerNeg,
                              unsigned PosOpcode, unsigned NegOpcode,
                              SDLoc DL);
    SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL);
    SDValue ReduceLoadWidth(SDNode *N);
    SDValue ReduceLoadOpStoreWidth(SDNode *N);
    SDValue TransformFPLoadStorePair(SDNode *N);
    SDValue reduceBuildVecExtToExtBuildVec(SDNode *N);
    SDValue reduceBuildVecConvertToConvertBuildVec(SDNode *N);

    SDValue GetDemandedBits(SDValue V, const APInt &Mask);

    /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
    /// looking for aliasing nodes and adding them to the Aliases vector.
    void GatherAllAliases(SDNode *N, SDValue OriginalChain,
                          SmallVectorImpl<SDValue> &Aliases);

    /// isAlias - Return true if there is any possibility that the two addresses
    /// overlap.
    bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const;

    /// FindBetterChain - Walk up chain skipping non-aliasing memory nodes,
    /// looking for a better chain (aliasing node.)
    SDValue FindBetterChain(SDNode *N, SDValue Chain);

    /// Merge consecutive store operations into a wide store.
    /// This optimization uses wide integers or vectors when possible.
    /// \return True if some memory operations were changed.
    bool MergeConsecutiveStores(StoreSDNode *N);

    /// \brief Try to transform a truncation where C is a constant:
    ///   (trunc (and X, C)) -> (and (trunc X), (trunc C))
    ///
    /// \p N needs to be a truncation and its first operand an AND. Other
    /// requirements are checked by the function (e.g. that trunc is
    /// single-use) and if missed an empty SDValue is returned.
    SDValue distributeTruncateThroughAnd(SDNode *N);

  public:
    // NOTE: ForCodeSize is derived from the function's attributes here; the
    // remaining state is refreshed by Run() for each combine level.
    DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL)
        : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes),
          OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {
      AttributeSet FnAttrs =
          DAG.getMachineFunction().getFunction()->getAttributes();
      ForCodeSize =
          FnAttrs.hasAttribute(AttributeSet::FunctionIndex,
                               Attribute::OptimizeForSize) ||
          FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize);
    }

    /// Run - runs the dag combiner on all nodes in the work list
    void Run(CombineLevel AtLevel);

    SelectionDAG &getDAG() const { return DAG; }

    /// getShiftAmountTy - Returns a type large enough to hold any valid
    /// shift amount - before type legalization these can be huge.
    EVT getShiftAmountTy(EVT LHSTy) {
      assert(LHSTy.isInteger() && "Shift amount is not an integer type!");
      if (LHSTy.isVector())
        return LHSTy;
      return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy)
                        : TLI.getPointerTy();
    }

    /// isTypeLegal - This method returns true if we are running before type
    /// legalization or if the specified VT is legal.
    bool isTypeLegal(const EVT &VT) {
      if (!LegalTypes) return true;
      return TLI.isTypeLegal(VT);
    }

    /// getSetCCResultType - Convenience wrapper around
    /// TargetLowering::getSetCCResultType
    EVT getSetCCResultType(EVT VT) const {
      return TLI.getSetCCResultType(*DAG.getContext(), VT);
    }
  };
}


namespace {
/// WorkListRemover - This class is a DAGUpdateListener that removes any deleted
/// nodes from the worklist.
396 class WorkListRemover : public SelectionDAG::DAGUpdateListener { 397 DAGCombiner &DC; 398 public: 399 explicit WorkListRemover(DAGCombiner &dc) 400 : SelectionDAG::DAGUpdateListener(dc.getDAG()), DC(dc) {} 401 402 void NodeDeleted(SDNode *N, SDNode *E) override { 403 DC.removeFromWorkList(N); 404 } 405 }; 406 } 407 408 //===----------------------------------------------------------------------===// 409 // TargetLowering::DAGCombinerInfo implementation 410 //===----------------------------------------------------------------------===// 411 412 void TargetLowering::DAGCombinerInfo::AddToWorklist(SDNode *N) { 413 ((DAGCombiner*)DC)->AddToWorkList(N); 414 } 415 416 void TargetLowering::DAGCombinerInfo::RemoveFromWorklist(SDNode *N) { 417 ((DAGCombiner*)DC)->removeFromWorkList(N); 418 } 419 420 SDValue TargetLowering::DAGCombinerInfo:: 421 CombineTo(SDNode *N, const std::vector<SDValue> &To, bool AddTo) { 422 return ((DAGCombiner*)DC)->CombineTo(N, &To[0], To.size(), AddTo); 423 } 424 425 SDValue TargetLowering::DAGCombinerInfo:: 426 CombineTo(SDNode *N, SDValue Res, bool AddTo) { 427 return ((DAGCombiner*)DC)->CombineTo(N, Res, AddTo); 428 } 429 430 431 SDValue TargetLowering::DAGCombinerInfo:: 432 CombineTo(SDNode *N, SDValue Res0, SDValue Res1, bool AddTo) { 433 return ((DAGCombiner*)DC)->CombineTo(N, Res0, Res1, AddTo); 434 } 435 436 void TargetLowering::DAGCombinerInfo:: 437 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 438 return ((DAGCombiner*)DC)->CommitTargetLoweringOpt(TLO); 439 } 440 441 //===----------------------------------------------------------------------===// 442 // Helper Functions 443 //===----------------------------------------------------------------------===// 444 445 /// isNegatibleForFree - Return 1 if we can compute the negated form of the 446 /// specified expression for the same cost as the expression itself, or 2 if we 447 /// can compute the negated form more cheaply than the expression itself. 
448 static char isNegatibleForFree(SDValue Op, bool LegalOperations, 449 const TargetLowering &TLI, 450 const TargetOptions *Options, 451 unsigned Depth = 0) { 452 // fneg is removable even if it has multiple uses. 453 if (Op.getOpcode() == ISD::FNEG) return 2; 454 455 // Don't allow anything with multiple uses. 456 if (!Op.hasOneUse()) return 0; 457 458 // Don't recurse exponentially. 459 if (Depth > 6) return 0; 460 461 switch (Op.getOpcode()) { 462 default: return false; 463 case ISD::ConstantFP: 464 // Don't invert constant FP values after legalize. The negated constant 465 // isn't necessarily legal. 466 return LegalOperations ? 0 : 1; 467 case ISD::FADD: 468 // FIXME: determine better conditions for this xform. 469 if (!Options->UnsafeFPMath) return 0; 470 471 // After operation legalization, it might not be legal to create new FSUBs. 472 if (LegalOperations && 473 !TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) 474 return 0; 475 476 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 477 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 478 Options, Depth + 1)) 479 return V; 480 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 481 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 482 Depth + 1); 483 case ISD::FSUB: 484 // We can't turn -(A-B) into B-A when we honor signed zeros. 
485 if (!Options->UnsafeFPMath) return 0; 486 487 // fold (fneg (fsub A, B)) -> (fsub B, A) 488 return 1; 489 490 case ISD::FMUL: 491 case ISD::FDIV: 492 if (Options->HonorSignDependentRoundingFPMath()) return 0; 493 494 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) or (fmul X, (fneg Y)) 495 if (char V = isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, 496 Options, Depth + 1)) 497 return V; 498 499 return isNegatibleForFree(Op.getOperand(1), LegalOperations, TLI, Options, 500 Depth + 1); 501 502 case ISD::FP_EXTEND: 503 case ISD::FP_ROUND: 504 case ISD::FSIN: 505 return isNegatibleForFree(Op.getOperand(0), LegalOperations, TLI, Options, 506 Depth + 1); 507 } 508 } 509 510 /// GetNegatedExpression - If isNegatibleForFree returns true, this function 511 /// returns the newly negated expression. 512 static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, 513 bool LegalOperations, unsigned Depth = 0) { 514 // fneg is removable even if it has multiple uses. 515 if (Op.getOpcode() == ISD::FNEG) return Op.getOperand(0); 516 517 // Don't allow anything with multiple uses. 518 assert(Op.hasOneUse() && "Unknown reuse!"); 519 520 assert(Depth <= 6 && "GetNegatedExpression doesn't match isNegatibleForFree"); 521 switch (Op.getOpcode()) { 522 default: llvm_unreachable("Unknown code"); 523 case ISD::ConstantFP: { 524 APFloat V = cast<ConstantFPSDNode>(Op)->getValueAPF(); 525 V.changeSign(); 526 return DAG.getConstantFP(V, Op.getValueType()); 527 } 528 case ISD::FADD: 529 // FIXME: determine better conditions for this xform. 
530 assert(DAG.getTarget().Options.UnsafeFPMath); 531 532 // fold (fneg (fadd A, B)) -> (fsub (fneg A), B) 533 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 534 DAG.getTargetLoweringInfo(), 535 &DAG.getTarget().Options, Depth+1)) 536 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 537 GetNegatedExpression(Op.getOperand(0), DAG, 538 LegalOperations, Depth+1), 539 Op.getOperand(1)); 540 // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) 541 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 542 GetNegatedExpression(Op.getOperand(1), DAG, 543 LegalOperations, Depth+1), 544 Op.getOperand(0)); 545 case ISD::FSUB: 546 // We can't turn -(A-B) into B-A when we honor signed zeros. 547 assert(DAG.getTarget().Options.UnsafeFPMath); 548 549 // fold (fneg (fsub 0, B)) -> B 550 if (ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(Op.getOperand(0))) 551 if (N0CFP->getValueAPF().isZero()) 552 return Op.getOperand(1); 553 554 // fold (fneg (fsub A, B)) -> (fsub B, A) 555 return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), 556 Op.getOperand(1), Op.getOperand(0)); 557 558 case ISD::FMUL: 559 case ISD::FDIV: 560 assert(!DAG.getTarget().Options.HonorSignDependentRoundingFPMath()); 561 562 // fold (fneg (fmul X, Y)) -> (fmul (fneg X), Y) 563 if (isNegatibleForFree(Op.getOperand(0), LegalOperations, 564 DAG.getTargetLoweringInfo(), 565 &DAG.getTarget().Options, Depth+1)) 566 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 567 GetNegatedExpression(Op.getOperand(0), DAG, 568 LegalOperations, Depth+1), 569 Op.getOperand(1)); 570 571 // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) 572 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 573 Op.getOperand(0), 574 GetNegatedExpression(Op.getOperand(1), DAG, 575 LegalOperations, Depth+1)); 576 577 case ISD::FP_EXTEND: 578 case ISD::FSIN: 579 return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), 580 GetNegatedExpression(Op.getOperand(0), DAG, 581 LegalOperations, 
Depth+1)); 582 case ISD::FP_ROUND: 583 return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), 584 GetNegatedExpression(Op.getOperand(0), DAG, 585 LegalOperations, Depth+1), 586 Op.getOperand(1)); 587 } 588 } 589 590 // isSetCCEquivalent - Return true if this node is a setcc, or is a select_cc 591 // that selects between the target values used for true and false, making it 592 // equivalent to a setcc. Also, set the incoming LHS, RHS, and CC references to 593 // the appropriate nodes based on the type of node we are checking. This 594 // simplifies life a bit for the callers. 595 bool DAGCombiner::isSetCCEquivalent(SDValue N, SDValue &LHS, SDValue &RHS, 596 SDValue &CC) const { 597 if (N.getOpcode() == ISD::SETCC) { 598 LHS = N.getOperand(0); 599 RHS = N.getOperand(1); 600 CC = N.getOperand(2); 601 return true; 602 } 603 604 if (N.getOpcode() != ISD::SELECT_CC || 605 !TLI.isConstTrueVal(N.getOperand(2).getNode()) || 606 !TLI.isConstFalseVal(N.getOperand(3).getNode())) 607 return false; 608 609 LHS = N.getOperand(0); 610 RHS = N.getOperand(1); 611 CC = N.getOperand(4); 612 return true; 613 } 614 615 // isOneUseSetCC - Return true if this is a SetCC-equivalent operation with only 616 // one use. If this is true, it allows the users to invert the operation for 617 // free when it is profitable to do so. 618 bool DAGCombiner::isOneUseSetCC(SDValue N) const { 619 SDValue N0, N1, N2; 620 if (isSetCCEquivalent(N, N0, N1, N2) && N.getNode()->hasOneUse()) 621 return true; 622 return false; 623 } 624 625 /// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose 626 /// elements are all the same constant or undefined. 
627 static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) { 628 BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N); 629 if (!C) 630 return false; 631 632 APInt SplatUndef; 633 unsigned SplatBitSize; 634 bool HasAnyUndefs; 635 EVT EltVT = N->getValueType(0).getVectorElementType(); 636 return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize, 637 HasAnyUndefs) && 638 EltVT.getSizeInBits() >= SplatBitSize); 639 } 640 641 // \brief Returns the SDNode if it is a constant BuildVector or constant. 642 static SDNode *isConstantBuildVectorOrConstantInt(SDValue N) { 643 if (isa<ConstantSDNode>(N)) 644 return N.getNode(); 645 BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N); 646 if(BV && BV->isConstant()) 647 return BV; 648 return nullptr; 649 } 650 651 // \brief Returns the SDNode if it is a constant splat BuildVector or constant 652 // int. 653 static ConstantSDNode *isConstOrConstSplat(SDValue N) { 654 if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) 655 return CN; 656 657 if (BuildVectorSDNode *BV = dyn_cast<BuildVectorSDNode>(N)) { 658 BitVector UndefElements; 659 ConstantSDNode *CN = BV->getConstantSplatNode(&UndefElements); 660 661 // BuildVectors can truncate their operands. Ignore that case here. 662 // FIXME: We blindly ignore splats which include undef which is overly 663 // pessimistic. 664 if (CN && UndefElements.none() && 665 CN->getValueType(0) == N.getValueType().getScalarType()) 666 return CN; 667 } 668 669 return nullptr; 670 } 671 672 SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, 673 SDValue N0, SDValue N1) { 674 EVT VT = N0.getValueType(); 675 if (N0.getOpcode() == Opc) { 676 if (SDNode *L = isConstantBuildVectorOrConstantInt(N0.getOperand(1))) { 677 if (SDNode *R = isConstantBuildVectorOrConstantInt(N1)) { 678 // reassoc. 
(op (op x, c1), c2) -> (op x, (op c1, c2)) 679 SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, L, R); 680 if (!OpNode.getNode()) 681 return SDValue(); 682 return DAG.getNode(Opc, DL, VT, N0.getOperand(0), OpNode); 683 } 684 if (N0.hasOneUse()) { 685 // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one 686 // use 687 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); 688 if (!OpNode.getNode()) 689 return SDValue(); 690 AddToWorkList(OpNode.getNode()); 691 return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); 692 } 693 } 694 } 695 696 if (N1.getOpcode() == Opc) { 697 if (SDNode *R = isConstantBuildVectorOrConstantInt(N1.getOperand(1))) { 698 if (SDNode *L = isConstantBuildVectorOrConstantInt(N0)) { 699 // reassoc. (op c2, (op x, c1)) -> (op x, (op c1, c2)) 700 SDValue OpNode = DAG.FoldConstantArithmetic(Opc, VT, R, L); 701 if (!OpNode.getNode()) 702 return SDValue(); 703 return DAG.getNode(Opc, DL, VT, N1.getOperand(0), OpNode); 704 } 705 if (N1.hasOneUse()) { 706 // reassoc. 
(op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one 707 // use 708 SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); 709 if (!OpNode.getNode()) 710 return SDValue(); 711 AddToWorkList(OpNode.getNode()); 712 return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); 713 } 714 } 715 } 716 717 return SDValue(); 718 } 719 720 SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo, 721 bool AddTo) { 722 assert(N->getNumValues() == NumTo && "Broken CombineTo call!"); 723 ++NodesCombined; 724 DEBUG(dbgs() << "\nReplacing.1 "; 725 N->dump(&DAG); 726 dbgs() << "\nWith: "; 727 To[0].getNode()->dump(&DAG); 728 dbgs() << " and " << NumTo-1 << " other values\n"; 729 for (unsigned i = 0, e = NumTo; i != e; ++i) 730 assert((!To[i].getNode() || 731 N->getValueType(i) == To[i].getValueType()) && 732 "Cannot combine value to value of different type!")); 733 WorkListRemover DeadNodes(*this); 734 DAG.ReplaceAllUsesWith(N, To); 735 if (AddTo) { 736 // Push the new nodes and any users onto the worklist 737 for (unsigned i = 0, e = NumTo; i != e; ++i) { 738 if (To[i].getNode()) { 739 AddToWorkList(To[i].getNode()); 740 AddUsersToWorkList(To[i].getNode()); 741 } 742 } 743 } 744 745 // Finally, if the node is now dead, remove it from the graph. The node 746 // may not be dead if the replacement process recursively simplified to 747 // something else needing this node. 748 if (N->use_empty()) { 749 // Nodes can be reintroduced into the worklist. Make sure we do not 750 // process a node that has been replaced. 751 removeFromWorkList(N); 752 753 // Finally, since the node is now dead, remove it from the graph. 754 DAG.DeleteNode(N); 755 } 756 return SDValue(N, 0); 757 } 758 759 void DAGCombiner:: 760 CommitTargetLoweringOpt(const TargetLowering::TargetLoweringOpt &TLO) { 761 // Replace all uses. If any nodes become isomorphic to other nodes and 762 // are deleted, make sure to remove them from our worklist. 
763 WorkListRemover DeadNodes(*this); 764 DAG.ReplaceAllUsesOfValueWith(TLO.Old, TLO.New); 765 766 // Push the new node and any (possibly new) users onto the worklist. 767 AddToWorkList(TLO.New.getNode()); 768 AddUsersToWorkList(TLO.New.getNode()); 769 770 // Finally, if the node is now dead, remove it from the graph. The node 771 // may not be dead if the replacement process recursively simplified to 772 // something else needing this node. 773 if (TLO.Old.getNode()->use_empty()) { 774 removeFromWorkList(TLO.Old.getNode()); 775 776 // If the operands of this node are only used by the node, they will now 777 // be dead. Make sure to visit them first to delete dead nodes early. 778 for (unsigned i = 0, e = TLO.Old.getNode()->getNumOperands(); i != e; ++i) 779 if (TLO.Old.getNode()->getOperand(i).getNode()->hasOneUse()) 780 AddToWorkList(TLO.Old.getNode()->getOperand(i).getNode()); 781 782 DAG.DeleteNode(TLO.Old.getNode()); 783 } 784 } 785 786 /// SimplifyDemandedBits - Check the specified integer node value to see if 787 /// it can be simplified or if things it uses can be simplified by bit 788 /// propagation. If so, return true. 789 bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { 790 TargetLowering::TargetLoweringOpt TLO(DAG, LegalTypes, LegalOperations); 791 APInt KnownZero, KnownOne; 792 if (!TLI.SimplifyDemandedBits(Op, Demanded, KnownZero, KnownOne, TLO)) 793 return false; 794 795 // Revisit the node. 796 AddToWorkList(Op.getNode()); 797 798 // Replace the old value with the new one. 
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.2 ";
        TLO.Old.getNode()->dump(&DAG);
        dbgs() << "\nWith: ";
        TLO.New.getNode()->dump(&DAG);
        dbgs() << '\n');

  // Commit the change: RAUW, worklist maintenance, and dead-node cleanup.
  CommitTargetLoweringOpt(TLO);
  return true;
}

/// ReplaceLoadWithPromotedLoad - Replace all uses of the original load with a
/// truncate of the promoted (wider) load, then delete the original load.
void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) {
  SDLoc dl(Load);
  EVT VT = Load->getValueType(0);
  // The promoted load produces a wider value; truncate back to the original
  // type for the existing users.
  SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0));

  DEBUG(dbgs() << "\nReplacing.9 ";
        Load->dump(&DAG);
        dbgs() << "\nWith: ";
        Trunc.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorkListRemover DeadNodes(*this);
  // Rewire both the value result (0) and the chain result (1).
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 0), Trunc);
  DAG.ReplaceAllUsesOfValueWith(SDValue(Load, 1), SDValue(ExtLoad, 1));
  removeFromWorkList(Load);
  DAG.DeleteNode(Load);
  AddToWorkList(Trunc.getNode());
}

/// PromoteOperand - Return Op promoted to the wider type PVT, or a null
/// SDValue if that is not possible. Sets Replace to true when the returned
/// node is a promoted load that must later replace the original load via
/// ReplaceLoadWithPromotedLoad.
SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) {
  Replace = false;
  SDLoc dl(Op);
  if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Op)) {
    EVT MemVT = LD->getMemoryVT();
    // For a plain (non-extending) load, prefer a zero-extending load when
    // legal; otherwise fall back to any-extend. Extending loads keep their
    // original extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
                                                  : ISD::EXTLOAD)
      : LD->getExtensionType();
    Replace = true;
    return DAG.getExtLoad(ExtType, dl, PVT,
                          LD->getChain(), LD->getBasePtr(),
                          MemVT, LD->getMemOperand());
  }

  unsigned Opc = Op.getOpcode();
  switch (Opc) {
  default: break;
  case ISD::AssertSext:
    return DAG.getNode(ISD::AssertSext, dl, PVT,
                       SExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::AssertZext:
    return DAG.getNode(ISD::AssertZext, dl, PVT,
                       ZExtPromoteOperand(Op.getOperand(0), PVT),
                       Op.getOperand(1));
  case ISD::Constant: {
    // NOTE(review): byte-sized constants are sign-extended, odd-sized ones
    // zero-extended — presumably to keep the padding bits defined; confirm
    // against the promotion users before changing.
    unsigned ExtOpc =
      Op.getValueType().isByteSized() ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND;
    return DAG.getNode(ExtOpc, dl, PVT, Op);
  }
  }

  // Everything else is promoted with a plain any-extend, when legal.
  if (!TLI.isOperationLegal(ISD::ANY_EXTEND, PVT))
    return SDValue();
  return DAG.getNode(ISD::ANY_EXTEND, dl, PVT, Op);
}

/// SExtPromoteOperand - Promote Op to PVT and sign-extend-in-reg from its
/// original type so the high bits of the wider value are well defined.
SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) {
  if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT))
    return SDValue();
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorkList(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, NewOp.getValueType(), NewOp,
                     DAG.getValueType(OldVT));
}

/// ZExtPromoteOperand - Promote Op to PVT and zero-extend-in-reg from its
/// original type so the high bits of the wider value are cleared.
SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) {
  EVT OldVT = Op.getValueType();
  SDLoc dl(Op);
  bool Replace = false;
  SDValue NewOp = PromoteOperand(Op, PVT, Replace);
  if (!NewOp.getNode())
    return SDValue();
  AddToWorkList(NewOp.getNode());

  if (Replace)
    ReplaceLoadWithPromotedLoad(Op.getNode(), NewOp.getNode());
  return DAG.getZeroExtendInReg(NewOp, dl, OldVT);
}

/// PromoteIntBinOp - Promote the specified integer binary operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    bool Replace0 = false;
    SDValue N0 = Op.getOperand(0);
    SDValue NN0 = PromoteOperand(N0, PVT, Replace0);
    if (!NN0.getNode())
      return SDValue();

    bool Replace1 = false;
    SDValue N1 = Op.getOperand(1);
    SDValue NN1;
    // Reuse the promoted LHS when both operands are the same node.
    if (N0 == N1)
      NN1 = NN0;
    else {
      NN1 = PromoteOperand(N1, PVT, Replace1);
      if (!NN1.getNode())
        return SDValue();
    }

    AddToWorkList(NN0.getNode());
    if (NN1.getNode())
      AddToWorkList(NN1.getNode());

    // Promoted loads must replace the originals before we build the new op.
    if (Replace0)
      ReplaceLoadWithPromotedLoad(N0.getNode(), NN0.getNode());
    if (Replace1)
      ReplaceLoadWithPromotedLoad(N1.getNode(), NN1.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    // Perform the operation in the wider type, then truncate back.
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, NN0, NN1));
  }
  return SDValue();
}

/// PromoteIntShiftOp - Promote the specified integer shift operation if the
/// target indicates it is beneficial. e.g. On x86, it's usually better to
/// promote i16 operations to i32 since i16 instructions are longer.
SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    bool Replace = false;
    SDValue N0 = Op.getOperand(0);
    // The extension of the shifted value must match the shift kind so the
    // bits brought in at the promoted width are correct: arithmetic right
    // shifts need sign bits, logical right shifts need zero bits.
    if (Opc == ISD::SRA)
      N0 = SExtPromoteOperand(Op.getOperand(0), PVT);
    else if (Opc == ISD::SRL)
      N0 = ZExtPromoteOperand(Op.getOperand(0), PVT);
    else
      N0 = PromoteOperand(N0, PVT, Replace);
    if (!N0.getNode())
      return SDValue();

    AddToWorkList(N0.getNode());
    if (Replace)
      ReplaceLoadWithPromotedLoad(Op.getOperand(0).getNode(), N0.getNode());

    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    SDLoc dl(Op);
    // Shift in the wider type, then truncate back to the original type.
    return DAG.getNode(ISD::TRUNCATE, dl, VT,
                       DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1)));
  }
  return SDValue();
}

/// PromoteExtend - Promote an extend node to a wider desirable type when the
/// target requests it; extends of extends simply re-extend the source.
SDValue DAGCombiner::PromoteExtend(SDValue Op) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return SDValue();

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return SDValue();

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");
    // fold (aext (aext x)) -> (aext x)
    // fold (aext (zext x)) -> (zext x)
    // fold (aext (sext x)) -> (sext x)
    DEBUG(dbgs() << "\nPromoting ";
          Op.getNode()->dump(&DAG));
    return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0));
  }
  return SDValue();
}

/// PromoteLoad - Promote the specified load to a wider desirable type and
/// replace the original load with a truncate of the promoted one. Returns
/// true iff the replacement was performed.
bool DAGCombiner::PromoteLoad(SDValue Op) {
  if (!LegalOperations)
    return false;

  EVT VT = Op.getValueType();
  if (VT.isVector() || !VT.isInteger())
    return false;

  // If operation type is 'undesirable', e.g. i16 on x86, consider
  // promoting it.
  unsigned Opc = Op.getOpcode();
  if (TLI.isTypeDesirableForOp(Opc, VT))
    return false;

  EVT PVT = VT;
  // Consult target whether it is a good idea to promote this operation and
  // what's the right type to promote it to.
  if (TLI.IsDesirableToPromoteOp(Op, PVT)) {
    assert(PVT != VT && "Don't know what type to promote to!");

    SDLoc dl(Op);
    SDNode *N = Op.getNode();
    LoadSDNode *LD = cast<LoadSDNode>(N);
    EVT MemVT = LD->getMemoryVT();
    // Prefer a zero-extending load when legal; otherwise any-extend.
    // Extending loads keep their original extension kind.
    ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD)
      ? (TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT) ? ISD::ZEXTLOAD
                                                  : ISD::EXTLOAD)
      : LD->getExtensionType();
    SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT,
                                   LD->getChain(), LD->getBasePtr(),
                                   MemVT, LD->getMemOperand());
    SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD);

    DEBUG(dbgs() << "\nPromoting ";
          N->dump(&DAG);
          dbgs() << "\nTo: ";
          Result.getNode()->dump(&DAG);
          dbgs() << '\n');
    WorkListRemover DeadNodes(*this);
    // Rewire the value result to the truncate and the chain result to the
    // new load, then delete the original load.
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result);
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), NewLD.getValue(1));
    removeFromWorkList(N);
    DAG.DeleteNode(N);
    AddToWorkList(Result.getNode());
    return true;
  }
  return false;
}


//===----------------------------------------------------------------------===//
//  Main DAG Combiner implementation
//===----------------------------------------------------------------------===//

void DAGCombiner::Run(CombineLevel AtLevel) {
  // set the instance variables, so that the various visit routines may use it.
  Level = AtLevel;
  LegalOperations = Level >= AfterLegalizeVectorOps;
  LegalTypes = Level >= AfterLegalizeTypes;

  // Add all the dag nodes to the worklist.
  for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
       E = DAG.allnodes_end(); I != E; ++I)
    AddToWorkList(I);

  // Create a dummy node (which is not added to allnodes), that adds a reference
  // to the root node, preventing it from being deleted, and tracking any
  // changes of the root.
  HandleSDNode Dummy(DAG.getRoot());

  // The root of the dag may dangle to deleted nodes until the dag combiner is
  // done. Set it to null to avoid confusion.
  DAG.setRoot(SDValue());

  // while the worklist isn't empty, find a node and
  // try and combine it.
  while (!WorkListContents.empty()) {
    SDNode *N;
    // The WorkListOrder holds the SDNodes in order, but it may contain
    // duplicates.
    // In order to avoid a linear scan, we use a set (O(log N)) to hold what the
    // worklist *should* contain, and check whether the node we want to visit
    // should actually be visited.
    do {
      N = WorkListOrder.pop_back_val();
    } while (!WorkListContents.erase(N));

    // If N has no uses, it is dead. Make sure to revisit all N's operands once
    // N is deleted from the DAG, since they too may now be dead or may have a
    // reduced number of uses, allowing other xforms.
    if (N->use_empty() && N != &Dummy) {
      for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
        AddToWorkList(N->getOperand(i).getNode());

      DAG.DeleteNode(N);
      continue;
    }

    SDValue RV = combine(N);

    if (!RV.getNode())
      continue;

    ++NodesCombined;

    // If we get back the same node we passed in, rather than a new node or
    // zero, we know that the node must have defined multiple values and
    // CombineTo was used. Since CombineTo takes care of the worklist
    // mechanics for us, we have no work to do in this case.
    if (RV.getNode() == N)
      continue;

    assert(N->getOpcode() != ISD::DELETED_NODE &&
           RV.getNode()->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned new node!");

    DEBUG(dbgs() << "\nReplacing.3 ";
          N->dump(&DAG);
          dbgs() << "\nWith: ";
          RV.getNode()->dump(&DAG);
          dbgs() << '\n');

    // Transfer debug value.
    DAG.TransferDbgValues(SDValue(N, 0), RV);
    WorkListRemover DeadNodes(*this);
    if (N->getNumValues() == RV.getNode()->getNumValues())
      DAG.ReplaceAllUsesWith(N, RV.getNode());
    else {
      // The value counts differ; this is only legal when the old node had a
      // single value, which is replaced by RV's first value.
      assert(N->getValueType(0) == RV.getValueType() &&
             N->getNumValues() == 1 && "Type mismatch");
      SDValue OpV = RV;
      DAG.ReplaceAllUsesWith(N, &OpV);
    }

    // Push the new node and any users onto the worklist
    AddToWorkList(RV.getNode());
    AddUsersToWorkList(RV.getNode());

    // Add any uses of the old node to the worklist in case this node is the
    // last one that uses them. They may become dead after this node is
    // deleted.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      AddToWorkList(N->getOperand(i).getNode());

    // Finally, if the node is now dead, remove it from the graph. The node
    // may not be dead if the replacement process recursively simplified to
    // something else needing this node.
    if (N->use_empty()) {
      // Nodes can be reintroduced into the worklist. Make sure we do not
      // process a node that has been replaced.
      removeFromWorkList(N);

      // Finally, since the node is now dead, remove it from the graph.
      DAG.DeleteNode(N);
    }
  }

  // If the root changed (e.g. it was a dead load), update the root.
  DAG.setRoot(Dummy.getValue());
  DAG.RemoveDeadNodes();
}

/// visit - Dispatch to the opcode-specific combine routine for N, returning
/// the replacement value or a null SDValue when no combine applies.
SDValue DAGCombiner::visit(SDNode *N) {
  switch (N->getOpcode()) {
  default: break;
  case ISD::TokenFactor:        return visitTokenFactor(N);
  case ISD::MERGE_VALUES:       return visitMERGE_VALUES(N);
  case ISD::ADD:                return visitADD(N);
  case ISD::SUB:                return visitSUB(N);
  case ISD::ADDC:               return visitADDC(N);
  case ISD::SUBC:               return visitSUBC(N);
  case ISD::ADDE:               return visitADDE(N);
  case ISD::SUBE:               return visitSUBE(N);
  case ISD::MUL:                return visitMUL(N);
  case ISD::SDIV:               return visitSDIV(N);
  case ISD::UDIV:               return visitUDIV(N);
  case ISD::SREM:               return visitSREM(N);
  case ISD::UREM:               return visitUREM(N);
  case ISD::MULHU:              return visitMULHU(N);
  case ISD::MULHS:              return visitMULHS(N);
  case ISD::SMUL_LOHI:          return visitSMUL_LOHI(N);
  case ISD::UMUL_LOHI:          return visitUMUL_LOHI(N);
  case ISD::SMULO:              return visitSMULO(N);
  case ISD::UMULO:              return visitUMULO(N);
  case ISD::SDIVREM:            return visitSDIVREM(N);
  case ISD::UDIVREM:            return visitUDIVREM(N);
  case ISD::AND:                return visitAND(N);
  case ISD::OR:                 return visitOR(N);
  case ISD::XOR:                return visitXOR(N);
  case ISD::SHL:                return visitSHL(N);
  case ISD::SRA:                return visitSRA(N);
  case ISD::SRL:                return visitSRL(N);
  case ISD::ROTR:
  case ISD::ROTL:               return visitRotate(N);
  case ISD::CTLZ:               return visitCTLZ(N);
  case ISD::CTLZ_ZERO_UNDEF:    return visitCTLZ_ZERO_UNDEF(N);
  case ISD::CTTZ:               return visitCTTZ(N);
  case ISD::CTTZ_ZERO_UNDEF:    return visitCTTZ_ZERO_UNDEF(N);
  case ISD::CTPOP:              return visitCTPOP(N);
  case ISD::SELECT:             return visitSELECT(N);
  case ISD::VSELECT:            return visitVSELECT(N);
  case ISD::SELECT_CC:          return visitSELECT_CC(N);
  case ISD::SETCC:              return visitSETCC(N);
  case ISD::SIGN_EXTEND:        return visitSIGN_EXTEND(N);
  case ISD::ZERO_EXTEND:        return visitZERO_EXTEND(N);
  case
  ISD::ANY_EXTEND:              return visitANY_EXTEND(N);
  case ISD::SIGN_EXTEND_INREG:  return visitSIGN_EXTEND_INREG(N);
  case ISD::TRUNCATE:           return visitTRUNCATE(N);
  case ISD::BITCAST:            return visitBITCAST(N);
  case ISD::BUILD_PAIR:         return visitBUILD_PAIR(N);
  case ISD::FADD:               return visitFADD(N);
  case ISD::FSUB:               return visitFSUB(N);
  case ISD::FMUL:               return visitFMUL(N);
  case ISD::FMA:                return visitFMA(N);
  case ISD::FDIV:               return visitFDIV(N);
  case ISD::FREM:               return visitFREM(N);
  case ISD::FCOPYSIGN:          return visitFCOPYSIGN(N);
  case ISD::SINT_TO_FP:         return visitSINT_TO_FP(N);
  case ISD::UINT_TO_FP:         return visitUINT_TO_FP(N);
  case ISD::FP_TO_SINT:         return visitFP_TO_SINT(N);
  case ISD::FP_TO_UINT:         return visitFP_TO_UINT(N);
  case ISD::FP_ROUND:           return visitFP_ROUND(N);
  case ISD::FP_ROUND_INREG:     return visitFP_ROUND_INREG(N);
  case ISD::FP_EXTEND:          return visitFP_EXTEND(N);
  case ISD::FNEG:               return visitFNEG(N);
  case ISD::FABS:               return visitFABS(N);
  case ISD::FFLOOR:             return visitFFLOOR(N);
  case ISD::FCEIL:              return visitFCEIL(N);
  case ISD::FTRUNC:             return visitFTRUNC(N);
  case ISD::BRCOND:             return visitBRCOND(N);
  case ISD::BR_CC:              return visitBR_CC(N);
  case ISD::LOAD:               return visitLOAD(N);
  case ISD::STORE:              return visitSTORE(N);
  case ISD::INSERT_VECTOR_ELT:  return visitINSERT_VECTOR_ELT(N);
  case ISD::EXTRACT_VECTOR_ELT: return visitEXTRACT_VECTOR_ELT(N);
  case ISD::BUILD_VECTOR:       return visitBUILD_VECTOR(N);
  case ISD::CONCAT_VECTORS:     return visitCONCAT_VECTORS(N);
  case ISD::EXTRACT_SUBVECTOR:  return visitEXTRACT_SUBVECTOR(N);
  case ISD::VECTOR_SHUFFLE:     return visitVECTOR_SHUFFLE(N);
  case ISD::INSERT_SUBVECTOR:   return visitINSERT_SUBVECTOR(N);
  }
  return SDValue();
}

/// combine - Run visit on N and, when that produces nothing, fall back in
/// turn to target-specific combines, integer-operation promotion, and
/// commuted-operand CSE.
SDValue DAGCombiner::combine(SDNode *N) {
  SDValue RV = visit(N);

  // If nothing happened, try a target-specific DAG combine.
  if (!RV.getNode()) {
    assert(N->getOpcode() != ISD::DELETED_NODE &&
           "Node was deleted but visit returned NULL!");

    if (N->getOpcode() >= ISD::BUILTIN_OP_END ||
        TLI.hasTargetDAGCombine((ISD::NodeType)N->getOpcode())) {

      // Expose the DAG combiner to the target combiner impls.
      TargetLowering::DAGCombinerInfo
        DagCombineInfo(DAG, Level, false, this);

      RV = TLI.PerformDAGCombine(N, DagCombineInfo);
    }
  }

  // If nothing happened still, try promoting the operation.
  if (!RV.getNode()) {
    switch (N->getOpcode()) {
    default: break;
    case ISD::ADD:
    case ISD::SUB:
    case ISD::MUL:
    case ISD::AND:
    case ISD::OR:
    case ISD::XOR:
      RV = PromoteIntBinOp(SDValue(N, 0));
      break;
    case ISD::SHL:
    case ISD::SRA:
    case ISD::SRL:
      RV = PromoteIntShiftOp(SDValue(N, 0));
      break;
    case ISD::SIGN_EXTEND:
    case ISD::ZERO_EXTEND:
    case ISD::ANY_EXTEND:
      RV = PromoteExtend(SDValue(N, 0));
      break;
    case ISD::LOAD:
      // PromoteLoad replaces N in place, so report N itself as the result.
      if (PromoteLoad(SDValue(N, 0)))
        RV = SDValue(N, 0);
      break;
    }
  }

  // If N is a commutative binary node, try commuting it to enable more
  // sdisel CSE.
  if (!RV.getNode() && SelectionDAG::isCommutativeBinOp(N->getOpcode()) &&
      N->getNumValues() == 1) {
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);

    // Constant operands are canonicalized to RHS.
    if (isa<ConstantSDNode>(N0) || !isa<ConstantSDNode>(N1)) {
      SDValue Ops[] = {N1, N0};
      SDNode *CSENode;
      // Preserve arithmetic flags (nuw/nsw/exact) when looking up the
      // commuted form, so we only CSE with a flag-identical node.
      if (const BinaryWithFlagsSDNode *BinNode =
              dyn_cast<BinaryWithFlagsSDNode>(N)) {
        CSENode = DAG.getNodeIfExists(
            N->getOpcode(), N->getVTList(), Ops, BinNode->hasNoUnsignedWrap(),
            BinNode->hasNoSignedWrap(), BinNode->isExact());
      } else {
        CSENode = DAG.getNodeIfExists(N->getOpcode(), N->getVTList(), Ops);
      }
      if (CSENode)
        return SDValue(CSENode, 0);
    }
  }

  return RV;
}

/// getInputChainForNode - Given a node, return its input chain if it has one,
/// otherwise return a null sd operand.
static SDValue getInputChainForNode(SDNode *N) {
  if (unsigned NumOps = N->getNumOperands()) {
    // Chains conventionally appear first or last; check those before
    // scanning the middle operands.
    if (N->getOperand(0).getValueType() == MVT::Other)
      return N->getOperand(0);
    if (N->getOperand(NumOps-1).getValueType() == MVT::Other)
      return N->getOperand(NumOps-1);
    for (unsigned i = 1; i < NumOps-1; ++i)
      if (N->getOperand(i).getValueType() == MVT::Other)
        return N->getOperand(i);
  }
  return SDValue();
}

/// visitTokenFactor - Flatten nested single-use token factors and drop
/// redundant or duplicate chain operands.
SDValue DAGCombiner::visitTokenFactor(SDNode *N) {
  // If N has two operands, where one has an input chain equal to the other,
  // the 'other' chain is redundant.
  if (N->getNumOperands() == 2) {
    if (getInputChainForNode(N->getOperand(0).getNode()) == N->getOperand(1))
      return N->getOperand(0);
    if (getInputChainForNode(N->getOperand(1).getNode()) == N->getOperand(0))
      return N->getOperand(1);
  }

  SmallVector<SDNode *, 8> TFs;   // List of token factors to visit.
  SmallVector<SDValue, 8> Ops;    // Ops for replacing token factor.
  SmallPtrSet<SDNode*, 16> SeenOps;
  bool Changed = false;           // If we should replace this token factor.

  // Start out with this token factor.
  TFs.push_back(N);

  // Iterate through token factors. The TFs grows when new token factors are
  // encountered.
  for (unsigned i = 0; i < TFs.size(); ++i) {
    SDNode *TF = TFs[i];

    // Check each of the operands.
    for (unsigned i = 0, ie = TF->getNumOperands(); i != ie; ++i) {
      SDValue Op = TF->getOperand(i);

      switch (Op.getOpcode()) {
      case ISD::EntryToken:
        // Entry tokens don't need to be added to the list. They are
        // redundant.
        Changed = true;
        break;

      case ISD::TokenFactor:
        if (Op.hasOneUse() &&
            std::find(TFs.begin(), TFs.end(), Op.getNode()) == TFs.end()) {
          // Queue up for processing.
          TFs.push_back(Op.getNode());
          // Clean up in case the token factor is removed.
          AddToWorkList(Op.getNode());
          Changed = true;
          break;
        }
        // Fall thru

      default:
        // Only add if it isn't already in the list.
        if (SeenOps.insert(Op.getNode()))
          Ops.push_back(Op);
        else
          Changed = true;
        break;
      }
    }
  }

  SDValue Result;

  // If we've changed things around then replace token factor.
  if (Changed) {
    if (Ops.empty()) {
      // The entry token is the only possible outcome.
      Result = DAG.getEntryNode();
    } else {
      // New and improved token factor.
      Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Ops);
    }

    // Don't add users to work list.
    return CombineTo(N, Result, false);
  }

  return Result;
}

/// MERGE_VALUES can always be eliminated.
SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) {
  WorkListRemover DeadNodes(*this);
  // Replacing results may cause a different MERGE_VALUES to suddenly
  // be CSE'd with N, and carry its uses with it. Iterate until no
  // uses remain, to ensure that the node can be safely deleted.
  // First add the users of this node to the work list so that they
  // can be tried again once they have new operands.
  AddUsersToWorkList(N);
  do {
    // Forward each result value to the corresponding operand.
    for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
      DAG.ReplaceAllUsesOfValueWith(SDValue(N, i), N->getOperand(i));
  } while (!N->use_empty());
  removeFromWorkList(N);
  DAG.DeleteNode(N);
  return SDValue(N, 0);   // Return N so it doesn't get rechecked!
}

/// combineShlAddConstant - Fold (add (shl (add x, c1), c2), N1) into
/// (add (add (shl x, c2), c1<<c2), N1) when the inner add has a single use
/// and constant operands.  Returns a null SDValue when no fold applies.
static
SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1,
                              SelectionDAG &DAG) {
  EVT VT = N0.getValueType();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);
  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N01);

  if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N00.getOperand(1))) {
    // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
    N0 = DAG.getNode(ISD::ADD, SDLoc(N0), VT,
                     DAG.getNode(ISD::SHL, SDLoc(N00), VT,
                                 N00.getOperand(0), N01),
                     DAG.getNode(ISD::SHL, SDLoc(N01), VT,
                                 N00.getOperand(1), N01));
    return DAG.getNode(ISD::ADD, DL, VT, N0, N1);
  }

  return SDValue();
}

SDValue DAGCombiner::visitADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (add x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
  }

  // fold (add x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (add c1, c2) -> c1+c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::ADD, VT, N0C, N1C);
  //
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0);
  // fold (add x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (add Sym, c) -> Sym+c
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C &&
        GA->getOpcode() == ISD::GlobalAddress)
      return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                  GA->getOffset() +
                                  (uint64_t)N1C->getSExtValue());
  // fold ((c1-A)+c2) -> (c1+c2)-A
  if (N1C && N0.getOpcode() == ISD::SUB)
    if (ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getOperand(0)))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getConstant(N1C->getAPIntValue()+
                                         N0C->getAPIntValue(), VT),
                         N0.getOperand(1));
  // reassociate add
  SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1);
  if (RADD.getNode())
    return RADD;
  // fold ((0-A) + B) -> B-A
  if (N0.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N0.getOperand(0)) &&
      cast<ConstantSDNode>(N0.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1));
  // fold (A + (0-B)) -> A-B
  if (N1.getOpcode() == ISD::SUB && isa<ConstantSDNode>(N1.getOperand(0)) &&
      cast<ConstantSDNode>(N1.getOperand(0))->isNullValue())
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1));
  // fold (A+(B-A)) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1))
    return N1.getOperand(0);
  // fold ((B-A)+A) -> B
  if (N0.getOpcode() == ISD::SUB && N1 == N0.getOperand(1))
    return N0.getOperand(0);
  // fold (A+(B-(A+C))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(0))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(1));
  // fold (A+(B-(C+A))) to (B-C)
  if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD &&
      N0 == N1.getOperand(1).getOperand(1))
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0),
                       N1.getOperand(1).getOperand(0));
  // fold (A+((B-A)+or-C)) to (B+or-C)
  if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) &&
      N1.getOperand(0).getOpcode() == ISD::SUB &&
      N0 == N1.getOperand(0).getOperand(1))
    return DAG.getNode(N1.getOpcode(), SDLoc(N), VT,
                       N1.getOperand(0).getOperand(0), N1.getOperand(1));

  // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant
  if (N0.getOpcode() == ISD::SUB && N1.getOpcode() == ISD::SUB) {
    SDValue N00 = N0.getOperand(0);
    SDValue N01 = N0.getOperand(1);
    SDValue N10 = N1.getOperand(0);
    SDValue N11 = N1.getOperand(1);

    if (isa<ConstantSDNode>(N00) || isa<ConstantSDNode>(N10))
      return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                         DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10),
                         DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11));
  }

  // See if demanded-bits analysis can simplify this add (scalar types only).
  if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (a+b) -> (a|b) iff a and b share no bits.
  if (VT.isInteger() && !VT.isVector()) {
    APInt LHSZero, LHSOne;
    APInt RHSZero, RHSOne;
    DAG.computeKnownBits(N0, LHSZero, LHSOne);

    if (LHSZero.getBoolValue()) {
      DAG.computeKnownBits(N1, RHSZero, RHSOne);

      // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
      // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
      if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero){
        if (!LegalOperations || TLI.isOperationLegal(ISD::OR, VT))
          return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1);
      }
    }
  }

  // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<<c2), )
  if (N0.getOpcode() == ISD::SHL && N0.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG);
    if (Result.getNode()) return Result;
  }
  if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) {
    SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG);
    if (Result.getNode()) return Result;
  }

  // fold (add x, shl(0 - y, n)) -> sub(x, shl(y, n))
  if (N1.getOpcode() == ISD::SHL &&
      N1.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N1.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N1.getOperand(0).getOperand(1),
                                       N1.getOperand(1)));
  if (N0.getOpcode() == ISD::SHL &&
      N0.getOperand(0).getOpcode() == ISD::SUB)
    if (ConstantSDNode *C =
          dyn_cast<ConstantSDNode>(N0.getOperand(0).getOperand(0)))
      if (C->getAPIntValue() == 0)
        return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1,
                           DAG.getNode(ISD::SHL, SDLoc(N), VT,
                                       N0.getOperand(0).getOperand(1),
                                       N0.getOperand(1)));

  if (N1.getOpcode() == ISD::AND) {
    SDValue AndOp0 = N1.getOperand(0);
    ConstantSDNode *AndOp1 = dyn_cast<ConstantSDNode>(N1->getOperand(1));
    unsigned NumSignBits = DAG.ComputeNumSignBits(AndOp0);
    unsigned DestBits = VT.getScalarType().getSizeInBits();

    // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x))
    // and similar xforms where the inner op is either ~0 or 0.
    if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) {
      SDLoc DL(N);
      return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0);
    }
  }

  // add (sext i1), X -> sub X, (zext i1)
  if (N0.getOpcode() == ISD::SIGN_EXTEND &&
      N0.getOperand(0).getValueType() == MVT::i1 &&
      !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) {
    SDLoc DL(N);
    // sext i1 is -1 or 0, so adding it equals subtracting the zext (1 or 0).
    SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0));
    return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt);
  }

  return SDValue();
}

SDValue DAGCombiner::visitADDC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an ADD.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE,
                                 SDLoc(N), MVT::Glue));

  // canonicalize constant to RHS.
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0);

  // fold (addc x, 0) -> x + no carry out
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE,
                                        SDLoc(N), MVT::Glue));

  // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
  APInt LHSZero, LHSOne;
  APInt RHSZero, RHSOne;
  DAG.computeKnownBits(N0, LHSZero, LHSOne);

  if (LHSZero.getBoolValue()) {
    DAG.computeKnownBits(N1, RHSZero, RHSOne);

    // If all possibly-set bits on the LHS are clear on the RHS, return an OR.
    // If all possibly-set bits on the RHS are clear on the LHS, return an OR.
    if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero)
      return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1),
                       DAG.getNode(ISD::CARRY_FALSE,
                                   SDLoc(N), MVT::Glue));
  }

  return SDValue();
}

SDValue DAGCombiner::visitADDE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);

  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(),
                       N1, N0, CarryIn);

  // fold (adde x, y, false) -> (addc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

// Since it may not be valid to emit a fold to zero for vector initializers
// check if we can before folding.
static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT,
                             SelectionDAG &DAG,
                             bool LegalOperations, bool LegalTypes) {
  if (!VT.isVector())
    return DAG.getConstant(0, VT);
  if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return DAG.getConstant(0, VT);
  return SDValue();
}

SDValue DAGCombiner::visitSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0.getNode());
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  // When N1 is (add x, c1), remember c1 so the C2-(A+C1) fold below applies.
  ConstantSDNode *N1C1 = N1.getOpcode() != ISD::ADD ?
    nullptr : dyn_cast<ConstantSDNode>(N1.getOperand(1).getNode());
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (sub x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (sub x, x) -> 0
  // FIXME: Refactor this and xor and other similar operations together.
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);
  // fold (sub c1, c2) -> c1-c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C);
  // fold (sub x, c) -> (add x, -c)
  if (N1C)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0,
                       DAG.getConstant(-N1C->getAPIntValue(), VT));
  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1)
  if (N0C && N0C->isAllOnesValue())
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold A-(A-B) -> B
  if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0))
    return N1.getOperand(1);
  // fold (A+B)-A -> B
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(0) == N1)
    return N0.getOperand(1);
  // fold (A+B)-B -> A
  if (N0.getOpcode() == ISD::ADD && N0.getOperand(1) == N1)
    return N0.getOperand(0);
  // fold C2-(A+C1) -> (C2-C1)-A
  if (N1.getOpcode() == ISD::ADD && N0C && N1C1) {
    SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(),
                                   VT);
    return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC,
                       N1.getOperand(0));
  }
  // fold ((A+(B+or-C))-B) -> A+or-C
  if (N0.getOpcode() == ISD::ADD &&
      (N0.getOperand(1).getOpcode() == ISD::SUB ||
       N0.getOperand(1).getOpcode() == ISD::ADD) &&
      N0.getOperand(1).getOperand(0) == N1)
    return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(1));
  // fold ((A+(C+B))-B) -> A+C
  if (N0.getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOpcode() == ISD::ADD &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::ADD, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));
  // fold ((A-(B-C))-C) -> A-B
  if (N0.getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOpcode() == ISD::SUB &&
      N0.getOperand(1).getOperand(1) == N1)
    return DAG.getNode(ISD::SUB, SDLoc(N), VT,
                       N0.getOperand(0), N0.getOperand(1).getOperand(0));

  // If either operand of a sub is undef, the result is undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  // If the relocation model supports it, consider symbol offsets.
  if (GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(N0))
    if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) {
      // fold (sub Sym, c) -> Sym-c
      if (N1C && GA->getOpcode() == ISD::GlobalAddress)
        return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT,
                                    GA->getOffset() -
                                    (uint64_t)N1C->getSExtValue());
      // fold (sub Sym+c1, Sym+c2) -> c1-c2
      if (GlobalAddressSDNode *GB = dyn_cast<GlobalAddressSDNode>(N1))
        if (GA->getGlobal() == GB->getGlobal())
          return DAG.getConstant((uint64_t)GA->getOffset() - GB->getOffset(),
                                 VT);
    }

  return SDValue();
}

SDValue DAGCombiner::visitSUBC(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // If the flag result is dead, turn this into an SUB.
  if (!N->hasAnyUseOfValue(1))
    return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, x) -> 0 + no borrow
  if (N0 == N1)
    return CombineTo(N, DAG.getConstant(0, VT),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  // fold (subc x, 0) -> x + no borrow
  if (N1C && N1C->isNullValue())
    return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                        MVT::Glue));

  // Canonicalize (sub -1, x) -> ~x, i.e. (xor x, -1) + no borrow
  if (N0C && N0C->isAllOnesValue())
    return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                 MVT::Glue));

  return SDValue();
}

SDValue DAGCombiner::visitSUBE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue CarryIn = N->getOperand(2);

  // fold (sube x, y, false) -> (subc x, y)
  if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);

  return SDValue();
}

SDValue DAGCombiner::visitMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N0.getValueType();

  // fold (mul x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  bool N0IsConst = false;
  bool N1IsConst = false;
  APInt ConstValue0, ConstValue1;
  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // Vector constants are splat build_vectors; extract the splat value.
    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
  } else {
    N0IsConst = dyn_cast<ConstantSDNode>(N0) != nullptr;
    ConstValue0 = N0IsConst ?
(dyn_cast<ConstantSDNode>(N0))->getAPIntValue() 1885 : APInt(); 1886 N1IsConst = dyn_cast<ConstantSDNode>(N1) != nullptr; 1887 ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue() 1888 : APInt(); 1889 } 1890 1891 // fold (mul c1, c2) -> c1*c2 1892 if (N0IsConst && N1IsConst) 1893 return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode()); 1894 1895 // canonicalize constant to RHS 1896 if (N0IsConst && !N1IsConst) 1897 return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0); 1898 // fold (mul x, 0) -> 0 1899 if (N1IsConst && ConstValue1 == 0) 1900 return N1; 1901 // We require a splat of the entire scalar bit width for non-contiguous 1902 // bit patterns. 1903 bool IsFullSplat = 1904 ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); 1905 // fold (mul x, 1) -> x 1906 if (N1IsConst && ConstValue1 == 1 && IsFullSplat) 1907 return N0; 1908 // fold (mul x, -1) -> 0-x 1909 if (N1IsConst && ConstValue1.isAllOnesValue()) 1910 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1911 DAG.getConstant(0, VT), N0); 1912 // fold (mul x, (1 << c)) -> x << c 1913 if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) 1914 return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, 1915 DAG.getConstant(ConstValue1.logBase2(), 1916 getShiftAmountTy(N0.getValueType()))); 1917 // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c 1918 if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { 1919 unsigned Log2Val = (-ConstValue1).logBase2(); 1920 // FIXME: If the input is something that is easily negated (e.g. a 1921 // single-use add), we should put the negate there. 
1922 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 1923 DAG.getConstant(0, VT), 1924 DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, 1925 DAG.getConstant(Log2Val, 1926 getShiftAmountTy(N0.getValueType())))); 1927 } 1928 1929 APInt Val; 1930 // (mul (shl X, c1), c2) -> (mul X, c2 << c1) 1931 if (N1IsConst && N0.getOpcode() == ISD::SHL && 1932 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 1933 isa<ConstantSDNode>(N0.getOperand(1)))) { 1934 SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, 1935 N1, N0.getOperand(1)); 1936 AddToWorkList(C3.getNode()); 1937 return DAG.getNode(ISD::MUL, SDLoc(N), VT, 1938 N0.getOperand(0), C3); 1939 } 1940 1941 // Change (mul (shl X, C), Y) -> (shl (mul X, Y), C) when the shift has one 1942 // use. 1943 { 1944 SDValue Sh(nullptr,0), Y(nullptr,0); 1945 // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 1946 if (N0.getOpcode() == ISD::SHL && 1947 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 1948 isa<ConstantSDNode>(N0.getOperand(1))) && 1949 N0.getNode()->hasOneUse()) { 1950 Sh = N0; Y = N1; 1951 } else if (N1.getOpcode() == ISD::SHL && 1952 isa<ConstantSDNode>(N1.getOperand(1)) && 1953 N1.getNode()->hasOneUse()) { 1954 Sh = N1; Y = N0; 1955 } 1956 1957 if (Sh.getNode()) { 1958 SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, 1959 Sh.getOperand(0), Y); 1960 return DAG.getNode(ISD::SHL, SDLoc(N), VT, 1961 Mul, Sh.getOperand(1)); 1962 } 1963 } 1964 1965 // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) 1966 if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && 1967 (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || 1968 isa<ConstantSDNode>(N0.getOperand(1)))) 1969 return DAG.getNode(ISD::ADD, SDLoc(N), VT, 1970 DAG.getNode(ISD::MUL, SDLoc(N0), VT, 1971 N0.getOperand(0), N1), 1972 DAG.getNode(ISD::MUL, SDLoc(N1), VT, 1973 N0.getOperand(1), N1)); 1974 1975 // reassociate mul 1976 SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); 1977 if (RMUL.getNode()) 1978 return 
RMUL; 1979 1980 return SDValue(); 1981 } 1982 1983 SDValue DAGCombiner::visitSDIV(SDNode *N) { 1984 SDValue N0 = N->getOperand(0); 1985 SDValue N1 = N->getOperand(1); 1986 ConstantSDNode *N0C = isConstOrConstSplat(N0); 1987 ConstantSDNode *N1C = isConstOrConstSplat(N1); 1988 EVT VT = N->getValueType(0); 1989 1990 // fold vector ops 1991 if (VT.isVector()) { 1992 SDValue FoldedVOp = SimplifyVBinOp(N); 1993 if (FoldedVOp.getNode()) return FoldedVOp; 1994 } 1995 1996 // fold (sdiv c1, c2) -> c1/c2 1997 if (N0C && N1C && !N1C->isNullValue()) 1998 return DAG.FoldConstantArithmetic(ISD::SDIV, VT, N0C, N1C); 1999 // fold (sdiv X, 1) -> X 2000 if (N1C && N1C->getAPIntValue() == 1LL) 2001 return N0; 2002 // fold (sdiv X, -1) -> 0-X 2003 if (N1C && N1C->isAllOnesValue()) 2004 return DAG.getNode(ISD::SUB, SDLoc(N), VT, 2005 DAG.getConstant(0, VT), N0); 2006 // If we know the sign bits of both operands are zero, strength reduce to a 2007 // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 2008 if (!VT.isVector()) { 2009 if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) 2010 return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), 2011 N0, N1); 2012 } 2013 2014 // fold (sdiv X, pow2) -> simple ops after legalize 2015 if (N1C && !N1C->isNullValue() && (N1C->getAPIntValue().isPowerOf2() || 2016 (-N1C->getAPIntValue()).isPowerOf2())) { 2017 // If dividing by powers of two is cheap, then don't perform the following 2018 // fold. 2019 if (TLI.isPow2DivCheap()) 2020 return SDValue(); 2021 2022 unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); 2023 2024 // Splat the sign bit into the register 2025 SDValue SGN = 2026 DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, 2027 DAG.getConstant(VT.getScalarSizeInBits() - 1, 2028 getShiftAmountTy(N0.getValueType()))); 2029 AddToWorkList(SGN.getNode()); 2030 2031 // Add (N0 < 0) ? 
abs2 - 1 : 0; 2032 SDValue SRL = 2033 DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, 2034 DAG.getConstant(VT.getScalarSizeInBits() - lg2, 2035 getShiftAmountTy(SGN.getValueType()))); 2036 SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); 2037 AddToWorkList(SRL.getNode()); 2038 AddToWorkList(ADD.getNode()); // Divide by pow2 2039 SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, 2040 DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); 2041 2042 // If we're dividing by a positive value, we're done. Otherwise, we must 2043 // negate the result. 2044 if (N1C->getAPIntValue().isNonNegative()) 2045 return SRA; 2046 2047 AddToWorkList(SRA.getNode()); 2048 return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); 2049 } 2050 2051 // if integer divide is expensive and we satisfy the requirements, emit an 2052 // alternate sequence. 2053 if (N1C && !TLI.isIntDivCheap()) { 2054 SDValue Op = BuildSDIV(N); 2055 if (Op.getNode()) return Op; 2056 } 2057 2058 // undef / X -> 0 2059 if (N0.getOpcode() == ISD::UNDEF) 2060 return DAG.getConstant(0, VT); 2061 // X / undef -> undef 2062 if (N1.getOpcode() == ISD::UNDEF) 2063 return N1; 2064 2065 return SDValue(); 2066 } 2067 2068 SDValue DAGCombiner::visitUDIV(SDNode *N) { 2069 SDValue N0 = N->getOperand(0); 2070 SDValue N1 = N->getOperand(1); 2071 ConstantSDNode *N0C = isConstOrConstSplat(N0); 2072 ConstantSDNode *N1C = isConstOrConstSplat(N1); 2073 EVT VT = N->getValueType(0); 2074 2075 // fold vector ops 2076 if (VT.isVector()) { 2077 SDValue FoldedVOp = SimplifyVBinOp(N); 2078 if (FoldedVOp.getNode()) return FoldedVOp; 2079 } 2080 2081 // fold (udiv c1, c2) -> c1/c2 2082 if (N0C && N1C && !N1C->isNullValue()) 2083 return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); 2084 // fold (udiv x, (1 << c)) -> x >>u c 2085 if (N1C && N1C->getAPIntValue().isPowerOf2()) 2086 return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, 2087 DAG.getConstant(N1C->getAPIntValue().logBase2(), 2088 
getShiftAmountTy(N0.getValueType())));
  // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        EVT ADDVT = N1.getOperand(1).getValueType();
        SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT,
                                  N1.getOperand(1),
                                  DAG.getConstant(SHC->getAPIntValue()
                                                                  .logBase2(),
                                                  ADDVT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add);
      }
    }
  }
  // fold (udiv x, c) -> alternate
  // Expand via the magic-number algorithm when a hardware divide is costly.
  if (N1C && !TLI.isIntDivCheap()) {
    SDValue Op = BuildUDIV(N);
    if (Op.getNode()) return Op;
  }

  // undef / X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X / undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitSREM - Combine an ISD::SREM node.
SDValue DAGCombiner::visitSREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold (srem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::SREM, VT, N0C, N1C);
  // If we know the sign bits of both operands are zero, strength reduce to a
  // urem instead.  Handles (X & 0x0FFFFFFF) %s 16 -> X&15
  if (!VT.isVector()) {
    if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0))
      return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1);
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  // The speculative SDIV is only kept if combine() improves it; otherwise it
  // is left dead and later removed.
  if (N1C && !N1C->isNullValue()) {
    SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1);
    AddToWorkList(Div.getNode());
    SDValue OptimizedDiv = combine(Div.getNode());
    if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) {
      SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT,
                                OptimizedDiv, N1);
      SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul);
      AddToWorkList(Mul.getNode());
      return Sub;
    }
  }

  // undef % X -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // X % undef -> undef
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;

  return SDValue();
}

/// visitUREM - Combine an ISD::UREM node.
SDValue DAGCombiner::visitUREM(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = isConstOrConstSplat(N0);
  ConstantSDNode *N1C = isConstOrConstSplat(N1);
  EVT VT = N->getValueType(0);

  // fold (urem c1, c2) -> c1%c2
  if (N0C && N1C && !N1C->isNullValue())
    return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C);
  // fold (urem x, pow2) -> (and x, pow2-1)
  if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2())
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0,
                       DAG.getConstant(N1C->getAPIntValue()-1,VT));
  // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1))
  if (N1.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *SHC = dyn_cast<ConstantSDNode>(N1.getOperand(0))) {
      if (SHC->getAPIntValue().isPowerOf2()) {
        // Adding all-ones (-1) to the shifted power of two yields the mask.
        SDValue Add =
          DAG.getNode(ISD::ADD, SDLoc(N), VT, N1,
                 DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()),
                                 VT));
        AddToWorkList(Add.getNode());
        return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add);
      }
    }
  }

  // If X/C can be simplified by the division-by-constant logic, lower
  // X%C to the equivalent of X-X/C*C.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // mulhs x, y -> trunc ((sext x * sext y) >> size(x))
      N0 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// visitMULHU - Combine an ISD::MULHU node (high half of an unsigned
/// multiply).
SDValue DAGCombiner::visitMULHU(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // fold (mulhu x, 0) -> 0
  if (N1C && N1C->isNullValue())
    return N1;
  // fold (mulhu x, 1) -> 0
  if (N1C && N1C->getAPIntValue() == 1)
    return DAG.getConstant(0, N0.getValueType());
  // fold (mulhu x, undef) -> 0
  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      // mulhu x, y -> trunc ((zext x * zext y) >> size(x))
      N0 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N0);
      N1 = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N1);
      N1 = DAG.getNode(ISD::MUL, DL, NewVT, N0, N1);
      N1 = DAG.getNode(ISD::SRL, DL, NewVT, N1,
            DAG.getConstant(SimpleSize, getShiftAmountTy(N1.getValueType())));
      return DAG.getNode(ISD::TRUNCATE, DL, VT, N1);
    }
  }

  return SDValue();
}

/// SimplifyNodeWithTwoResults - Perform optimizations common to nodes that
/// compute two values. LoOp and HiOp give the opcodes for the two computations
/// that are being performed. Return true if a simplification was made.
///
SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp,
                                                unsigned HiOp) {
  // If the high half is not needed, just compute the low half.
  bool HiExists = N->hasAnyUseOfValue(1);
  if (!HiExists &&
      (!LegalOperations ||
       TLI.isOperationLegalOrCustom(LoOp, N->getValueType(0)))) {
    SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
                              ArrayRef<SDUse>(N->op_begin(), N->op_end()));
    return CombineTo(N, Res, Res);
  }

  // If the low half is not needed, just compute the high half.
  // NOTE(review): this branch checks isOperationLegal while the low-half
  // branch above checks isOperationLegalOrCustom — looks asymmetric; confirm
  // whether custom-lowered HiOp should also be accepted here.
  bool LoExists = N->hasAnyUseOfValue(0);
  if (!LoExists &&
      (!LegalOperations ||
       TLI.isOperationLegal(HiOp, N->getValueType(1)))) {
    SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
                              ArrayRef<SDUse>(N->op_begin(), N->op_end()));
    return CombineTo(N, Res, Res);
  }

  // If both halves are used, return as it is.
  if (LoExists && HiExists)
    return SDValue();

  // If the two computed results can be simplified separately, separate them.
  if (LoExists) {
    // Build the low half speculatively; keep it only if combine() improves it.
    SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0),
                             ArrayRef<SDUse>(N->op_begin(), N->op_end()));
    AddToWorkList(Lo.getNode());
    SDValue LoOpt = combine(Lo.getNode());
    if (LoOpt.getNode() && LoOpt.getNode() != Lo.getNode() &&
        (!LegalOperations ||
         TLI.isOperationLegal(LoOpt.getOpcode(), LoOpt.getValueType())))
      return CombineTo(N, LoOpt, LoOpt);
  }

  if (HiExists) {
    // Same speculative approach for the high half.
    SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1),
                             ArrayRef<SDUse>(N->op_begin(), N->op_end()));
    AddToWorkList(Hi.getNode());
    SDValue HiOpt = combine(Hi.getNode());
    if (HiOpt.getNode() && HiOpt != Hi &&
        (!LegalOperations ||
         TLI.isOperationLegal(HiOpt.getOpcode(), HiOpt.getValueType())))
      return CombineTo(N, HiOpt, HiOpt);
  }

  return SDValue();
}

/// visitSMUL_LOHI - Combine an ISD::SMUL_LOHI node (signed multiply producing
/// both the low and high halves of the result).
SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHS);
  if (Res.getNode()) return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::SIGN_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// visitUMUL_LOHI - Combine an ISD::UMUL_LOHI node (unsigned multiply
/// producing both the low and high halves of the result).
SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::MUL, ISD::MULHU);
  if (Res.getNode()) return Res;

  EVT VT = N->getValueType(0);
  SDLoc DL(N);

  // If the type twice as wide is legal, transform the mulhu to a wider multiply
  // plus a shift.
  if (VT.isSimple() && !VT.isVector()) {
    MVT Simple = VT.getSimpleVT();
    unsigned SimpleSize = Simple.getSizeInBits();
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), SimpleSize*2);
    if (TLI.isOperationLegal(ISD::MUL, NewVT)) {
      SDValue Lo = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(0));
      SDValue Hi = DAG.getNode(ISD::ZERO_EXTEND, DL, NewVT, N->getOperand(1));
      Lo = DAG.getNode(ISD::MUL, DL, NewVT, Lo, Hi);
      // Compute the high part as N1.
      Hi = DAG.getNode(ISD::SRL, DL, NewVT, Lo,
            DAG.getConstant(SimpleSize, getShiftAmountTy(Lo.getValueType())));
      Hi = DAG.getNode(ISD::TRUNCATE, DL, VT, Hi);
      // Compute the low part as N0.
      Lo = DAG.getNode(ISD::TRUNCATE, DL, VT, Lo);
      return CombineTo(N, Lo, Hi);
    }
  }

  return SDValue();
}

/// visitSMULO - Combine an ISD::SMULO node (signed multiply with overflow
/// flag).
SDValue DAGCombiner::visitSMULO(SDNode *N) {
  // (smulo x, 2) -> (saddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// visitUMULO - Combine an ISD::UMULO node (unsigned multiply with overflow
/// flag).
SDValue DAGCombiner::visitUMULO(SDNode *N) {
  // (umulo x, 2) -> (uaddo x, x)
  if (ConstantSDNode *C2 = dyn_cast<ConstantSDNode>(N->getOperand(1)))
    if (C2->getAPIntValue() == 2)
      return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(),
                         N->getOperand(0), N->getOperand(0));

  return SDValue();
}

/// visitSDIVREM - Combine an ISD::SDIVREM node via the common two-result
/// simplification.
SDValue DAGCombiner::visitSDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::SDIV, ISD::SREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

/// visitUDIVREM - Combine an ISD::UDIVREM node via the common two-result
/// simplification.
SDValue DAGCombiner::visitUDIVREM(SDNode *N) {
  SDValue Res = SimplifyNodeWithTwoResults(N, ISD::UDIV, ISD::UREM);
  if (Res.getNode()) return Res;

  return SDValue();
}

/// SimplifyBinOpWithSameOpcodeHands - If this is a binary operator with
/// two operands of the same opcode, try to simplify it.
SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
  SDValue N0 = N->getOperand(0), N1 = N->getOperand(1);
  EVT VT = N0.getValueType();
  assert(N0.getOpcode() == N1.getOpcode() && "Bad input!");

  // Bail early if none of these transforms apply.
  if (N0.getNode()->getNumOperands() == 0) return SDValue();

  // For each of OP in AND/OR/XOR:
  // fold (OP (zext x), (zext y)) -> (zext (OP x, y))
  // fold (OP (sext x), (sext y)) -> (sext (OP x, y))
  // fold (OP (aext x), (aext y)) -> (aext (OP x, y))
  // fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
  //
  // do not sink logical op inside of a vector extend, since it may combine
  // into a vsetcc.
  EVT Op0VT = N0.getOperand(0).getValueType();
  if ((N0.getOpcode() == ISD::ZERO_EXTEND ||
       N0.getOpcode() == ISD::SIGN_EXTEND ||
       // Avoid infinite looping with PromoteIntBinOp.
       (N0.getOpcode() == ISD::ANY_EXTEND &&
        (!LegalTypes || TLI.isTypeDesirableForOp(N->getOpcode(), Op0VT))) ||
       (N0.getOpcode() == ISD::TRUNCATE &&
        (!TLI.isZExtFree(VT, Op0VT) ||
         !TLI.isTruncateFree(Op0VT, VT)) &&
        TLI.isTypeLegal(Op0VT))) &&
      !VT.isVector() &&
      Op0VT == N1.getOperand(0).getValueType() &&
      (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode);
  }

  // For each of OP in SHL/SRL/SRA/AND...
  // fold (and (OP x, z), (OP y, z)) -> (OP (and x, y), z)
  // fold (or  (OP x, z), (OP y, z)) -> (OP (or  x, y), z)
  // fold (xor (OP x, z), (OP y, z)) -> (OP (xor x, y), z)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL ||
       N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) &&
      N0.getOperand(1) == N1.getOperand(1)) {
    SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0),
                                 N0.getOperand(0).getValueType(),
                                 N0.getOperand(0), N1.getOperand(0));
    AddToWorkList(ORNode.getNode());
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                       ORNode, N0.getOperand(1));
  }

  // Simplify xor/and/or (bitcast(A), bitcast(B)) -> bitcast(op (A,B))
  // Only perform this optimization after type legalization and before
  // LegalizeVectorOprs. LegalizeVectorOprs promotes vector operations by
  // adding bitcasts. For example (xor v4i32) is promoted to (v2i64), and
  // we don't want to undo this promotion.
  // We also handle SCALAR_TO_VECTOR because xor/or/and operations are cheaper
  // on scalars.
  if ((N0.getOpcode() == ISD::BITCAST ||
       N0.getOpcode() == ISD::SCALAR_TO_VECTOR) &&
      Level == AfterLegalizeTypes) {
    SDValue In0 = N0.getOperand(0);
    SDValue In1 = N1.getOperand(0);
    EVT In0Ty = In0.getValueType();
    EVT In1Ty = In1.getValueType();
    SDLoc DL(N);
    // If both incoming values are integers, and the original types are the
    // same.
    if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) {
      SDValue Op = DAG.getNode(N->getOpcode(), DL, In0Ty, In0, In1);
      SDValue BC = DAG.getNode(N0.getOpcode(), DL, VT, Op);
      AddToWorkList(Op.getNode());
      return BC;
    }
  }

  // Xor/and/or are indifferent to the swizzle operation (shuffle of one value).
  // Simplify xor/and/or (shuff(A), shuff(B)) -> shuff(op (A,B))
  // If both shuffles use the same mask, and both shuffle within a single
  // vector, then it is worthwhile to move the swizzle after the operation.
  // The type-legalizer generates this pattern when loading illegal
  // vector types from memory. In many cases this allows additional shuffle
  // optimizations.
  // There are other cases where moving the shuffle after the xor/and/or
  // is profitable even if shuffles don't perform a swizzle.
  // If both shuffles use the same mask, and both shuffles have the same first
  // or second operand, then it might still be profitable to move the shuffle
  // after the xor/and/or operation.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(N0);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(N1);

    assert(N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
           "Inputs to shuffles are not the same type");

    // Check that both shuffles use the same mask. The masks are known to be of
    // the same length because the result vector type is the same.
    // Check also that shuffles have only one use to avoid introducing extra
    // instructions.
    if (SVN0->hasOneUse() && SVN1->hasOneUse() &&
        SVN0->getMask().equals(SVN1->getMask())) {
      SDValue ShOp = N0->getOperand(1);

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (A, C), shuf (B, C)) -> shuf (AND (A, B), C)
      // (OR  (shuf (A, C), shuf (B, C)) -> shuf (OR  (A, B), C)
      // (XOR (shuf (A, C), shuf (B, C)) -> shuf (XOR (A, B), V_0)
      if (N0.getOperand(1) == N1.getOperand(1) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(0), N1->getOperand(0));
        AddToWorkList(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), NewNode, ShOp,
                                    &SVN0->getMask()[0]);
      }

      // Don't try to fold this node if it requires introducing a
      // build vector of all zeros that might be illegal at this stage.
      ShOp = N0->getOperand(0);
      if (N->getOpcode() == ISD::XOR && ShOp.getOpcode() != ISD::UNDEF) {
        if (!LegalTypes)
          ShOp = DAG.getConstant(0, VT);
        else
          ShOp = SDValue();
      }

      // (AND (shuf (C, A), shuf (C, B)) -> shuf (C, AND (A, B))
      // (OR  (shuf (C, A), shuf (C, B)) -> shuf (C, OR  (A, B))
      // (XOR (shuf (C, A), shuf (C, B)) -> shuf (V_0, XOR (A, B))
      if (N0->getOperand(0) == N1->getOperand(0) && ShOp.getNode()) {
        SDValue NewNode = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                      N0->getOperand(1), N1->getOperand(1));
        AddToWorkList(NewNode.getNode());
        return DAG.getVectorShuffle(VT, SDLoc(N), ShOp, NewNode,
                                    &SVN0->getMask()[0]);
      }
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitAND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();
  unsigned BitWidth = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if
(VT.isVector()) { 2601 SDValue FoldedVOp = SimplifyVBinOp(N); 2602 if (FoldedVOp.getNode()) return FoldedVOp; 2603 2604 // fold (and x, 0) -> 0, vector edition 2605 if (ISD::isBuildVectorAllZeros(N0.getNode())) 2606 return N0; 2607 if (ISD::isBuildVectorAllZeros(N1.getNode())) 2608 return N1; 2609 2610 // fold (and x, -1) -> x, vector edition 2611 if (ISD::isBuildVectorAllOnes(N0.getNode())) 2612 return N1; 2613 if (ISD::isBuildVectorAllOnes(N1.getNode())) 2614 return N0; 2615 } 2616 2617 // fold (and x, undef) -> 0 2618 if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) 2619 return DAG.getConstant(0, VT); 2620 // fold (and c1, c2) -> c1&c2 2621 if (N0C && N1C) 2622 return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); 2623 // canonicalize constant to RHS 2624 if (N0C && !N1C) 2625 return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); 2626 // fold (and x, -1) -> x 2627 if (N1C && N1C->isAllOnesValue()) 2628 return N0; 2629 // if (and x, c) is known to be zero, return 0 2630 if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0), 2631 APInt::getAllOnesValue(BitWidth))) 2632 return DAG.getConstant(0, VT); 2633 // reassociate and 2634 SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); 2635 if (RAND.getNode()) 2636 return RAND; 2637 // fold (and (or x, C), D) -> D if (C & D) == D 2638 if (N1C && N0.getOpcode() == ISD::OR) 2639 if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) 2640 if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue()) 2641 return N1; 2642 // fold (and (any_ext V), c) -> (zero_ext V) if 'and' only clears top bits. 
2643 if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { 2644 SDValue N0Op0 = N0.getOperand(0); 2645 APInt Mask = ~N1C->getAPIntValue(); 2646 Mask = Mask.trunc(N0Op0.getValueSizeInBits()); 2647 if (DAG.MaskedValueIsZero(N0Op0, Mask)) { 2648 SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), 2649 N0.getValueType(), N0Op0); 2650 2651 // Replace uses of the AND with uses of the Zero extend node. 2652 CombineTo(N, Zext); 2653 2654 // We actually want to replace all uses of the any_extend with the 2655 // zero_extend, to avoid duplicating things. This will later cause this 2656 // AND to be folded. 2657 CombineTo(N0.getNode(), Zext); 2658 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2659 } 2660 } 2661 // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> 2662 // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must 2663 // already be zero by virtue of the width of the base type of the load. 2664 // 2665 // the 'X' node here can either be nothing or an extract_vector_elt to catch 2666 // more cases. 2667 if ((N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 2668 N0.getOperand(0).getOpcode() == ISD::LOAD) || 2669 N0.getOpcode() == ISD::LOAD) { 2670 LoadSDNode *Load = cast<LoadSDNode>( (N0.getOpcode() == ISD::LOAD) ? 2671 N0 : N0.getOperand(0) ); 2672 2673 // Get the constant (if applicable) the zero'th operand is being ANDed with. 2674 // This can be a pure constant or a vector splat, in which case we treat the 2675 // vector as a scalar and use the splat value. 
2676 APInt Constant = APInt::getNullValue(1); 2677 if (const ConstantSDNode *C = dyn_cast<ConstantSDNode>(N1)) { 2678 Constant = C->getAPIntValue(); 2679 } else if (BuildVectorSDNode *Vector = dyn_cast<BuildVectorSDNode>(N1)) { 2680 APInt SplatValue, SplatUndef; 2681 unsigned SplatBitSize; 2682 bool HasAnyUndefs; 2683 bool IsSplat = Vector->isConstantSplat(SplatValue, SplatUndef, 2684 SplatBitSize, HasAnyUndefs); 2685 if (IsSplat) { 2686 // Undef bits can contribute to a possible optimisation if set, so 2687 // set them. 2688 SplatValue |= SplatUndef; 2689 2690 // The splat value may be something like "0x00FFFFFF", which means 0 for 2691 // the first vector value and FF for the rest, repeating. We need a mask 2692 // that will apply equally to all members of the vector, so AND all the 2693 // lanes of the constant together. 2694 EVT VT = Vector->getValueType(0); 2695 unsigned BitWidth = VT.getVectorElementType().getSizeInBits(); 2696 2697 // If the splat value has been compressed to a bitlength lower 2698 // than the size of the vector lane, we need to re-expand it to 2699 // the lane size. 2700 if (BitWidth > SplatBitSize) 2701 for (SplatValue = SplatValue.zextOrTrunc(BitWidth); 2702 SplatBitSize < BitWidth; 2703 SplatBitSize = SplatBitSize * 2) 2704 SplatValue |= SplatValue.shl(SplatBitSize); 2705 2706 Constant = APInt::getAllOnesValue(BitWidth); 2707 for (unsigned i = 0, n = SplatBitSize/BitWidth; i < n; ++i) 2708 Constant &= SplatValue.lshr(i*BitWidth).zextOrTrunc(BitWidth); 2709 } 2710 } 2711 2712 // If we want to change an EXTLOAD to a ZEXTLOAD, ensure a ZEXTLOAD is 2713 // actually legal and isn't going to get expanded, else this is a false 2714 // optimisation. 2715 bool CanZextLoadProfitably = TLI.isLoadExtLegal(ISD::ZEXTLOAD, 2716 Load->getMemoryVT()); 2717 2718 // Resize the constant to the same size as the original memory access before 2719 // extension. If it is still the AllOnesValue then this AND is completely 2720 // unneeded. 
2721 Constant = 2722 Constant.zextOrTrunc(Load->getMemoryVT().getScalarType().getSizeInBits()); 2723 2724 bool B; 2725 switch (Load->getExtensionType()) { 2726 default: B = false; break; 2727 case ISD::EXTLOAD: B = CanZextLoadProfitably; break; 2728 case ISD::ZEXTLOAD: 2729 case ISD::NON_EXTLOAD: B = true; break; 2730 } 2731 2732 if (B && Constant.isAllOnesValue()) { 2733 // If the load type was an EXTLOAD, convert to ZEXTLOAD in order to 2734 // preserve semantics once we get rid of the AND. 2735 SDValue NewLoad(Load, 0); 2736 if (Load->getExtensionType() == ISD::EXTLOAD) { 2737 NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, 2738 Load->getValueType(0), SDLoc(Load), 2739 Load->getChain(), Load->getBasePtr(), 2740 Load->getOffset(), Load->getMemoryVT(), 2741 Load->getMemOperand()); 2742 // Replace uses of the EXTLOAD with the new ZEXTLOAD. 2743 if (Load->getNumValues() == 3) { 2744 // PRE/POST_INC loads have 3 values. 2745 SDValue To[] = { NewLoad.getValue(0), NewLoad.getValue(1), 2746 NewLoad.getValue(2) }; 2747 CombineTo(Load, To, 3, true); 2748 } else { 2749 CombineTo(Load, NewLoad.getValue(0), NewLoad.getValue(1)); 2750 } 2751 } 2752 2753 // Fold the AND away, taking care not to fold to the old load node if we 2754 // replaced it. 2755 CombineTo(N, (N0.getNode() == Load) ? NewLoad : N0); 2756 2757 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2758 } 2759 } 2760 // fold (and (setcc x), (setcc y)) -> (setcc (and x, y)) 2761 if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){ 2762 ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get(); 2763 ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get(); 2764 2765 if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 && 2766 LL.getValueType().isInteger()) { 2767 // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) 2768 if (cast<ConstantSDNode>(LR)->isNullValue() && Op1 == ISD::SETEQ) { 2769 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2770 LR.getValueType(), LL, RL); 2771 AddToWorkList(ORNode.getNode()); 2772 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2773 } 2774 // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) 2775 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { 2776 SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), 2777 LR.getValueType(), LL, RL); 2778 AddToWorkList(ANDNode.getNode()); 2779 return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); 2780 } 2781 // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) 2782 if (cast<ConstantSDNode>(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { 2783 SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), 2784 LR.getValueType(), LL, RL); 2785 AddToWorkList(ORNode.getNode()); 2786 return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); 2787 } 2788 } 2789 // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) 2790 if (LL == RL && isa<ConstantSDNode>(LR) && isa<ConstantSDNode>(RR) && 2791 Op0 == Op1 && LL.getValueType().isInteger() && 2792 Op0 == ISD::SETNE && ((cast<ConstantSDNode>(LR)->isNullValue() && 2793 cast<ConstantSDNode>(RR)->isAllOnesValue()) || 2794 (cast<ConstantSDNode>(LR)->isAllOnesValue() && 2795 cast<ConstantSDNode>(RR)->isNullValue()))) { 2796 SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), 2797 LL, DAG.getConstant(1, LL.getValueType())); 2798 AddToWorkList(ADDNode.getNode()); 2799 
return DAG.getSetCC(SDLoc(N), VT, ADDNode, 2800 DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); 2801 } 2802 // canonicalize equivalent to ll == rl 2803 if (LL == RR && LR == RL) { 2804 Op1 = ISD::getSetCCSwappedOperands(Op1); 2805 std::swap(RL, RR); 2806 } 2807 if (LL == RL && LR == RR) { 2808 bool isInteger = LL.getValueType().isInteger(); 2809 ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); 2810 if (Result != ISD::SETCC_INVALID && 2811 (!LegalOperations || 2812 (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && 2813 TLI.isOperationLegal(ISD::SETCC, 2814 getSetCCResultType(N0.getSimpleValueType()))))) 2815 return DAG.getSetCC(SDLoc(N), N0.getValueType(), 2816 LL, LR, Result); 2817 } 2818 } 2819 2820 // Simplify: (and (op x...), (op y...)) -> (op (and x, y)) 2821 if (N0.getOpcode() == N1.getOpcode()) { 2822 SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N); 2823 if (Tmp.getNode()) return Tmp; 2824 } 2825 2826 // fold (and (sign_extend_inreg x, i16 to i32), 1) -> (and x, 1) 2827 // fold (and (sra)) -> (and (srl)) when possible. 2828 if (!VT.isVector() && 2829 SimplifyDemandedBits(SDValue(N, 0))) 2830 return SDValue(N, 0); 2831 2832 // fold (zext_inreg (extload x)) -> (zextload x) 2833 if (ISD::isEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode())) { 2834 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2835 EVT MemVT = LN0->getMemoryVT(); 2836 // If we zero all the possible extended bits, then we can turn this into 2837 // a zextload if we are running before legalize or the operation is legal. 
2838 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2839 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2840 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2841 ((!LegalOperations && !LN0->isVolatile()) || 2842 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2843 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2844 LN0->getChain(), LN0->getBasePtr(), 2845 MemVT, LN0->getMemOperand()); 2846 AddToWorkList(N); 2847 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2848 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2849 } 2850 } 2851 // fold (zext_inreg (sextload x)) -> (zextload x) iff load has one use 2852 if (ISD::isSEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) && 2853 N0.hasOneUse()) { 2854 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 2855 EVT MemVT = LN0->getMemoryVT(); 2856 // If we zero all the possible extended bits, then we can turn this into 2857 // a zextload if we are running before legalize or the operation is legal. 2858 unsigned BitWidth = N1.getValueType().getScalarType().getSizeInBits(); 2859 if (DAG.MaskedValueIsZero(N1, APInt::getHighBitsSet(BitWidth, 2860 BitWidth - MemVT.getScalarType().getSizeInBits())) && 2861 ((!LegalOperations && !LN0->isVolatile()) || 2862 TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { 2863 SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, 2864 LN0->getChain(), LN0->getBasePtr(), 2865 MemVT, LN0->getMemOperand()); 2866 AddToWorkList(N); 2867 CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); 2868 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
2869 } 2870 } 2871 2872 // fold (and (load x), 255) -> (zextload x, i8) 2873 // fold (and (extload x, i16), 255) -> (zextload x, i8) 2874 // fold (and (any_ext (extload x, i16)), 255) -> (zextload x, i8) 2875 if (N1C && (N0.getOpcode() == ISD::LOAD || 2876 (N0.getOpcode() == ISD::ANY_EXTEND && 2877 N0.getOperand(0).getOpcode() == ISD::LOAD))) { 2878 bool HasAnyExt = N0.getOpcode() == ISD::ANY_EXTEND; 2879 LoadSDNode *LN0 = HasAnyExt 2880 ? cast<LoadSDNode>(N0.getOperand(0)) 2881 : cast<LoadSDNode>(N0); 2882 if (LN0->getExtensionType() != ISD::SEXTLOAD && 2883 LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { 2884 uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); 2885 if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ 2886 EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); 2887 EVT LoadedVT = LN0->getMemoryVT(); 2888 2889 if (ExtVT == LoadedVT && 2890 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2891 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2892 2893 SDValue NewLoad = 2894 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 2895 LN0->getChain(), LN0->getBasePtr(), ExtVT, 2896 LN0->getMemOperand()); 2897 AddToWorkList(N); 2898 CombineTo(LN0, NewLoad, NewLoad.getValue(1)); 2899 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2900 } 2901 2902 // Do not change the width of a volatile load. 2903 // Do not generate loads of non-round integer types since these can 2904 // be expensive (and would be wrong if the type is not byte sized). 2905 if (!LN0->isVolatile() && LoadedVT.bitsGT(ExtVT) && ExtVT.isRound() && 2906 (!LegalOperations || TLI.isLoadExtLegal(ISD::ZEXTLOAD, ExtVT))) { 2907 EVT PtrType = LN0->getOperand(1).getValueType(); 2908 2909 unsigned Alignment = LN0->getAlignment(); 2910 SDValue NewPtr = LN0->getBasePtr(); 2911 2912 // For big endian targets, we need to add an offset to the pointer 2913 // to load the correct bytes. 
For little endian systems, we merely 2914 // need to read fewer bytes from the same pointer. 2915 if (TLI.isBigEndian()) { 2916 unsigned LVTStoreBytes = LoadedVT.getStoreSize(); 2917 unsigned EVTStoreBytes = ExtVT.getStoreSize(); 2918 unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; 2919 NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, 2920 NewPtr, DAG.getConstant(PtrOff, PtrType)); 2921 Alignment = MinAlign(Alignment, PtrOff); 2922 } 2923 2924 AddToWorkList(NewPtr.getNode()); 2925 2926 EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; 2927 SDValue Load = 2928 DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, 2929 LN0->getChain(), NewPtr, 2930 LN0->getPointerInfo(), 2931 ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), 2932 Alignment, LN0->getTBAAInfo()); 2933 AddToWorkList(N); 2934 CombineTo(LN0, Load, Load.getValue(1)); 2935 return SDValue(N, 0); // Return N so it doesn't get rechecked! 2936 } 2937 } 2938 } 2939 } 2940 2941 if (N0.getOpcode() == ISD::ADD && N1.getOpcode() == ISD::SRL && 2942 VT.getSizeInBits() <= 64) { 2943 if (ConstantSDNode *ADDI = dyn_cast<ConstantSDNode>(N0.getOperand(1))) { 2944 APInt ADDC = ADDI->getAPIntValue(); 2945 if (!TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2946 // Look for (and (add x, c1), (lshr y, c2)). If C1 wasn't a legal 2947 // immediate for an add, but it is legal if its top c2 bits are set, 2948 // transform the ADD so the immediate doesn't need to be materialized 2949 // in a register. 2950 if (ConstantSDNode *SRLI = dyn_cast<ConstantSDNode>(N1.getOperand(1))) { 2951 APInt Mask = APInt::getHighBitsSet(VT.getSizeInBits(), 2952 SRLI->getZExtValue()); 2953 if (DAG.MaskedValueIsZero(N0.getOperand(1), Mask)) { 2954 ADDC |= Mask; 2955 if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { 2956 SDValue NewAdd = 2957 DAG.getNode(ISD::ADD, SDLoc(N0), VT, 2958 N0.getOperand(0), DAG.getConstant(ADDC, VT)); 2959 CombineTo(N0.getNode(), NewAdd); 2960 return SDValue(N, 0); // Return N so it doesn't get rechecked! 
            }
          }
        }
      }
    }
  }

  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return BSwap;
  }

  return SDValue();
}

/// MatchBSwapHWord - Match (a >> 8) | (a << 8) as (bswap a) >> 16.
/// N is the OR node; N0/N1 are its two operands (taken by value so they can
/// be canonicalized locally via std::swap).  DemandHighBits indicates whether
/// the caller needs the bits above the low halfword to be zero in the result.
/// Returns the replacement value, or a null SDValue if no match.
SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
                                        bool DemandHighBits) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i64 && VT != MVT::i32 && VT != MVT::i16)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Recognize (and (shl a, 8), 0xff), (and (srl a, 8), 0xff00)
  // First canonicalize so that a masked SHL side (if any) ends up in N0 and a
  // masked SRL side in N1; the masks themselves are peeled off below.
  bool LookPassAnd0 = false;
  bool LookPassAnd1 = false;
  if (N0.getOpcode() == ISD::AND && N0.getOperand(0).getOpcode() == ISD::SRL)
    std::swap(N0, N1);
  if (N1.getOpcode() == ISD::AND && N1.getOperand(0).getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() == ISD::AND) {
    if (!N0.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01C || N01C->getZExtValue() != 0xFF00)
      return SDValue();
    N0 = N0.getOperand(0);
    LookPassAnd0 = true;
  }

  if (N1.getOpcode() == ISD::AND) {
    if (!N1.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
    if (!N11C || N11C->getZExtValue() != 0xFF)
      return SDValue();
    N1 = N1.getOperand(0);
    LookPassAnd1 = true;
  }

  if (N0.getOpcode() == ISD::SRL && N1.getOpcode() == ISD::SHL)
    std::swap(N0, N1);
  if (N0.getOpcode() != ISD::SHL || N1.getOpcode() != ISD::SRL)
    return SDValue();
  if (!N0.getNode()->hasOneUse() ||
      !N1.getNode()->hasOneUse())
    return SDValue();

  ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
  ConstantSDNode *N11C = dyn_cast<ConstantSDNode>(N1.getOperand(1));
  if (!N01C || !N11C)
    return SDValue();
  if (N01C->getZExtValue() != 8 || N11C->getZExtValue() != 8)
    return SDValue();

  // Look for (shl (and a, 0xff), 8), (srl (and a, 0xff00), 8)
  SDValue N00 = N0->getOperand(0);
  if (!LookPassAnd0 && N00.getOpcode() == ISD::AND) {
    if (!N00.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N001C = dyn_cast<ConstantSDNode>(N00.getOperand(1));
    if (!N001C || N001C->getZExtValue() != 0xFF)
      return SDValue();
    N00 = N00.getOperand(0);
    LookPassAnd0 = true;
  }

  SDValue N10 = N1->getOperand(0);
  if (!LookPassAnd1 && N10.getOpcode() == ISD::AND) {
    if (!N10.getNode()->hasOneUse())
      return SDValue();
    ConstantSDNode *N101C = dyn_cast<ConstantSDNode>(N10.getOperand(1));
    if (!N101C || N101C->getZExtValue() != 0xFF00)
      return SDValue();
    N10 = N10.getOperand(0);
    LookPassAnd1 = true;
  }

  // Both halves must byteswap the same source value.
  if (N00 != N10)
    return SDValue();

  // Make sure everything beyond the low halfword gets set to zero since the SRL
  // 16 will clear the top bits.
  unsigned OpSizeInBits = VT.getSizeInBits();
  if (DemandHighBits && OpSizeInBits > 16) {
    // If the left-shift isn't masked out then the only way this is a bswap is
    // if all bits beyond the low 8 are 0. In that case the entire pattern
    // reduces to a left shift anyway: leave it for other parts of the combiner.
    if (!LookPassAnd0)
      return SDValue();

    // However, if the right shift isn't masked out then it might be because
    // it's not needed. See if we can spot that too.
    if (!LookPassAnd1 &&
        !DAG.MaskedValueIsZero(
            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
      return SDValue();
  }

  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
  if (OpSizeInBits > 16)
    Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
                      DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
  return Res;
}

/// isBSwapHWordElement - Return true if the specified node is an element
/// that makes up a 32-bit packed halfword byteswap. i.e.
/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
/// On success, records the source node of the matched byte in Parts[Num],
/// where Num identifies which byte mask was seen.
static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode *> &Parts) {
  if (!N.getNode()->hasOneUse())
    return false;

  unsigned Opc = N.getOpcode();
  if (Opc != ISD::AND && Opc != ISD::SHL && Opc != ISD::SRL)
    return false;

  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N.getOperand(1));
  if (!N1C)
    return false;

  // Classify by the constant operand.  NOTE(review): for the SHL/SRL opcodes
  // operand 1 is the shift amount, so only values that coincide with one of
  // these byte masks reach the SHL/SRL branches below — confirm against the
  // callers' canonicalized input before relying on those paths.
  unsigned Num;
  switch (N1C->getZExtValue()) {
  default:
    return false;
  case 0xFF:       Num = 0; break;
  case 0xFF00:     Num = 1; break;
  case 0xFF0000:   Num = 2; break;
  case 0xFF000000: Num = 3; break;
  }

  // Look for (x & 0xff) << 8 as well as ((x << 8) & 0xff00).
  SDValue N0 = N.getOperand(0);
  if (Opc == ISD::AND) {
    if (Num == 0 || Num == 2) {
      // (x >> 8) & 0xff
      // (x >> 8) & 0xff0000
      if (N0.getOpcode() != ISD::SRL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    } else {
      // (x << 8) & 0xff00
      // (x << 8) & 0xff000000
      if (N0.getOpcode() != ISD::SHL)
        return false;
      ConstantSDNode *C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
      if (!C || C->getZExtValue() != 8)
        return false;
    }
  } else if (Opc == ISD::SHL) {
    // (x & 0xff) << 8
    // (x & 0xff0000) << 8
    if (Num != 0 && Num != 2)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  } else { // Opc == ISD::SRL
    // (x & 0xff00) >> 8
    // (x & 0xff000000) >> 8
    if (Num != 1 && Num != 3)
      return false;
    ConstantSDNode *C = dyn_cast<ConstantSDNode>(N.getOperand(1));
    if (!C || C->getZExtValue() != 8)
      return false;
  }

  // Each byte slot may be claimed only once across the four elements.
  if (Parts[Num])
    return false;

  Parts[Num] = N0.getOperand(0).getNode();
  return true;
}

/// MatchBSwapHWord - Match a 32-bit packed halfword bswap. That is
/// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
/// => (rotl (bswap x), 16)
/// N is the outermost OR; N0/N1 are its operands.  Returns the rewritten
/// value or a null SDValue when the pattern does not match.
SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
  if (!LegalOperations)
    return SDValue();

  EVT VT = N->getValueType(0);
  if (VT != MVT::i32)
    return SDValue();
  if (!TLI.isOperationLegal(ISD::BSWAP, VT))
    return SDValue();

  // Parts[i] collects the source node for byte mask i (see
  // isBSwapHWordElement); all four must match the same node.
  SmallVector<SDNode*,4> Parts(4, (SDNode*)nullptr);
  // Look for either
  // (or (or (and), (and)), (or (and), (and)))
  // (or (or (or (and), (and)), (and)), (and))
  if (N0.getOpcode() != ISD::OR)
    return SDValue();
  SDValue N00 = N0.getOperand(0);
  SDValue N01 = N0.getOperand(1);

  if (N1.getOpcode() == ISD::OR &&
      N00.getNumOperands() == 2 && N01.getNumOperands() == 2) {
    // (or (or (and), (and)), (or (and), (and)))
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();

    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
    SDValue N010 = N01.getOperand(0);
    if (!isBSwapHWordElement(N010, Parts))
      return SDValue();
    SDValue N011 = N01.getOperand(1);
    if (!isBSwapHWordElement(N011, Parts))
      return SDValue();
  } else {
    // (or (or (or (and), (and)), (and)), (and))
    if (!isBSwapHWordElement(N1, Parts))
      return SDValue();
    if (!isBSwapHWordElement(N01, Parts))
      return SDValue();
    if (N00.getOpcode() != ISD::OR)
      return SDValue();
    SDValue N000 = N00.getOperand(0);
    if (!isBSwapHWordElement(N000, Parts))
      return SDValue();
    SDValue N001 = N00.getOperand(1);
    if (!isBSwapHWordElement(N001, Parts))
      return SDValue();
  }

  // Make sure the parts are all coming from the same node.
  if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
    return SDValue();

  SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
                              SDValue(Parts[0],0));

  // Result of the bswap should be rotated by 16. If it's not legal, then
  // do (x << 16) | (x >> 16).
  SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT));
  if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT))
    return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt);
  if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT))
    return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt);
  return DAG.getNode(ISD::OR, SDLoc(N), VT,
                     DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt),
                     DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt));
}

/// visitOR - Combine an ISD::OR node.  Applies constant folds,
/// canonicalizations, setcc merges, bswap/rotate recognition, and
/// demanded-bits simplification in priority order; returns a replacement
/// value when a fold applies, or a null SDValue otherwise.
SDValue DAGCombiner::visitOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LL, LR, RL, RR, CC0, CC1;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N1.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (or x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;

    // fold (or x, -1) -> -1, vector edition
    if (ISD::isBuildVectorAllOnes(N0.getNode()))
      return N0;
    if (ISD::isBuildVectorAllOnes(N1.getNode()))
      return N1;

    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf A, B, Mask1)
    // fold (or (shuf A, V_0, MA), (shuf B, V_0, MB)) -> (shuf B, A, Mask2)
    // Do this only if the resulting shuffle is legal.
    if (isa<ShuffleVectorSDNode>(N0) &&
        isa<ShuffleVectorSDNode>(N1) &&
        N0->getOperand(1) == N1->getOperand(1) &&
        ISD::isBuildVectorAllZeros(N0.getOperand(1).getNode())) {
      bool CanFold = true;
      unsigned NumElts = VT.getVectorNumElements();
      const ShuffleVectorSDNode *SV0 = cast<ShuffleVectorSDNode>(N0);
      const ShuffleVectorSDNode *SV1 = cast<ShuffleVectorSDNode>(N1);
      // We construct two shuffle masks:
      // - Mask1 is a shuffle mask for a shuffle with N0 as the first operand
      // and N1 as the second operand.
      // - Mask2 is a shuffle mask for a shuffle with N1 as the first operand
      // and N0 as the second operand.
      // We do this because OR is commutable and therefore there might be
      // two ways to fold this node into a shuffle.
      SmallVector<int,4> Mask1;
      SmallVector<int,4> Mask2;

      for (unsigned i = 0; i != NumElts && CanFold; ++i) {
        int M0 = SV0->getMaskElt(i);
        int M1 = SV1->getMaskElt(i);

        // Both shuffle indexes are undef. Propagate Undef.
        if (M0 < 0 && M1 < 0) {
          Mask1.push_back(M0);
          Mask2.push_back(M0);
          continue;
        }

        // Exactly one of the two lanes must read a real element; if both read
        // real elements (or both read the zero vector) the OR is not a blend.
        if (M0 < 0 || M1 < 0 ||
            (M0 < (int)NumElts && M1 < (int)NumElts) ||
            (M0 >= (int)NumElts && M1 >= (int)NumElts)) {
          CanFold = false;
          break;
        }

        Mask1.push_back(M0 < (int)NumElts ? M0 : M1 + NumElts);
        Mask2.push_back(M1 < (int)NumElts ? M1 : M0 + NumElts);
      }

      if (CanFold) {
        // Fold this sequence only if the resulting shuffle is 'legal'.
        if (TLI.isShuffleMaskLegal(Mask1, VT))
          return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0),
                                      N1->getOperand(0), &Mask1[0]);
        if (TLI.isShuffleMaskLegal(Mask2, VT))
          return DAG.getVectorShuffle(VT, SDLoc(N), N1->getOperand(0),
                                      N0->getOperand(0), &Mask2[0]);
      }
    }
  }

  // fold (or x, undef) -> -1
  if (!LegalOperations &&
      (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)) {
    EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
    return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
  }
  // fold (or c1, c2) -> c1|c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0);
  // fold (or x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (or x, -1) -> -1
  if (N1C && N1C->isAllOnesValue())
    return N1;
  // fold (or x, c) -> c iff (x & ~c) == 0
  if (N1C && DAG.MaskedValueIsZero(N0, ~N1C->getAPIntValue()))
    return N1;

  // Recognize halfword bswaps as (bswap + rotl 16) or (bswap + shl 16)
  SDValue BSwap = MatchBSwapHWord(N, N0, N1);
  if (BSwap.getNode())
    return BSwap;
  BSwap = MatchBSwapHWordLow(N, N0, N1);
  if (BSwap.getNode())
    return BSwap;

  // reassociate or
  SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1);
  if (ROR.getNode())
    return ROR;
  // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
  // iff (c1 & c2) == 0.
  if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      isa<ConstantSDNode>(N0.getOperand(1))) {
    ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
    if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) {
      SDValue COR = DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1);
      if (!COR.getNode())
        return SDValue();
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getNode(ISD::OR, SDLoc(N0), VT,
                                     N0.getOperand(0), N1), COR);
    }
  }
  // fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
  if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
    ISD::CondCode Op0 = cast<CondCodeSDNode>(CC0)->get();
    ISD::CondCode Op1 = cast<CondCodeSDNode>(CC1)->get();

    if (LR == RR && isa<ConstantSDNode>(LR) && Op0 == Op1 &&
        LL.getValueType().isInteger()) {
      // fold (or (setne X, 0), (setne Y, 0)) -> (setne (or X, Y), 0)
      // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0)
      if (cast<ConstantSDNode>(LR)->isNullValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) {
        SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR),
                                     LR.getValueType(), LL, RL);
        AddToWorkList(ORNode.getNode());
        return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1);
      }
      // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1)
      // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1)
      if (cast<ConstantSDNode>(LR)->isAllOnesValue() &&
          (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) {
        SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR),
                                      LR.getValueType(), LL, RL);
        AddToWorkList(ANDNode.getNode());
        return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1);
      }
    }
    // canonicalize equivalent to ll == rl
    if (LL == RR && LR == RL) {
      Op1 = ISD::getSetCCSwappedOperands(Op1);
      std::swap(RL, RR);
    }
    if (LL == RL && LR == RR) {
      bool isInteger = LL.getValueType().isInteger();
      ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger);
      if (Result != ISD::SETCC_INVALID &&
          (!LegalOperations ||
           (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) &&
            TLI.isOperationLegal(ISD::SETCC,
                                 getSetCCResultType(N0.getValueType())))))
        return DAG.getSetCC(SDLoc(N), N0.getValueType(),
                            LL, LR, Result);
    }
  }

  // Simplify: (or (op x...), (op y...)) -> (op (or x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // (or (and X, C1), (and Y, C2)) -> (and (or X, Y), C3) if possible.
  if (N0.getOpcode() == ISD::AND &&
      N1.getOpcode() == ISD::AND &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      N1.getOperand(1).getOpcode() == ISD::Constant &&
      // Don't increase # computations.
      (N0.getNode()->hasOneUse() || N1.getNode()->hasOneUse())) {
    // We can only do this xform if we know that bits from X that are set in C2
    // but not in C1 are already zero. Likewise for Y.
    const APInt &LHSMask =
      cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    const APInt &RHSMask =
      cast<ConstantSDNode>(N1.getOperand(1))->getAPIntValue();

    if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) &&
        DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) {
      SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT,
                              N0.getOperand(0), N1.getOperand(0));
      return DAG.getNode(ISD::AND, SDLoc(N), VT, X,
                         DAG.getConstant(LHSMask | RHSMask, VT));
    }
  }

  // See if this is some rotate idiom.
  if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N)))
    return SDValue(Rot, 0);

  // Simplify the operands using demanded-bits information.
3434 if (!VT.isVector() && 3435 SimplifyDemandedBits(SDValue(N, 0))) 3436 return SDValue(N, 0); 3437 3438 return SDValue(); 3439 } 3440 3441 /// MatchRotateHalf - Match "(X shl/srl V1) & V2" where V2 may not be present. 3442 static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { 3443 if (Op.getOpcode() == ISD::AND) { 3444 if (isa<ConstantSDNode>(Op.getOperand(1))) { 3445 Mask = Op.getOperand(1); 3446 Op = Op.getOperand(0); 3447 } else { 3448 return false; 3449 } 3450 } 3451 3452 if (Op.getOpcode() == ISD::SRL || Op.getOpcode() == ISD::SHL) { 3453 Shift = Op; 3454 return true; 3455 } 3456 3457 return false; 3458 } 3459 3460 // Return true if we can prove that, whenever Neg and Pos are both in the 3461 // range [0, OpSize), Neg == (Pos == 0 ? 0 : OpSize - Pos). This means that 3462 // for two opposing shifts shift1 and shift2 and a value X with OpBits bits: 3463 // 3464 // (or (shift1 X, Neg), (shift2 X, Pos)) 3465 // 3466 // reduces to a rotate in direction shift2 by Pos or (equivalently) a rotate 3467 // in direction shift1 by Neg. The range [0, OpSize) means that we only need 3468 // to consider shift amounts with defined behavior. 3469 static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned OpSize) { 3470 // If OpSize is a power of 2 then: 3471 // 3472 // (a) (Pos == 0 ? 0 : OpSize - Pos) == (OpSize - Pos) & (OpSize - 1) 3473 // (b) Neg == Neg & (OpSize - 1) whenever Neg is in [0, OpSize). 3474 // 3475 // So if OpSize is a power of 2 and Neg is (and Neg', OpSize-1), we check 3476 // for the stronger condition: 3477 // 3478 // Neg & (OpSize - 1) == (OpSize - Pos) & (OpSize - 1) [A] 3479 // 3480 // for all Neg and Pos. Since Neg & (OpSize - 1) == Neg' & (OpSize - 1) 3481 // we can just replace Neg with Neg' for the rest of the function. 3482 // 3483 // In other cases we check for the even stronger condition: 3484 // 3485 // Neg == OpSize - Pos [B] 3486 // 3487 // for all Neg and Pos. Note that the (or ...) 
then invokes undefined 3488 // behavior if Pos == 0 (and consequently Neg == OpSize). 3489 // 3490 // We could actually use [A] whenever OpSize is a power of 2, but the 3491 // only extra cases that it would match are those uninteresting ones 3492 // where Neg and Pos are never in range at the same time. E.g. for 3493 // OpSize == 32, using [A] would allow a Neg of the form (sub 64, Pos) 3494 // as well as (sub 32, Pos), but: 3495 // 3496 // (or (shift1 X, (sub 64, Pos)), (shift2 X, Pos)) 3497 // 3498 // always invokes undefined behavior for 32-bit X. 3499 // 3500 // Below, Mask == OpSize - 1 when using [A] and is all-ones otherwise. 3501 unsigned MaskLoBits = 0; 3502 if (Neg.getOpcode() == ISD::AND && 3503 isPowerOf2_64(OpSize) && 3504 Neg.getOperand(1).getOpcode() == ISD::Constant && 3505 cast<ConstantSDNode>(Neg.getOperand(1))->getAPIntValue() == OpSize - 1) { 3506 Neg = Neg.getOperand(0); 3507 MaskLoBits = Log2_64(OpSize); 3508 } 3509 3510 // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. 3511 if (Neg.getOpcode() != ISD::SUB) 3512 return 0; 3513 ConstantSDNode *NegC = dyn_cast<ConstantSDNode>(Neg.getOperand(0)); 3514 if (!NegC) 3515 return 0; 3516 SDValue NegOp1 = Neg.getOperand(1); 3517 3518 // On the RHS of [A], if Pos is Pos' & (OpSize - 1), just replace Pos with 3519 // Pos'. The truncation is redundant for the purpose of the equality. 3520 if (MaskLoBits && 3521 Pos.getOpcode() == ISD::AND && 3522 Pos.getOperand(1).getOpcode() == ISD::Constant && 3523 cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() == OpSize - 1) 3524 Pos = Pos.getOperand(0); 3525 3526 // The condition we need is now: 3527 // 3528 // (NegC - NegOp1) & Mask == (OpSize - Pos) & Mask 3529 // 3530 // If NegOp1 == Pos then we need: 3531 // 3532 // OpSize & Mask == NegC & Mask 3533 // 3534 // (because "x & Mask" is a truncation and distributes through subtraction). 
3535 APInt Width; 3536 if (Pos == NegOp1) 3537 Width = NegC->getAPIntValue(); 3538 // Check for cases where Pos has the form (add NegOp1, PosC) for some PosC. 3539 // Then the condition we want to prove becomes: 3540 // 3541 // (NegC - NegOp1) & Mask == (OpSize - (NegOp1 + PosC)) & Mask 3542 // 3543 // which, again because "x & Mask" is a truncation, becomes: 3544 // 3545 // NegC & Mask == (OpSize - PosC) & Mask 3546 // OpSize & Mask == (NegC + PosC) & Mask 3547 else if (Pos.getOpcode() == ISD::ADD && 3548 Pos.getOperand(0) == NegOp1 && 3549 Pos.getOperand(1).getOpcode() == ISD::Constant) 3550 Width = (cast<ConstantSDNode>(Pos.getOperand(1))->getAPIntValue() + 3551 NegC->getAPIntValue()); 3552 else 3553 return false; 3554 3555 // Now we just need to check that OpSize & Mask == Width & Mask. 3556 if (MaskLoBits) 3557 // Opsize & Mask is 0 since Mask is Opsize - 1. 3558 return Width.getLoBits(MaskLoBits) == 0; 3559 return Width == OpSize; 3560 } 3561 3562 // A subroutine of MatchRotate used once we have found an OR of two opposite 3563 // shifts of Shifted. If Neg == <operand size> - Pos then the OR reduces 3564 // to both (PosOpcode Shifted, Pos) and (NegOpcode Shifted, Neg), with the 3565 // former being preferred if supported. InnerPos and InnerNeg are Pos and 3566 // Neg with outer conversions stripped away. 3567 SDNode *DAGCombiner::MatchRotatePosNeg(SDValue Shifted, SDValue Pos, 3568 SDValue Neg, SDValue InnerPos, 3569 SDValue InnerNeg, unsigned PosOpcode, 3570 unsigned NegOpcode, SDLoc DL) { 3571 // fold (or (shl x, (*ext y)), 3572 // (srl x, (*ext (sub 32, y)))) -> 3573 // (rotl x, y) or (rotr x, (sub 32, y)) 3574 // 3575 // fold (or (shl x, (*ext (sub 32, y))), 3576 // (srl x, (*ext y))) -> 3577 // (rotr x, y) or (rotl x, (sub 32, y)) 3578 EVT VT = Shifted.getValueType(); 3579 if (matchRotateSub(InnerPos, InnerNeg, VT.getSizeInBits())) { 3580 bool HasPos = TLI.isOperationLegalOrCustom(PosOpcode, VT); 3581 return DAG.getNode(HasPos ? 
// MatchRotate - Handle an 'or' of two operands.  If this is one of the many
// idioms for rotate, and if the target supports rotation instructions, generate
// a rot[lr].  Returns the new rotate node, or null if no rotate was matched.
SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) {
  // Must be a legal type.  Expanded 'n promoted things won't work with rotates.
  EVT VT = LHS.getValueType();
  if (!TLI.isTypeLegal(VT)) return nullptr;

  // The target must have at least one rotate flavor.
  bool HasROTL = TLI.isOperationLegalOrCustom(ISD::ROTL, VT);
  bool HasROTR = TLI.isOperationLegalOrCustom(ISD::ROTR, VT);
  if (!HasROTL && !HasROTR) return nullptr;

  // Match "(X shl/srl V1) & V2" where V2 may not be present.
  SDValue LHSShift;   // The shift.
  SDValue LHSMask;    // AND value if any.
  if (!MatchRotateHalf(LHS, LHSShift, LHSMask))
    return nullptr; // Not part of a rotate.

  SDValue RHSShift;   // The shift.
  SDValue RHSMask;    // AND value if any.
  if (!MatchRotateHalf(RHS, RHSShift, RHSMask))
    return nullptr; // Not part of a rotate.

  if (LHSShift.getOperand(0) != RHSShift.getOperand(0))
    return nullptr;   // Not shifting the same value.

  if (LHSShift.getOpcode() == RHSShift.getOpcode())
    return nullptr;   // Shifts must disagree.

  // Canonicalize shl to left side in a shl/srl pair.  From here on the
  // LHS* values always describe the SHL half and the RHS* values the SRL.
  if (RHSShift.getOpcode() == ISD::SHL) {
    std::swap(LHS, RHS);
    std::swap(LHSShift, RHSShift);
    std::swap(LHSMask , RHSMask );
  }

  unsigned OpSizeInBits = VT.getSizeInBits();
  SDValue LHSShiftArg = LHSShift.getOperand(0);
  SDValue LHSShiftAmt = LHSShift.getOperand(1);
  SDValue RHSShiftArg = RHSShift.getOperand(0);
  SDValue RHSShiftAmt = RHSShift.getOperand(1);

  // fold (or (shl x, C1), (srl x, C2)) -> (rotl x, C1)
  // fold (or (shl x, C1), (srl x, C2)) -> (rotr x, C2)
  if (LHSShiftAmt.getOpcode() == ISD::Constant &&
      RHSShiftAmt.getOpcode() == ISD::Constant) {
    uint64_t LShVal = cast<ConstantSDNode>(LHSShiftAmt)->getZExtValue();
    uint64_t RShVal = cast<ConstantSDNode>(RHSShiftAmt)->getZExtValue();
    // Only a genuine rotate if the two constant amounts cover the full width.
    if ((LShVal + RShVal) != OpSizeInBits)
      return nullptr;

    SDValue Rot = DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT,
                              LHSShiftArg, HasROTL ? LHSShiftAmt : RHSShiftAmt);

    // If there is an AND of either shifted operand, apply it to the result.
    // Each original mask only constrained the bits its own shift produced,
    // so widen each with all-ones over the bits the other half supplied.
    if (LHSMask.getNode() || RHSMask.getNode()) {
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits);

      if (LHSMask.getNode()) {
        APInt RHSBits = APInt::getLowBitsSet(OpSizeInBits, LShVal);
        Mask &= cast<ConstantSDNode>(LHSMask)->getAPIntValue() | RHSBits;
      }
      if (RHSMask.getNode()) {
        APInt LHSBits = APInt::getHighBitsSet(OpSizeInBits, RShVal);
        Mask &= cast<ConstantSDNode>(RHSMask)->getAPIntValue() | LHSBits;
      }

      Rot = DAG.getNode(ISD::AND, DL, VT, Rot, DAG.getConstant(Mask, VT));
    }

    return Rot.getNode();
  }

  // If there is a mask here, and we have a variable shift, we can't be sure
  // that we're masking out the right stuff.
  if (LHSMask.getNode() || RHSMask.getNode())
    return nullptr;

  // If the shift amount is sign/zext/any-extended just peel it off.
  // Note both amounts must be wrapped for the peeled values to stay
  // comparable inside matchRotateSub.
  SDValue LExtOp0 = LHSShiftAmt;
  SDValue RExtOp0 = RHSShiftAmt;
  if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       LHSShiftAmt.getOpcode() == ISD::TRUNCATE) &&
      (RHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND ||
       RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) {
    LExtOp0 = LHSShiftAmt.getOperand(0);
    RExtOp0 = RHSShiftAmt.getOperand(0);
  }

  // Try both rotate directions for the variable-amount case.
  SDNode *TryL = MatchRotatePosNeg(LHSShiftArg, LHSShiftAmt, RHSShiftAmt,
                                   LExtOp0, RExtOp0, ISD::ROTL, ISD::ROTR, DL);
  if (TryL)
    return TryL;

  SDNode *TryR = MatchRotatePosNeg(RHSShiftArg, RHSShiftAmt, LHSShiftAmt,
                                   RExtOp0, LExtOp0, ISD::ROTR, ISD::ROTL, DL);
  if (TryR)
    return TryR;

  return nullptr;
}
/// visitXOR - Combine patterns rooted at an ISD::XOR node.  Returns the
/// replacement value, or an empty SDValue when no fold applies.  The fold
/// order below is significant: constant folds and canonicalizations run
/// before the structural rewrites that depend on them.
SDValue DAGCombiner::visitXOR(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue LHS, RHS, CC;
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // fold (xor x, 0) -> x, vector edition
    if (ISD::isBuildVectorAllZeros(N0.getNode()))
      return N1;
    if (ISD::isBuildVectorAllZeros(N1.getNode()))
      return N0;
  }

  // fold (xor undef, undef) -> 0. This is a common idiom (misuse).
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // fold (xor x, undef) -> undef
  if (N0.getOpcode() == ISD::UNDEF)
    return N0;
  if (N1.getOpcode() == ISD::UNDEF)
    return N1;
  // fold (xor c1, c2) -> c1^c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C);
  // canonicalize constant to RHS
  if (N0C && !N1C)
    return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0);
  // fold (xor x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // reassociate xor
  SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1);
  if (RXOR.getNode())
    return RXOR;

  // fold !(x cc y) -> (x !cc y)
  // xor-with-1 of a setcc-equivalent value is a logical NOT; invert the
  // condition code instead of materializing the xor.
  if (N1C && N1C->getAPIntValue() == 1 && isSetCCEquivalent(N0, LHS, RHS, CC)) {
    bool isInt = LHS.getValueType().isInteger();
    ISD::CondCode NotCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
                                               isInt);

    if (!LegalOperations ||
        TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) {
      switch (N0.getOpcode()) {
      default:
        llvm_unreachable("Unhandled SetCC Equivalent!");
      case ISD::SETCC:
        return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC);
      case ISD::SELECT_CC:
        return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2),
                               N0.getOperand(3), NotCC);
      }
    }
  }

  // fold (not (zext (setcc x, y))) -> (zext (not (setcc x, y)))
  if (N1C && N1C->getAPIntValue() == 1 && N0.getOpcode() == ISD::ZERO_EXTEND &&
      N0.getNode()->hasOneUse() &&
      isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){
    SDValue V = N0.getOperand(0);
    V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V,
                    DAG.getConstant(1, V.getValueType()));
    AddToWorkList(V.getNode());
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V);
  }

  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc
  // (De Morgan on i1; requires one setcc operand with a single use so the
  // rewrite does not duplicate work).
  if (N1C && N1C->getAPIntValue() == 1 && VT == MVT::i1 &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants
  // (full bitwise NOT here: N1 is all-ones, not just the low bit).
  if (N1C && N1C->isAllOnesValue() &&
      (N0.getOpcode() == ISD::OR || N0.getOpcode() == ISD::AND)) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    if (isa<ConstantSDNode>(RHS) || isa<ConstantSDNode>(LHS)) {
      unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND;
      LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS
      RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS
      AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode());
      return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS);
    }
  }
  // fold (xor (and x, y), y) -> (and (not x), y)
  if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
      N0->getOperand(1) == N1) {
    SDValue X = N0->getOperand(0);
    SDValue NotX = DAG.getNOT(SDLoc(X), X, VT);
    AddToWorkList(NotX.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1);
  }
  // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2))
  if (N1C && N0.getOpcode() == ISD::XOR) {
    ConstantSDNode *N00C = dyn_cast<ConstantSDNode>(N0.getOperand(0));
    ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (N00C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N00C->getAPIntValue(), VT));
    if (N01C)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(N1C->getAPIntValue() ^
                                         N01C->getAPIntValue(), VT));
  }
  // fold (xor x, x) -> 0
  if (N0 == N1)
    return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes);

  // Simplify: xor (op x...), (op y...)  -> (op (xor x, y))
  if (N0.getOpcode() == N1.getOpcode()) {
    SDValue Tmp = SimplifyBinOpWithSameOpcodeHands(N);
    if (Tmp.getNode()) return Tmp;
  }

  // Simplify the expression using non-local knowledge.
  if (!VT.isVector() &&
      SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  return SDValue();
}
/// visitShiftByConstant - Handle transforms common to the three shifts, when
/// the shift amount is a constant.  Rewrites (shift (binop X, C), Amt) into
/// (binop (shift X, Amt), (shift C, Amt)) when this is safe, so the binop
/// lands outside the shift.  Returns an empty SDValue when no fold applies.
SDValue DAGCombiner::visitShiftByConstant(SDNode *N, ConstantSDNode *Amt) {
  // We can't and shouldn't fold opaque constants.
  if (Amt->isOpaque())
    return SDValue();

  // The binop must be the shift's only user, or we would duplicate it.
  SDNode *LHS = N->getOperand(0).getNode();
  if (!LHS->hasOneUse()) return SDValue();

  // We want to pull some binops through shifts, so that we have (and (shift))
  // instead of (shift (and)), likewise for add, or, xor, etc.  This sort of
  // thing happens with address calculations, so it's important to canonicalize
  // it.
  bool HighBitSet = false;  // Can we transform this if the high bit is set?

  switch (LHS->getOpcode()) {
  default: return SDValue();
  case ISD::OR:
  case ISD::XOR:
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  case ISD::AND:
    HighBitSet = true;  // We can only transform sra if the high bit is set.
    break;
  case ISD::ADD:
    if (N->getOpcode() != ISD::SHL)
      return SDValue(); // only shl(add) not sr[al](add).
    HighBitSet = false; // We can only transform sra if the high bit is clear.
    break;
  }

  // We require the RHS of the binop to be a constant and not opaque as well.
  ConstantSDNode *BinOpCst = dyn_cast<ConstantSDNode>(LHS->getOperand(1));
  if (!BinOpCst || BinOpCst->isOpaque()) return SDValue();

  // FIXME: disable this unless the input to the binop is a shift by a constant.
  // If it is not a shift, it pessimizes some common cases like:
  //
  //    void foo(int *X, int i) { X[i & 1235] = 1; }
  //    int bar(int *X, int i) { return X[i & 255]; }
  SDNode *BinOpLHSVal = LHS->getOperand(0).getNode();
  if ((BinOpLHSVal->getOpcode() != ISD::SHL &&
       BinOpLHSVal->getOpcode() != ISD::SRA &&
       BinOpLHSVal->getOpcode() != ISD::SRL) ||
      !isa<ConstantSDNode>(BinOpLHSVal->getOperand(1)))
    return SDValue();

  EVT VT = N->getValueType(0);

  // If this is a signed shift right, and the high bit is modified by the
  // logical operation, do not perform the transformation. The highBitSet
  // boolean indicates the value of the high bit of the constant which would
  // cause it to be modified for this operation.
  if (N->getOpcode() == ISD::SRA) {
    bool BinOpRHSSignSet = BinOpCst->getAPIntValue().isNegative();
    if (BinOpRHSSignSet != HighBitSet)
      return SDValue();
  }

  // Give the target a chance to veto the commute (e.g. for addressing modes).
  if (!TLI.isDesirableToCommuteWithShift(LHS))
    return SDValue();

  // Fold the constants, shifting the binop RHS by the shift amount.
  // Both operands are constants here, so this is expected to constant-fold.
  SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)),
                               N->getValueType(0),
                               LHS->getOperand(1), N->getOperand(1));
  assert(isa<ConstantSDNode>(NewRHS) && "Folding was not successful!");

  // Create the new shift.
  SDValue NewShift = DAG.getNode(N->getOpcode(),
                                 SDLoc(LHS->getOperand(0)),
                                 VT, LHS->getOperand(0), N->getOperand(1));

  // Create the new binop.
  return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS);
}
3906 return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); 3907 } 3908 3909 SDValue DAGCombiner::distributeTruncateThroughAnd(SDNode *N) { 3910 assert(N->getOpcode() == ISD::TRUNCATE); 3911 assert(N->getOperand(0).getOpcode() == ISD::AND); 3912 3913 // (truncate:TruncVT (and N00, N01C)) -> (and (truncate:TruncVT N00), TruncC) 3914 if (N->hasOneUse() && N->getOperand(0).hasOneUse()) { 3915 SDValue N01 = N->getOperand(0).getOperand(1); 3916 3917 if (ConstantSDNode *N01C = isConstOrConstSplat(N01)) { 3918 EVT TruncVT = N->getValueType(0); 3919 SDValue N00 = N->getOperand(0).getOperand(0); 3920 APInt TruncC = N01C->getAPIntValue(); 3921 TruncC = TruncC.trunc(TruncVT.getScalarSizeInBits()); 3922 3923 return DAG.getNode(ISD::AND, SDLoc(N), TruncVT, 3924 DAG.getNode(ISD::TRUNCATE, SDLoc(N), TruncVT, N00), 3925 DAG.getConstant(TruncC, TruncVT)); 3926 } 3927 } 3928 3929 return SDValue(); 3930 } 3931 3932 SDValue DAGCombiner::visitRotate(SDNode *N) { 3933 // fold (rot* x, (trunc (and y, c))) -> (rot* x, (and (trunc y), (trunc c))). 
/// visitSHL - Combine patterns rooted at an ISD::SHL node.  Returns the
/// replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitSHL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    BuildVectorSDNode *N1CV = dyn_cast<BuildVectorSDNode>(N1);
    // If setcc produces all-one true value then:
    // (shl (and (setcc) N01CV) N1CV) -> (and (setcc) N01CV<<N1CV)
    if (N1CV && N1CV->isConstant()) {
      if (N0.getOpcode() == ISD::AND) {
        SDValue N00 = N0->getOperand(0);
        SDValue N01 = N0->getOperand(1);
        BuildVectorSDNode *N01CV = dyn_cast<BuildVectorSDNode>(N01);

        if (N01CV && N01CV->isConstant() && N00.getOpcode() == ISD::SETCC &&
            TLI.getBooleanContents(N00.getOperand(0).getValueType()) ==
                TargetLowering::ZeroOrNegativeOneBooleanContent) {
          SDValue C = DAG.FoldConstantArithmetic(ISD::SHL, VT, N01CV, N1CV);
          if (C.getNode())
            return DAG.getNode(ISD::AND, SDLoc(N), VT, N00, C);
        }
      } else {
        // Constant-splat shift amount: let the scalar folds below see it.
        N1C = isConstOrConstSplat(N1);
      }
    }
  }

  // fold (shl c1, c2) -> c1<<c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SHL, VT, N0C, N1C);
  // fold (shl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (shl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (shl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (shl undef, x) -> 0
  if (N0.getOpcode() == ISD::UNDEF)
    return DAG.getConstant(0, VT);
  // if (shl x, c) is known to be zero, return 0
  if (DAG.MaskedValueIsZero(SDValue(N, 0),
                            APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);
  // fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, NewOp1);
  }

  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (shl (shl x, c1), c2) -> 0 or (shl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SHL) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (shl (ext (shl x, c1)), c2) -> (ext (shl x, (add c1, c2)))
  // For this to be valid, the second form must not preserve any of the bits
  // that are shifted out by the inner shift in the first form.  This means
  // the outer shift size must be >= the number of bits added by the ext.
  // As a corollary, we don't care what kind of ext it is.
  if (N1C && (N0.getOpcode() == ISD::ZERO_EXTEND ||
              N0.getOpcode() == ISD::ANY_EXTEND ||
              N0.getOpcode() == ISD::SIGN_EXTEND) &&
      N0.getOperand(0).getOpcode() == ISD::SHL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      EVT InnerShiftVT = N0Op0.getValueType();
      uint64_t InnerShiftSize = InnerShiftVT.getScalarSizeInBits();
      // c2 >= (ext width added) == OpSizeInBits - InnerShiftSize.
      if (c2 >= OpSizeInBits - InnerShiftSize) {
        if (c1 + c2 >= OpSizeInBits)
          return DAG.getConstant(0, VT);
        return DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           DAG.getNode(N0.getOpcode(), SDLoc(N0), VT,
                                       N0Op0->getOperand(0)),
                           DAG.getConstant(c1 + c2, N1.getValueType()));
      }
    }
  }

  // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C))
  // Only fold this if the inner zext has no other uses to avoid increasing
  // the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() &&
      N0.getOperand(0).getOpcode() == ISD::SRL) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *N0Op0C1 = isConstOrConstSplat(N0Op0.getOperand(1))) {
      uint64_t c1 = N0Op0C1->getZExtValue();
      if (c1 < VT.getScalarSizeInBits()) {
        uint64_t c2 = N1C->getZExtValue();
        // Matching amounts means the shl just re-clears the bits the srl
        // cleared, so it can be done in the narrow type.
        if (c1 == c2) {
          SDValue NewOp0 = N0.getOperand(0);
          EVT CountVT = NewOp0.getOperand(1).getValueType();
          SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(),
                                       NewOp0, DAG.getConstant(c2, CountVT));
          AddToWorkList(NewSHL.getNode());
          return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL);
        }
      }
    }
  }

  // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or
  //                               (and (srl x, (sub c1, c2), MASK)
  // Only fold this if the inner shift has no other uses -- if it does, folding
  // this will increase the total number of instructions.
  if (N1C && N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N0C1 = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N0C1->getZExtValue();
      if (c1 < OpSizeInBits) {
        uint64_t c2 = N1C->getZExtValue();
        APInt Mask = APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - c1);
        SDValue Shift;
        if (c2 > c1) {
          Mask = Mask.shl(c2 - c1);
          Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c2 - c1, N1.getValueType()));
        } else {
          Mask = Mask.lshr(c1 - c2);
          Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                              DAG.getConstant(c1 - c2, N1.getValueType()));
        }
        return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift,
                           DAG.getConstant(Mask, VT));
      }
    }
  }
  // fold (shl (sra x, c1), c1) -> (and x, (shl -1, c1))
  if (N1C && N0.getOpcode() == ISD::SRA && N1 == N0.getOperand(1)) {
    unsigned BitSize = VT.getScalarSizeInBits();
    SDValue HiBitsMask =
      DAG.getConstant(APInt::getHighBitsSet(BitSize,
                                            BitSize - N1C->getZExtValue()), VT);
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                       HiBitsMask);
  }

  if (N1C) {
    SDValue NewSHL = visitShiftByConstant(N, N1C);
    if (NewSHL.getNode())
      return NewSHL;
  }

  return SDValue();
}
/// visitSRA - Combine patterns rooted at an ISD::SRA node.  Returns the
/// replacement value, or an empty SDValue when no fold applies.
SDValue DAGCombiner::visitSRA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // Allow constant-splat vector amounts to reach the scalar folds below.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (sra c1, c2) -> (sra c1, c2)
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C);
  // fold (sra 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (sra -1, x) -> -1
  if (N0C && N0C->isAllOnesValue())
    return N0;
  // fold (sra x, (setge c, size(x))) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (sra x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
  // sext_inreg.
  if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
    unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
    EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
    if (VT.isVector())
      ExtVT = EVT::getVectorVT(*DAG.getContext(),
                               ExtVT, VT.getVectorNumElements());
    if ((!LegalOperations ||
         TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT)))
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         N0.getOperand(0), DAG.getValueType(ExtVT));
  }

  // fold (sra (sra x, c1), c2) -> (sra x, (add c1, c2))
  // The sum is clamped: shifting by >= width would be undefined, but an
  // arithmetic shift by width-1 yields the same all-sign-bits result.
  if (N1C && N0.getOpcode() == ISD::SRA) {
    if (ConstantSDNode *C1 = isConstOrConstSplat(N0.getOperand(1))) {
      unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
      if (Sum >= OpSizeInBits)
        Sum = OpSizeInBits - 1;
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(Sum, N1.getValueType()));
    }
  }

  // fold (sra (shl X, m), (sub result_size, n))
  //      -> (sign_extend (trunc (shl X, (sub (sub result_size, n), m)))) for
  // result_size - n != m.
  // If truncate is free for the target sext(shl) is likely to result in better
  // code.
  if (N0.getOpcode() == ISD::SHL && N1C) {
    // Get the two constants of the shifts, CN0 = m, CN = n.
    const ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1));
    if (N01C) {
      LLVMContext &Ctx = *DAG.getContext();
      // Determine what the truncate's result bitsize and type would be.
      EVT TruncVT = EVT::getIntegerVT(Ctx, OpSizeInBits - N1C->getZExtValue());

      if (VT.isVector())
        TruncVT = EVT::getVectorVT(Ctx, TruncVT, VT.getVectorNumElements());

      // Determine the residual right-shift amount.
      signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();

      // If the shift is not a no-op (in which case this should be just a sign
      // extend already), the truncated to type is legal, sign_extend is legal
      // on that type, and the truncate to that type is both legal and free,
      // perform the transform.
      if ((ShiftAmt > 0) &&
          TLI.isOperationLegalOrCustom(ISD::SIGN_EXTEND, TruncVT) &&
          TLI.isOperationLegalOrCustom(ISD::TRUNCATE, VT) &&
          TLI.isTruncateFree(VT, TruncVT)) {

        SDValue Amt = DAG.getConstant(ShiftAmt,
            getShiftAmountTy(N0.getOperand(0).getValueType()));
        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT,
                                    N0.getOperand(0), Amt);
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT,
                                    Shift);
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N),
                           N->getValueType(0), Trunc);
      }
    }
  }

  // fold (sra x, (trunc (and y, c))) -> (sra x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, NewOp1);
  }

  // fold (sra (trunc (srl x, c1)), c2) -> (trunc (sra x, c1 + c2))
  //      if c1 is equal to the number of bits the trunc removes
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (N0.getOperand(0).getOpcode() == ISD::SRL ||
       N0.getOperand(0).getOpcode() == ISD::SRA) &&
      N0.getOperand(0).hasOneUse() &&
      N0.getOperand(0).getOperand(1).hasOneUse() &&
      N1C) {
    SDValue N0Op0 = N0.getOperand(0);
    if (ConstantSDNode *LargeShift = isConstOrConstSplat(N0Op0.getOperand(1))) {
      unsigned LargeShiftVal = LargeShift->getZExtValue();
      EVT LargeVT = N0Op0.getValueType();

      if (LargeVT.getScalarSizeInBits() - OpSizeInBits == LargeShiftVal) {
        SDValue Amt =
          DAG.getConstant(LargeShiftVal + N1C->getZExtValue(),
                          getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);


  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C) {
    SDValue NewSRA = visitShiftByConstant(N, N1C);
    if (NewSRA.getNode())
      return NewSRA;
  }

  return SDValue();
}
  // NOTE(review): the lines down to the first closing brace complete
  // DAGCombiner::visitSRA, whose beginning lies outside this chunk — kept
  // verbatim.  This is the tail of the (sra (trunc (srl x, c1)), c2) fold
  // performed in the larger type.
                                       N1C->getZExtValue(),
                   getShiftAmountTy(N0Op0.getOperand(0).getValueType()));
        SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT,
                                  N0Op0.getOperand(0), Amt);
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA);
      }
    }
  }

  // Simplify, based on bits shifted out of the LHS.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // If the sign bit is known to be zero, switch this to a SRL.
  if (DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1);

  if (N1C) {
    // Try the shift-by-constant combines shared by SHL/SRA/SRL.
    SDValue NewSRA = visitShiftByConstant(N, N1C);
    if (NewSRA.getNode())
      return NewSRA;
  }

  return SDValue();
}

/// visitSRL - Run the DAG combines rooted at a logical-shift-right node.
/// Returns the replacement value, or a null SDValue if no fold applies.
SDValue DAGCombiner::visitSRL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  EVT VT = N0.getValueType();
  unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;

    // For vectors, a constant splat shift amount is treated as a constant.
    N1C = isConstOrConstSplat(N1);
  }

  // fold (srl c1, c2) -> c1 >>u c2
  if (N0C && N1C)
    return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C);
  // fold (srl 0, x) -> 0
  if (N0C && N0C->isNullValue())
    return N0;
  // fold (srl x, c >= size(x)) -> undef
  if (N1C && N1C->getZExtValue() >= OpSizeInBits)
    return DAG.getUNDEF(VT);
  // fold (srl x, 0) -> x
  if (N1C && N1C->isNullValue())
    return N0;
  // if (srl x, c) is known to be zero, return 0
  if (N1C && DAG.MaskedValueIsZero(SDValue(N, 0),
                                   APInt::getAllOnesValue(OpSizeInBits)))
    return DAG.getConstant(0, VT);

  // fold (srl (srl x, c1), c2) -> 0 or (srl x, (add c1, c2))
  if (N1C && N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *N01C = isConstOrConstSplat(N0.getOperand(1))) {
      uint64_t c1 = N01C->getZExtValue();
      uint64_t c2 = N1C->getZExtValue();
      // A combined shift of the full width or more shifts everything out.
      if (c1 + c2 >= OpSizeInBits)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(c1 + c2, N1.getValueType()));
    }
  }

  // fold (srl (trunc (srl x, c1)), c2) -> 0 or (trunc (srl x, (add c1, c2)))
  if (N1C && N0.getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(0).getOpcode() == ISD::SRL &&
      isa<ConstantSDNode>(N0.getOperand(0)->getOperand(1))) {
    uint64_t c1 =
      cast<ConstantSDNode>(N0.getOperand(0)->getOperand(1))->getZExtValue();
    uint64_t c2 = N1C->getZExtValue();
    EVT InnerShiftVT = N0.getOperand(0).getValueType();
    EVT ShiftCountVT = N0.getOperand(0)->getOperand(1).getValueType();
    uint64_t InnerShiftSize = InnerShiftVT.getScalarType().getSizeInBits();
    // This is only valid if the OpSizeInBits + c1 = size of inner shift.
    if (c1 + OpSizeInBits == InnerShiftSize) {
      if (c1 + c2 >= InnerShiftSize)
        return DAG.getConstant(0, VT);
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT,
                         DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT,
                                     N0.getOperand(0)->getOperand(0),
                                     DAG.getConstant(c1 + c2, ShiftCountVT)));
    }
  }

  // fold (srl (shl x, c), c) -> (and x, cst2)
  if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1) {
    unsigned BitSize = N0.getScalarValueSizeInBits();
    if (BitSize <= 64) {
      // Shift a 64-bit all-ones value so that (BitSize - c) low bits remain.
      uint64_t ShAmt = N1C->getZExtValue() + 64 - BitSize;
      return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0),
                         DAG.getConstant(~0ULL >> ShAmt, VT));
    }
  }

  // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
  if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
    // Shifting in all undef bits?
    EVT SmallVT = N0.getOperand(0).getValueType();
    unsigned BitSize = SmallVT.getScalarSizeInBits();
    if (N1C->getZExtValue() >= BitSize)
      return DAG.getUNDEF(VT);

    if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) {
      uint64_t ShiftAmt = N1C->getZExtValue();
      SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT,
                                       N0.getOperand(0),
                          DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
      AddToWorkList(SmallShift.getNode());
      // Mask off the bits that the any_extend left undefined.
      APInt Mask = APInt::getAllOnesValue(OpSizeInBits).lshr(ShiftAmt);
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
                         DAG.getConstant(Mask, VT));
    }
  }

  // fold (srl (sra X, Y), 31) -> (srl X, 31).  This srl only looks at the sign
  // bit, which is unmodified by sra.
  if (N1C && N1C->getZExtValue() + 1 == OpSizeInBits) {
    if (N0.getOpcode() == ISD::SRA)
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1);
  }

  // fold (srl (ctlz x), "5") -> x  iff x has one bit set (the low bit).
  if (N1C && N0.getOpcode() == ISD::CTLZ &&
      N1C->getAPIntValue() == Log2_32(OpSizeInBits)) {
    APInt KnownZero, KnownOne;
    DAG.computeKnownBits(N0.getOperand(0), KnownZero, KnownOne);

    // If any of the input bits are KnownOne, then the input couldn't be all
    // zeros, thus the result of the srl will always be zero.
    if (KnownOne.getBoolValue()) return DAG.getConstant(0, VT);

    // If all of the bits input the to ctlz node are known to be zero, then
    // the result of the ctlz is "32" and the result of the shift is one.
    APInt UnknownBits = ~KnownZero;
    if (UnknownBits == 0) return DAG.getConstant(1, VT);

    // Otherwise, check to see if there is exactly one bit input to the ctlz.
    if ((UnknownBits & (UnknownBits - 1)) == 0) {
      // Okay, we know that only that the single bit specified by UnknownBits
      // could be set on input to the CTLZ node. If this bit is set, the SRL
      // will return 0, if it is clear, it returns 1. Change the CTLZ/SRL pair
      // to an SRL/XOR pair, which is likely to simplify more.
      unsigned ShAmt = UnknownBits.countTrailingZeros();
      SDValue Op = N0.getOperand(0);

      if (ShAmt) {
        // Move the interesting bit down to bit zero first.
        Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op,
                  DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType())));
        AddToWorkList(Op.getNode());
      }

      return DAG.getNode(ISD::XOR, SDLoc(N), VT,
                         Op, DAG.getConstant(1, VT));
    }
  }

  // fold (srl x, (trunc (and y, c))) -> (srl x, (and (trunc y), (trunc c))).
  if (N1.getOpcode() == ISD::TRUNCATE &&
      N1.getOperand(0).getOpcode() == ISD::AND) {
    SDValue NewOp1 = distributeTruncateThroughAnd(N1.getNode());
    if (NewOp1.getNode())
      return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, NewOp1);
  }

  // fold operands of srl based on knowledge that the low bits are not
  // demanded.
  if (N1C && SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  if (N1C) {
    SDValue NewSRL = visitShiftByConstant(N, N1C);
    if (NewSRL.getNode())
      return NewSRL;
  }

  // Attempt to convert a srl of a load into a narrower zero-extending load.
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // Here is a common situation. We want to optimize:
  //
  //   %a = ...
  //   %b = and i32 %a, 2
  //   %c = srl i32 %b, 1
  //   brcond i32 %c ...
  //
  // into
  //
  //   %a = ...
  //   %b = and %a, 2
  //   %c = setcc eq %b, 0
  //   brcond %c ...
  //
  // However when after the source operand of SRL is optimized into AND, the SRL
  // itself may not be optimized further.
  // Look for it and add the BRCOND into
  // the worklist.
  if (N->hasOneUse()) {
    SDNode *Use = *N->use_begin();
    if (Use->getOpcode() == ISD::BRCOND)
      AddToWorkList(Use);
    else if (Use->getOpcode() == ISD::TRUNCATE && Use->hasOneUse()) {
      // Also look pass the truncate.
      Use = *Use->use_begin();
      if (Use->getOpcode() == ISD::BRCOND)
        AddToWorkList(Use);
    }
  }

  return SDValue();
}

/// visitCTLZ - Combine an ISD::CTLZ node; a constant operand is re-emitted
/// through getNode so it can be folded.
SDValue DAGCombiner::visitCTLZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// visitCTLZ_ZERO_UNDEF - Same as visitCTLZ for the zero-is-undef variant.
SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctlz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// visitCTTZ - Combine an ISD::CTTZ node; a constant operand is re-emitted
/// through getNode so it can be folded.
SDValue DAGCombiner::visitCTTZ(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0);
  return SDValue();
}

/// visitCTTZ_ZERO_UNDEF - Same as visitCTTZ for the zero-is-undef variant.
SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (cttz_zero_undef c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0);
  return SDValue();
}

/// visitCTPOP - Combine an ISD::CTPOP node; a constant operand is re-emitted
/// through getNode so it can be folded.
SDValue DAGCombiner::visitCTPOP(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // fold (ctpop c1) -> c2
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0);
  return SDValue();
}

/// visitSELECT - Run the DAG combines for a scalar ISD::SELECT node
/// (select C, N1, N2).  Most folds below apply only to i1 selects.
SDValue DAGCombiner::visitSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2);
  EVT VT = N->getValueType(0);
  EVT VT0 = N0.getValueType();

  // fold (select C, X, X) -> X
  if (N1 == N2)
    return N1;
  // fold (select true, X, Y) -> X
  if (N0C && !N0C->isNullValue())
    return N1;
  // fold (select false, X, Y) -> Y
  if (N0C && N0C->isNullValue())
    return N2;
  // fold (select C, 1, X) -> (or C, X)
  if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1)
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select C, 0, 1) -> (xor C, 1)
  // We can't do this reliably if integer based booleans have different contents
  // to floating point based booleans. This is because we can't tell whether we
  // have an integer-based boolean or a floating-point-based boolean unless we
  // can find the SETCC that produced it and inspect its operands. This is
  // fairly easy if C is the SETCC node, but it can potentially be
  // undiscoverable (or not reasonably discoverable). For example, it could be
  // in another basic block or it could require searching a complicated
  // expression.
  if (VT.isInteger() &&
      (VT0 == MVT::i1 || (VT0.isInteger() &&
                          TLI.getBooleanContents(false, false) ==
                              TLI.getBooleanContents(false, true) &&
                          TLI.getBooleanContents(false, false) ==
                              TargetLowering::ZeroOrOneBooleanContent)) &&
      N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) {
    SDValue XORNode;
    if (VT == VT0)
      return DAG.getNode(ISD::XOR, SDLoc(N), VT0,
                         N0, DAG.getConstant(1, VT0));
    // Condition and result types differ: xor in the condition type, then
    // extend or truncate to the result type.
    XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0,
                          N0, DAG.getConstant(1, VT0));
    AddToWorkList(XORNode.getNode());
    if (VT.bitsGT(VT0))
      return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode);
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode);
  }
  // fold (select C, 0, X) -> (and (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2);
  }
  // fold (select C, X, 1) -> (or (not C), X)
  if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) {
    SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT);
    AddToWorkList(NOTNode.getNode());
    return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1);
  }
  // fold (select C, X, 0) -> (and C, X)
  if (VT == MVT::i1 && N2C && N2C->isNullValue())
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);
  // fold (select X, X, Y) -> (or X, Y)
  // fold (select X, 1, Y) -> (or X, Y)
  if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1)))
    return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2);
  // fold (select X, Y, X) -> (and X, Y)
  // fold (select X, Y, 0) -> (and X, Y)
  if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0)))
    return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1);

  // If we can fold this based on the true/false value, do so.
4578 if (SimplifySelectOps(N, N1, N2)) 4579 return SDValue(N, 0); // Don't revisit N. 4580 4581 // fold selects based on a setcc into other things, such as min/max/abs 4582 if (N0.getOpcode() == ISD::SETCC) { 4583 if ((!LegalOperations && 4584 TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) || 4585 TLI.isOperationLegal(ISD::SELECT_CC, VT)) 4586 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, 4587 N0.getOperand(0), N0.getOperand(1), 4588 N1, N2, N0.getOperand(2)); 4589 return SimplifySelect(SDLoc(N), N0, N1, N2); 4590 } 4591 4592 return SDValue(); 4593 } 4594 4595 static 4596 std::pair<SDValue, SDValue> SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { 4597 SDLoc DL(N); 4598 EVT LoVT, HiVT; 4599 std::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); 4600 4601 // Split the inputs. 4602 SDValue Lo, Hi, LL, LH, RL, RH; 4603 std::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); 4604 std::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); 4605 4606 Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); 4607 Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); 4608 4609 return std::make_pair(Lo, Hi); 4610 } 4611 4612 // This function assumes all the vselect's arguments are CONCAT_VECTOR 4613 // nodes and that the condition is a BV of ConstantSDNodes (or undefs). 4614 static SDValue ConvertSelectToConcatVector(SDNode *N, SelectionDAG &DAG) { 4615 SDLoc dl(N); 4616 SDValue Cond = N->getOperand(0); 4617 SDValue LHS = N->getOperand(1); 4618 SDValue RHS = N->getOperand(2); 4619 MVT VT = N->getSimpleValueType(0); 4620 int NumElems = VT.getVectorNumElements(); 4621 assert(LHS.getOpcode() == ISD::CONCAT_VECTORS && 4622 RHS.getOpcode() == ISD::CONCAT_VECTORS && 4623 Cond.getOpcode() == ISD::BUILD_VECTOR); 4624 4625 // We're sure we have an even number of elements due to the 4626 // concat_vectors we have as arguments to vselect. 
  // Skip BV elements until we find one that's not an UNDEF
  // After we find an UNDEF element, keep looping until we get to half the
  // length of the BV and see if all the non-undef nodes are the same.
  ConstantSDNode *BottomHalf = nullptr;
  for (int i = 0; i < NumElems / 2; ++i) {
    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
      continue;

    if (BottomHalf == nullptr)
      BottomHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != BottomHalf)
      // Mixed selector values in the bottom half: cannot fold.
      return SDValue();
  }

  // Do the same for the second half of the BuildVector
  ConstantSDNode *TopHalf = nullptr;
  for (int i = NumElems / 2; i < NumElems; ++i) {
    if (Cond->getOperand(i)->getOpcode() == ISD::UNDEF)
      continue;

    if (TopHalf == nullptr)
      TopHalf = cast<ConstantSDNode>(Cond.getOperand(i));
    else if (Cond->getOperand(i).getNode() != TopHalf)
      return SDValue();
  }

  assert(TopHalf && BottomHalf &&
         "One half of the selector was all UNDEFs and the other was all the "
         "same value. This should have been addressed before this function.");
  // Each half uniformly selects from LHS or RHS; pick the matching
  // concat_vectors operand for each half.
  return DAG.getNode(
      ISD::CONCAT_VECTORS, dl, VT,
      BottomHalf->isNullValue() ? RHS->getOperand(0) : LHS->getOperand(0),
      TopHalf->isNullValue() ? RHS->getOperand(1) : LHS->getOperand(1));
}

/// visitVSELECT - Run the DAG combines for a vector ISD::VSELECT node.
SDValue DAGCombiner::visitVSELECT(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  SDLoc DL(N);

  // Canonicalize integer abs.
  // vselect (setg[te] X,  0),  X, -X ->
  // vselect (setgt    X, -1),  X, -X ->
  // vselect (setl[te] X,  0), -X,  X ->
  // Y = sra (X, size(X)-1); xor (add (X, Y), Y)
  if (N0.getOpcode() == ISD::SETCC) {
    SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1);
    ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
    bool isAbs = false;
    bool RHSIsAllZeros = ISD::isBuildVectorAllZeros(RHS.getNode());

    if (((RHSIsAllZeros && (CC == ISD::SETGT || CC == ISD::SETGE)) ||
         (ISD::isBuildVectorAllOnes(RHS.getNode()) && CC == ISD::SETGT)) &&
        N1 == LHS && N2.getOpcode() == ISD::SUB && N1 == N2.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N2.getOperand(0).getNode());
    else if ((RHSIsAllZeros && (CC == ISD::SETLT || CC == ISD::SETLE)) &&
             N2 == LHS && N1.getOpcode() == ISD::SUB && N2 == N1.getOperand(1))
      isAbs = ISD::isBuildVectorAllZeros(N1.getOperand(0).getNode());

    if (isAbs) {
      EVT VT = LHS.getValueType();
      SDValue Shift = DAG.getNode(
          ISD::SRA, DL, VT, LHS,
          DAG.getConstant(VT.getScalarType().getSizeInBits() - 1, VT));
      SDValue Add = DAG.getNode(ISD::ADD, DL, VT, LHS, Shift);
      AddToWorkList(Shift.getNode());
      AddToWorkList(Add.getNode());
      return DAG.getNode(ISD::XOR, DL, VT, Add, Shift);
    }
  }

  // If the VSELECT result requires splitting and the mask is provided by a
  // SETCC, then split both nodes and its operands before legalization. This
  // prevents the type legalizer from unrolling SETCC into scalar comparisons
  // and enables future optimizations (e.g. min/max pattern matching on X86).
  if (N0.getOpcode() == ISD::SETCC) {
    EVT VT = N->getValueType(0);

    // Check if any splitting is required.
    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
        TargetLowering::TypeSplitVector)
      return SDValue();

    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
    std::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
    std::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
    std::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);

    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);

    // Add the new VSELECT nodes to the work list in case they need to be split
    // again.
    AddToWorkList(Lo.getNode());
    AddToWorkList(Hi.getNode());

    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
  }

  // Fold (vselect (build_vector all_ones), N1, N2) -> N1
  if (ISD::isBuildVectorAllOnes(N0.getNode()))
    return N1;
  // Fold (vselect (build_vector all_zeros), N1, N2) -> N2
  if (ISD::isBuildVectorAllZeros(N0.getNode()))
    return N2;

  // The ConvertSelectToConcatVector function is assuming both the above
  // checks for (vselect (build_vector all{ones,zeros) ...) have been made
  // and addressed.
4737 if (N1.getOpcode() == ISD::CONCAT_VECTORS && 4738 N2.getOpcode() == ISD::CONCAT_VECTORS && 4739 ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) { 4740 SDValue CV = ConvertSelectToConcatVector(N, DAG); 4741 if (CV.getNode()) 4742 return CV; 4743 } 4744 4745 return SDValue(); 4746 } 4747 4748 SDValue DAGCombiner::visitSELECT_CC(SDNode *N) { 4749 SDValue N0 = N->getOperand(0); 4750 SDValue N1 = N->getOperand(1); 4751 SDValue N2 = N->getOperand(2); 4752 SDValue N3 = N->getOperand(3); 4753 SDValue N4 = N->getOperand(4); 4754 ISD::CondCode CC = cast<CondCodeSDNode>(N4)->get(); 4755 4756 // fold select_cc lhs, rhs, x, x, cc -> x 4757 if (N2 == N3) 4758 return N2; 4759 4760 // Determine if the condition we're dealing with is constant 4761 SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()), 4762 N0, N1, CC, SDLoc(N), false); 4763 if (SCC.getNode()) { 4764 AddToWorkList(SCC.getNode()); 4765 4766 if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) { 4767 if (!SCCC->isNullValue()) 4768 return N2; // cond always true -> true val 4769 else 4770 return N3; // cond always false -> false val 4771 } 4772 4773 // Fold to a simpler select_cc 4774 if (SCC.getOpcode() == ISD::SETCC) 4775 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(), 4776 SCC.getOperand(0), SCC.getOperand(1), N2, N3, 4777 SCC.getOperand(2)); 4778 } 4779 4780 // If we can fold this based on the true/false value, do so. 4781 if (SimplifySelectOps(N, N2, N3)) 4782 return SDValue(N, 0); // Don't revisit N. 
4783 4784 // fold select_cc into other things, such as min/max/abs 4785 return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); 4786 } 4787 4788 SDValue DAGCombiner::visitSETCC(SDNode *N) { 4789 return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), 4790 cast<CondCodeSDNode>(N->getOperand(2))->get(), 4791 SDLoc(N)); 4792 } 4793 4794 // tryToFoldExtendOfConstant - Try to fold a sext/zext/aext 4795 // dag node into a ConstantSDNode or a build_vector of constants. 4796 // This function is called by the DAGCombiner when visiting sext/zext/aext 4797 // dag nodes (see for example method DAGCombiner::visitSIGN_EXTEND). 4798 // Vector extends are not folded if operations are legal; this is to 4799 // avoid introducing illegal build_vector dag nodes. 4800 static SDNode *tryToFoldExtendOfConstant(SDNode *N, const TargetLowering &TLI, 4801 SelectionDAG &DAG, bool LegalTypes, 4802 bool LegalOperations) { 4803 unsigned Opcode = N->getOpcode(); 4804 SDValue N0 = N->getOperand(0); 4805 EVT VT = N->getValueType(0); 4806 4807 assert((Opcode == ISD::SIGN_EXTEND || Opcode == ISD::ZERO_EXTEND || 4808 Opcode == ISD::ANY_EXTEND) && "Expected EXTEND dag node in input!"); 4809 4810 // fold (sext c1) -> c1 4811 // fold (zext c1) -> c1 4812 // fold (aext c1) -> c1 4813 if (isa<ConstantSDNode>(N0)) 4814 return DAG.getNode(Opcode, SDLoc(N), VT, N0).getNode(); 4815 4816 // fold (sext (build_vector AllConstants) -> (build_vector AllConstants) 4817 // fold (zext (build_vector AllConstants) -> (build_vector AllConstants) 4818 // fold (aext (build_vector AllConstants) -> (build_vector AllConstants) 4819 EVT SVT = VT.getScalarType(); 4820 if (!(VT.isVector() && 4821 (!LegalTypes || (!LegalOperations && TLI.isTypeLegal(SVT))) && 4822 ISD::isBuildVectorOfConstantSDNodes(N0.getNode()))) 4823 return nullptr; 4824 4825 // We can fold this node into a build_vector. 
4826 unsigned VTBits = SVT.getSizeInBits(); 4827 unsigned EVTBits = N0->getValueType(0).getScalarType().getSizeInBits(); 4828 unsigned ShAmt = VTBits - EVTBits; 4829 SmallVector<SDValue, 8> Elts; 4830 unsigned NumElts = N0->getNumOperands(); 4831 SDLoc DL(N); 4832 4833 for (unsigned i=0; i != NumElts; ++i) { 4834 SDValue Op = N0->getOperand(i); 4835 if (Op->getOpcode() == ISD::UNDEF) { 4836 Elts.push_back(DAG.getUNDEF(SVT)); 4837 continue; 4838 } 4839 4840 ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op); 4841 const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue()); 4842 if (Opcode == ISD::SIGN_EXTEND) 4843 Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(), 4844 SVT)); 4845 else 4846 Elts.push_back(DAG.getConstant(C.shl(ShAmt).lshr(ShAmt).getZExtValue(), 4847 SVT)); 4848 } 4849 4850 return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, Elts).getNode(); 4851 } 4852 4853 // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: 4854 // "fold ({s|z|a}ext (load x)) -> ({s|z|a}ext (truncate ({s|z|a}extload x)))" 4855 // transformation. Returns true if extension are possible and the above 4856 // mentioned transformation is profitable. 4857 static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, 4858 unsigned ExtOpc, 4859 SmallVectorImpl<SDNode *> &ExtendNodes, 4860 const TargetLowering &TLI) { 4861 bool HasCopyToRegUses = false; 4862 bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); 4863 for (SDNode::use_iterator UI = N0.getNode()->use_begin(), 4864 UE = N0.getNode()->use_end(); 4865 UI != UE; ++UI) { 4866 SDNode *User = *UI; 4867 if (User == N) 4868 continue; 4869 if (UI.getUse().getResNo() != N0.getResNo()) 4870 continue; 4871 // FIXME: Only extend SETCC N, N and SETCC N, c for now. 
4872 if (ExtOpc != ISD::ANY_EXTEND && User->getOpcode() == ISD::SETCC) { 4873 ISD::CondCode CC = cast<CondCodeSDNode>(User->getOperand(2))->get(); 4874 if (ExtOpc == ISD::ZERO_EXTEND && ISD::isSignedIntSetCC(CC)) 4875 // Sign bits will be lost after a zext. 4876 return false; 4877 bool Add = false; 4878 for (unsigned i = 0; i != 2; ++i) { 4879 SDValue UseOp = User->getOperand(i); 4880 if (UseOp == N0) 4881 continue; 4882 if (!isa<ConstantSDNode>(UseOp)) 4883 return false; 4884 Add = true; 4885 } 4886 if (Add) 4887 ExtendNodes.push_back(User); 4888 continue; 4889 } 4890 // If truncates aren't free and there are users we can't 4891 // extend, it isn't worthwhile. 4892 if (!isTruncFree) 4893 return false; 4894 // Remember if this value is live-out. 4895 if (User->getOpcode() == ISD::CopyToReg) 4896 HasCopyToRegUses = true; 4897 } 4898 4899 if (HasCopyToRegUses) { 4900 bool BothLiveOut = false; 4901 for (SDNode::use_iterator UI = N->use_begin(), UE = N->use_end(); 4902 UI != UE; ++UI) { 4903 SDUse &Use = UI.getUse(); 4904 if (Use.getResNo() == 0 && Use.getUser()->getOpcode() == ISD::CopyToReg) { 4905 BothLiveOut = true; 4906 break; 4907 } 4908 } 4909 if (BothLiveOut) 4910 // Both unextended and extended values are live out. There had better be 4911 // a good reason for the transformation. 4912 return ExtendNodes.size(); 4913 } 4914 return true; 4915 } 4916 4917 void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl<SDNode *> &SetCCs, 4918 SDValue Trunc, SDValue ExtLoad, SDLoc DL, 4919 ISD::NodeType ExtType) { 4920 // Extend SetCC uses if necessary. 
  for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) {
    SDNode *SetCC = SetCCs[i];
    SmallVector<SDValue, 4> Ops;

    for (unsigned j = 0; j != 2; ++j) {
      SDValue SOp = SetCC->getOperand(j);
      if (SOp == Trunc)
        Ops.push_back(ExtLoad);
      else
        Ops.push_back(DAG.getNode(ExtType, DL, ExtLoad->getValueType(0), SOp));
    }

    Ops.push_back(SetCC->getOperand(2));
    CombineTo(SetCC, DAG.getNode(ISD::SETCC, DL, SetCC->getValueType(0), Ops));
  }
}

/// visitSIGN_EXTEND - Run the DAG combines for an ISD::SIGN_EXTEND node.
SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (sext (sext x)) -> (sext x)
  // fold (sext (aext x)) -> (sext x)
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  if (N0.getOpcode() == ISD::TRUNCATE) {
    // fold (sext (truncate (load x))) -> (sext (smaller load x))
    // fold (sext (truncate (srl (load x), c))) -> (sext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // See if the value being truncated is already sign extended.  If so, just
    // eliminate the trunc/sext pair.
    SDValue Op = N0.getOperand(0);
    unsigned OpBits   = Op.getValueType().getScalarType().getSizeInBits();
    unsigned MidBits  = N0.getValueType().getScalarType().getSizeInBits();
    unsigned DestBits = VT.getScalarType().getSizeInBits();
    unsigned NumSignBits = DAG.ComputeNumSignBits(Op);

    if (OpBits == DestBits) {
      // Op is i32, Mid is i8, and Dest is i32.  If Op has more than 24 sign
      // bits, it is already ready.
      if (NumSignBits > DestBits-MidBits)
        return Op;
    } else if (OpBits < DestBits) {
      // Op is i32, Mid is i8, and Dest is i64.  If Op has more than 24 sign
      // bits, just sext from i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op);
    } else {
      // Op is i64, Mid is i8, and Dest is i32.  If Op has more than 56 sign
      // bits, just truncate to i32.
      if (NumSignBits > OpBits-MidBits)
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
    }

    // fold (sext (truncate x)) -> (sextinreg x).
    if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG,
                                                 N0.getValueType())) {
      if (OpBits < DestBits)
        Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op);
      else if (OpBits > DestBits)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op);
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op,
                         DAG.getValueType(N0.getValueType()));
    }
  }

  // fold (sext (load x)) -> (sext (truncate (sextload x)))
  // None of the supported targets knows how to perform load and sign extend
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::SIGN_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (sextload x)) -> (sext (truncate (sextload x)))
  // fold (sext ( extload x)) -> (sext (truncate (sextload x)))
  if ((ISD::isSEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (sext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (sextload x), (sext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::SEXTLOAD, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::ZEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        // Sign-extend the constant operand to the wider type as well.
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.sext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::SIGN_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    EVT N0VT = N0.getOperand(0).getValueType();
    // sext(setcc) -> sext_in_reg(vsetcc) for vectors.
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations &&
        TLI.getBooleanContents(N0VT) ==
            TargetLowering::ZeroOrNegativeOneBooleanContent) {
      // On some architectures (such as SSE/NEON/etc) the SETCC result type is
      // of the same size as the compared operands. Only optimize sext(setcc())
      // if this is the case.
      EVT SVT = getSetCCResultType(N0VT);

      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == SVT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
      if (SVT == MatchingVectorType) {
        SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType,
                                      N0.getOperand(0), N0.getOperand(1),
                                      cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // sext(setcc x, y, cc) -> (select (setcc x, y, cc), -1, 0)
    unsigned ElementWidth = VT.getScalarType().getSizeInBits();
    SDValue NegOne =
      DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT);
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       NegOne, DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;

    if (!VT.isVector()) {
      EVT SetCCVT = getSetCCResultType(N0.getOperand(0).getValueType());
      if (!LegalOperations || TLI.isOperationLegal(ISD::SETCC, SetCCVT)) {
        SDLoc DL(N);
        ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
        SDValue SetCC = DAG.getSetCC(DL,
                                     SetCCVT,
                                     N0.getOperand(0), N0.getOperand(1), CC);
        EVT SelectVT = getSetCCResultType(VT);
        return DAG.getSelect(DL, VT,
                             DAG.getSExtOrTrunc(SetCC, DL, SelectVT),
                             NegOne, DAG.getConstant(0, VT));

      }
    }
  }

  // fold (sext x) -> (zext x) if the sign bit is known zero.
  if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) &&
      DAG.SignBitIsZero(N0))
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0);

  return SDValue();
}

// isTruncateOf - If N is a truncate of some other value, return true, record
// the value being truncated in Op and which of Op's bits are zero in KnownZero.
// This function computes KnownZero to avoid a duplicated call to
// computeKnownBits in the caller.
// Also recognizes (setne x, 0)/(setne 0, x) on i1 as a "truncate" of x when
// x is known to be 0 or 1.
static bool isTruncateOf(SelectionDAG &DAG, SDValue N, SDValue &Op,
                         APInt &KnownZero) {
  APInt KnownOne;
  if (N->getOpcode() == ISD::TRUNCATE) {
    Op = N->getOperand(0);
    DAG.computeKnownBits(Op, KnownZero, KnownOne);
    return true;
  }

  if (N->getOpcode() != ISD::SETCC || N->getValueType(0) != MVT::i1 ||
      cast<CondCodeSDNode>(N->getOperand(2))->get() != ISD::SETNE)
    return false;

  SDValue Op0 = N->getOperand(0);
  SDValue Op1 = N->getOperand(1);
  assert(Op0.getValueType() == Op1.getValueType());

  ConstantSDNode *COp0 = dyn_cast<ConstantSDNode>(Op0);
  ConstantSDNode *COp1 = dyn_cast<ConstantSDNode>(Op1);
  if (COp0 && COp0->isNullValue())
    Op = Op1;
  else if (COp1 && COp1->isNullValue())
    Op = Op0;
  else
    return false;

  DAG.computeKnownBits(Op, KnownZero, KnownOne);

  // All bits of Op above bit 0 must be known zero for the setcc to act as a
  // truncate to i1.
  if (!(KnownZero | APInt(Op.getValueSizeInBits(), 1)).isAllOnesValue())
    return false;

  return true;
}
SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Try the generic constant/build_vector extension fold first.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (zext (zext x)) -> (zext x)
  // fold (zext (aext x)) -> (zext x)
  if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND)
    return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT,
                       N0.getOperand(0));

  // fold (zext (truncate x)) -> (zext x) or
  //      (zext (truncate x)) -> (truncate x)
  // This is valid when the truncated bits of x are already zero.
  // FIXME: We should extend this to work for vectors too.
  SDValue Op;
  APInt KnownZero;
  if (!VT.isVector() && isTruncateOf(DAG, N0, Op, KnownZero)) {
    // Bits of Op that the truncate discards; they must all be known zero
    // for the fold to be valid.
    APInt TruncatedBits =
      (Op.getValueSizeInBits() == N0.getValueSizeInBits()) ?
      APInt(Op.getValueSizeInBits(), 0) :
      APInt::getBitsSet(Op.getValueSizeInBits(),
                        N0.getValueSizeInBits(),
                        std::min(Op.getValueSizeInBits(),
                                 VT.getSizeInBits()));
    if (TruncatedBits == (KnownZero & TruncatedBits)) {
      if (VT.bitsGT(Op.getValueType()))
        return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op);
      if (VT.bitsLT(Op.getValueType()))
        return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);

      return Op;
    }
  }

  // fold (zext (truncate (load x))) -> (zext (smaller load x))
  // fold (zext (truncate (srl (load x), c))) -> (zext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (truncate x)) -> (and x, mask)
  if (N0.getOpcode() == ISD::TRUNCATE &&
      (!LegalOperations || TLI.isOperationLegal(ISD::AND, VT))) {

    // fold (zext (truncate (load x))) -> (zext (smaller load x))
    // fold (zext (truncate (srl (load x), c))) -> (zext (smaller load (x+c/n)))
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }

    // Resize the truncate's input to VT, then mask off everything above the
    // truncated width.
    SDValue Op = N0.getOperand(0);
    if (Op.getValueType().bitsLT(VT)) {
      Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op);
      AddToWorkList(Op.getNode());
    } else if (Op.getValueType().bitsGT(VT)) {
      Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op);
      AddToWorkList(Op.getNode());
    }
    return DAG.getZeroExtendInReg(Op, SDLoc(N),
                                  N0.getValueType().getScalarType());
  }

  // Fold (zext (and (trunc x), cst)) -> (and x, cst),
  // if either of the casts is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      (!TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                           N0.getValueType()) ||
       !TLI.isZExtFree(N0.getValueType(), VT))) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (zext (load x)) -> (zext (truncate (zextload x)))
  // None of the supported targets knows how to perform load and vector_zext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Keep the narrow value alive as a truncate of the extending load so
      // any remaining users of the old load are rewritten too.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));

      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ZERO_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (zext (and/or/xor (load x), cst)) ->
  //      (and/or/xor (zextload x), (zext cst))
  if ((N0.getOpcode() == ISD::AND || N0.getOpcode() == ISD::OR ||
       N0.getOpcode() == ISD::XOR) &&
      isa<LoadSDNode>(N0.getOperand(0)) &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      TLI.isLoadExtLegal(ISD::ZEXTLOAD, N0.getValueType()) &&
      (!LegalOperations && TLI.isOperationLegal(N0.getOpcode(), VT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0.getOperand(0));
    if (LN0->getExtensionType() != ISD::SEXTLOAD && LN0->isUnindexed()) {
      bool DoXform = true;
      SmallVector<SDNode*, 4> SetCCs;
      if (!N0.hasOneUse())
        DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND,
                                          SetCCs, TLI);
      if (DoXform) {
        SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT,
                                         LN0->getChain(), LN0->getBasePtr(),
                                         LN0->getMemoryVT(),
                                         LN0->getMemOperand());
        APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
        Mask = Mask.zext(VT.getSizeInBits());
        SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT,
                                  ExtLoad, DAG.getConstant(Mask, VT));
        SDValue Trunc = DAG.getNode(ISD::TRUNCATE,
                                    SDLoc(N0.getOperand(0)),
                                    N0.getOperand(0).getValueType(), ExtLoad);
        CombineTo(N, And);
        CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1));
        ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                        ISD::ZERO_EXTEND);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // fold (zext (zextload x)) -> (zext (truncate (zextload x)))
  // fold (zext ( extload x)) -> (zext (truncate (zextload x)))
  if ((ISD::isZEXTLoad(N0.getNode()) || ISD::isEXTLoad(N0.getNode())) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) && N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    EVT MemVT = LN0->getMemoryVT();
    if ((!LegalOperations && !LN0->isVolatile()) ||
        TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), MemVT,
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(),
                            ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    if (!LegalOperations && VT.isVector() &&
        N0.getValueType().getVectorElementType() == MVT::i1) {
      EVT N0VT = N0.getOperand(0).getValueType();
      if (getSetCCResultType(N0VT) == N0.getValueType())
        return SDValue();

      // zext(setcc) -> (and (vsetcc), (1, 1, ...) for vectors.
      // Only do this before legalize for now.
      EVT EltVT = VT.getVectorElementType();
      SmallVector<SDValue,8> OneOps(VT.getVectorNumElements(),
                                    DAG.getConstant(1, EltVT));
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        // We know that the # elements of the results is the same as the
        // # elements of the compare (and the # elements of the compare result
        // for that matter).  Check to see that they are the same size.  If so,
        // we know that the element size of the sext'd result matches the
        // element size of the compare operands.
        return DAG.getNode(ISD::AND, SDLoc(N), VT,
                           DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                                        N0.getOperand(1),
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get()),
                           DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT,
                                       OneOps));

      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/sign extend
      EVT MatchingElementType =
        EVT::getIntegerVT(*DAG.getContext(),
                          N0VT.getScalarType().getSizeInBits());
      EVT MatchingVectorType =
        EVT::getVectorVT(*DAG.getContext(), MatchingElementType,
                         N0VT.getVectorNumElements());
      SDValue VsetCC =
        DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                     N0.getOperand(1),
                     cast<CondCodeSDNode>(N0.getOperand(2))->get());
      return DAG.getNode(ISD::AND, SDLoc(N), VT,
                         DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT),
                         DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, OneOps));
    }

    // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode()) return SCC;
  }

  // (zext (shl (zext x), cst)) -> (shl (zext x), cst)
  if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
      isa<ConstantSDNode>(N0.getOperand(1)) &&
      N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
      N0.hasOneUse()) {
    SDValue ShAmt = N0.getOperand(1);
    unsigned ShAmtVal = cast<ConstantSDNode>(ShAmt)->getZExtValue();
    if (N0.getOpcode() == ISD::SHL) {
      SDValue InnerZExt = N0.getOperand(0);
      // If the original shl may be shifting out bits, do not perform this
      // transformation.
      unsigned KnownZeroBits = InnerZExt.getValueType().getSizeInBits() -
        InnerZExt.getOperand(0).getValueType().getSizeInBits();
      if (ShAmtVal > KnownZeroBits)
        return SDValue();
    }

    SDLoc DL(N);

    // Ensure that the shift amount is wide enough for the shifted value.
    if (VT.getSizeInBits() >= 256)
      ShAmt = DAG.getNode(ISD::ZERO_EXTEND, DL, MVT::i32, ShAmt);

    return DAG.getNode(N0.getOpcode(), DL, VT,
                       DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)),
                       ShAmt);
  }

  return SDValue();
}

SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);

  // Try the generic constant/build_vector extension fold first.
  if (SDNode *Res = tryToFoldExtendOfConstant(N, TLI, DAG, LegalTypes,
                                              LegalOperations))
    return SDValue(Res, 0);

  // fold (aext (aext x)) -> (aext x)
  // fold (aext (zext x)) -> (zext x)
  // fold (aext (sext x)) -> (sext x)
  if (N0.getOpcode() == ISD::ANY_EXTEND  ||
      N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND)
    return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0));

  // fold (aext (truncate (load x))) -> (aext (smaller load x))
  // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n)))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue NarrowLoad = ReduceLoadWidth(N0.getNode());
    if (NarrowLoad.getNode()) {
      SDNode* oye = N0.getNode()->getOperand(0).getNode();
      if (NarrowLoad.getNode() != N0.getNode()) {
        CombineTo(N0.getNode(), NarrowLoad);
        // CombineTo deleted the truncate, if needed, but not what's under it.
        AddToWorkList(oye);
      }
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (truncate x))
  if (N0.getOpcode() == ISD::TRUNCATE) {
    SDValue TruncOp = N0.getOperand(0);
    if (TruncOp.getValueType() == VT)
      return TruncOp; // x iff x size == zext size.
    if (TruncOp.getValueType().bitsGT(VT))
      return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp);
    return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp);
  }

  // Fold (aext (and (trunc x), cst)) -> (and x, cst)
  // if the trunc is not free.
  if (N0.getOpcode() == ISD::AND &&
      N0.getOperand(0).getOpcode() == ISD::TRUNCATE &&
      N0.getOperand(1).getOpcode() == ISD::Constant &&
      !TLI.isTruncateFree(N0.getOperand(0).getOperand(0).getValueType(),
                          N0.getValueType())) {
    SDValue X = N0.getOperand(0).getOperand(0);
    if (X.getValueType().bitsLT(VT)) {
      X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X);
    } else if (X.getValueType().bitsGT(VT)) {
      X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X);
    }
    APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
    Mask = Mask.zext(VT.getSizeInBits());
    return DAG.getNode(ISD::AND, SDLoc(N), VT,
                       X, DAG.getConstant(Mask, VT));
  }

  // fold (aext (load x)) -> (aext (truncate (extload x)))
  // None of the supported targets knows how to perform load and any_ext
  // on vectors in one instruction.  We only perform this transformation on
  // scalars.
  if (ISD::isNON_EXTLoad(N0.getNode()) && !VT.isVector() &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) {
    bool DoXform = true;
    SmallVector<SDNode*, 4> SetCCs;
    if (!N0.hasOneUse())
      DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI);
    if (DoXform) {
      LoadSDNode *LN0 = cast<LoadSDNode>(N0);
      SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT,
                                       LN0->getChain(),
                                       LN0->getBasePtr(), N0.getValueType(),
                                       LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      // Rewrite remaining users of the old (narrow) load through a truncate
      // of the extending load.
      SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                                  N0.getValueType(), ExtLoad);
      CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1));
      ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N),
                      ISD::ANY_EXTEND);
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  // fold (aext (zextload x)) -> (aext (truncate (zextload x)))
  // fold (aext (sextload x)) -> (aext (truncate (sextload x)))
  // fold (aext ( extload x)) -> (aext (truncate (extload  x)))
  if (N0.getOpcode() == ISD::LOAD &&
      !ISD::isNON_EXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse()) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    ISD::LoadExtType ExtType = LN0->getExtensionType();
    EVT MemVT = LN0->getMemoryVT();
    if (!LegalOperations || TLI.isLoadExtLegal(ExtType, MemVT)) {
      SDValue ExtLoad = DAG.getExtLoad(ExtType, SDLoc(N),
                                       VT, LN0->getChain(), LN0->getBasePtr(),
                                       MemVT, LN0->getMemOperand());
      CombineTo(N, ExtLoad);
      CombineTo(N0.getNode(),
                DAG.getNode(ISD::TRUNCATE, SDLoc(N0),
                            N0.getValueType(), ExtLoad),
                ExtLoad.getValue(1));
      return SDValue(N, 0);   // Return N so it doesn't get rechecked!
    }
  }

  if (N0.getOpcode() == ISD::SETCC) {
    // For vectors:
    // aext(setcc) -> vsetcc
    // aext(setcc) -> truncate(vsetcc)
    // aext(setcc) -> aext(vsetcc)
    // Only do this before legalize for now.
    if (VT.isVector() && !LegalOperations) {
      EVT N0VT = N0.getOperand(0).getValueType();
      // We know that the # elements of the results is the same as the
      // # elements of the compare (and the # elements of the compare result
      // for that matter).  Check to see that they are the same size.  If so,
      // we know that the element size of the sext'd result matches the
      // element size of the compare operands.
      if (VT.getSizeInBits() == N0VT.getSizeInBits())
        return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0),
                            N0.getOperand(1),
                            cast<CondCodeSDNode>(N0.getOperand(2))->get());
      // If the desired elements are smaller or larger than the source
      // elements we can use a matching integer vector type and then
      // truncate/any extend
      else {
        EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger();
        SDValue VsetCC =
          DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0),
                       N0.getOperand(1),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get());
        return DAG.getAnyExtOrTrunc(VsetCC, SDLoc(N), VT);
      }
    }

    // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc
    SDValue SCC =
      SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1),
                       DAG.getConstant(1, VT), DAG.getConstant(0, VT),
                       cast<CondCodeSDNode>(N0.getOperand(2))->get(), true);
    if (SCC.getNode())
      return SCC;
  }

  return SDValue();
}

/// GetDemandedBits - See if the specified operand can be simplified with the
/// knowledge that only the bits specified by Mask are used.  If so, return the
/// simpler operand, otherwise return a null SDValue.
5620 SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { 5621 switch (V.getOpcode()) { 5622 default: break; 5623 case ISD::Constant: { 5624 const ConstantSDNode *CV = cast<ConstantSDNode>(V.getNode()); 5625 assert(CV && "Const value should be ConstSDNode."); 5626 const APInt &CVal = CV->getAPIntValue(); 5627 APInt NewVal = CVal & Mask; 5628 if (NewVal != CVal) 5629 return DAG.getConstant(NewVal, V.getValueType()); 5630 break; 5631 } 5632 case ISD::OR: 5633 case ISD::XOR: 5634 // If the LHS or RHS don't contribute bits to the or, drop them. 5635 if (DAG.MaskedValueIsZero(V.getOperand(0), Mask)) 5636 return V.getOperand(1); 5637 if (DAG.MaskedValueIsZero(V.getOperand(1), Mask)) 5638 return V.getOperand(0); 5639 break; 5640 case ISD::SRL: 5641 // Only look at single-use SRLs. 5642 if (!V.getNode()->hasOneUse()) 5643 break; 5644 if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(V.getOperand(1))) { 5645 // See if we can recursively simplify the LHS. 5646 unsigned Amt = RHSC->getZExtValue(); 5647 5648 // Watch out for shift count overflow though. 5649 if (Amt >= Mask.getBitWidth()) break; 5650 APInt NewMask = Mask << Amt; 5651 SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); 5652 if (SimplifyLHS.getNode()) 5653 return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), 5654 SimplifyLHS, V.getOperand(1)); 5655 } 5656 } 5657 return SDValue(); 5658 } 5659 5660 /// ReduceLoadWidth - If the result of a wider load is shifted to right of N 5661 /// bits and then truncated to a narrower type and where N is a multiple 5662 /// of number of bits of the narrower type, transform it to a narrower load 5663 /// from address + N / num of bits of new type. If the result is to be 5664 /// extended, also fold the extension to form a extending load. 
SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
  unsigned Opc = N->getOpcode();

  ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  EVT ExtVT = VT;

  // This transformation isn't valid for vector loads.
  if (VT.isVector())
    return SDValue();

  // Special case: SIGN_EXTEND_INREG is basically truncating to ExtVT then
  // extended to VT.
  if (Opc == ISD::SIGN_EXTEND_INREG) {
    ExtType = ISD::SEXTLOAD;
    ExtVT = cast<VTSDNode>(N->getOperand(1))->getVT();
  } else if (Opc == ISD::SRL) {
    // Another special-case: SRL is basically zero-extending a narrower value.
    ExtType = ISD::ZEXTLOAD;
    N0 = SDValue(N, 0);
    ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1));
    if (!N01) return SDValue();
    ExtVT = EVT::getIntegerVT(*DAG.getContext(),
                              VT.getSizeInBits() - N01->getZExtValue());
  }
  if (LegalOperations && !TLI.isLoadExtLegal(ExtType, ExtVT))
    return SDValue();

  unsigned EVTBits = ExtVT.getSizeInBits();

  // Do not generate loads of non-round integer types since these can
  // be expensive (and would be wrong if the type is not byte sized).
  if (!ExtVT.isRound())
    return SDValue();

  unsigned ShAmt = 0;
  if (N0.getOpcode() == ISD::SRL && N0.hasOneUse()) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShAmt = N01->getZExtValue();
      // Is the shift amount a multiple of size of VT?
      if ((ShAmt & (EVTBits-1)) == 0) {
        N0 = N0.getOperand(0);
        // Is the load width a multiple of size of VT?
        if ((N0.getValueType().getSizeInBits() & (EVTBits-1)) != 0)
          return SDValue();
      }

      // At this point, we must have a load or else we can't do the transform.
      if (!isa<LoadSDNode>(N0)) return SDValue();

      // Because a SRL must be assumed to *need* to zero-extend the high bits
      // (as opposed to anyext the high bits), we can't combine the zextload
      // lowering of SRL and an sextload.
      if (cast<LoadSDNode>(N0)->getExtensionType() == ISD::SEXTLOAD)
        return SDValue();

      // If the shift amount is larger than the input type then we're not
      // accessing any of the loaded bytes.  If the load was a zextload/extload
      // then the result of the shift+trunc is zero/undef (handled elsewhere).
      if (ShAmt >= cast<LoadSDNode>(N0)->getMemoryVT().getSizeInBits())
        return SDValue();
    }
  }

  // If the load is shifted left (and the result isn't shifted back right),
  // we can fold the truncate through the shift.
  unsigned ShLeftAmt = 0;
  if (ShAmt == 0 && N0.getOpcode() == ISD::SHL && N0.hasOneUse() &&
      ExtVT == VT && TLI.isNarrowingProfitable(N0.getValueType(), VT)) {
    if (ConstantSDNode *N01 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
      ShLeftAmt = N01->getZExtValue();
      N0 = N0.getOperand(0);
    }
  }

  // If we haven't found a load, we can't narrow it.  Don't transform one with
  // multiple uses, this would require adding a new load.
  if (!isa<LoadSDNode>(N0) || !N0.hasOneUse())
    return SDValue();

  // Don't change the width of a volatile load.
  LoadSDNode *LN0 = cast<LoadSDNode>(N0);
  if (LN0->isVolatile())
    return SDValue();

  // Verify that we are actually reducing a load width here.
  if (LN0->getMemoryVT().getSizeInBits() < EVTBits)
    return SDValue();

  // For the transform to be legal, the load must produce only two values
  // (the value loaded and the chain).  Don't transform a pre-increment
  // load, for example, which produces an extra value.  Otherwise the
  // transformation is not equivalent, and the downstream logic to replace
  // uses gets things wrong.
  if (LN0->getNumValues() > 2)
    return SDValue();

  // If the load that we're shrinking is an extload and we're not just
  // discarding the extension we can't simply shrink the load. Bail.
  // TODO: It would be possible to merge the extensions in some cases.
  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
    return SDValue();

  EVT PtrType = N0.getOperand(1).getValueType();

  if (PtrType == MVT::Untyped || PtrType.isExtended())
    // It's not possible to generate a constant of extended or untyped type.
    return SDValue();

  // For big endian targets, we need to adjust the offset to the pointer to
  // load the correct bytes.
  if (TLI.isBigEndian()) {
    unsigned LVTStoreBits = LN0->getMemoryVT().getStoreSizeInBits();
    unsigned EVTStoreBits = ExtVT.getStoreSizeInBits();
    ShAmt = LVTStoreBits - EVTStoreBits - ShAmt;
  }

  // Build the offset pointer and the narrower (possibly extending) load.
  uint64_t PtrOff = ShAmt / 8;
  unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
                               PtrType, LN0->getBasePtr(),
                               DAG.getConstant(PtrOff, PtrType));
  AddToWorkList(NewPtr.getNode());

  SDValue Load;
  if (ExtType == ISD::NON_EXTLOAD)
    Load = DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                       LN0->getPointerInfo().getWithOffset(PtrOff),
                       LN0->isVolatile(), LN0->isNonTemporal(),
                       LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
  else
    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(), NewPtr,
                          LN0->getPointerInfo().getWithOffset(PtrOff),
                          ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
                          NewAlign, LN0->getTBAAInfo());

  // Replace the old load's chain with the new load's chain.
  WorkListRemover DeadNodes(*this);
  DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), Load.getValue(1));

  // Shift the result left, if we've swallowed a left shift.
  SDValue Result = Load;
  if (ShLeftAmt != 0) {
    EVT ShImmTy = getShiftAmountTy(Result.getValueType());
    if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt))
      ShImmTy = VT;
    // If the shift amount is as large as the result size (but, presumably,
    // no larger than the source) then the useful bits of the result are
    // zero; we can't simply return the shortened shift, because the result
    // of that operation is undefined.
    if (ShLeftAmt >= VT.getSizeInBits())
      Result = DAG.getConstant(0, VT);
    else
      Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           Result, DAG.getConstant(ShLeftAmt, ShImmTy));
  }

  // Return the new loaded value.
  return Result;
}

SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  EVT VT = N->getValueType(0);
  // NOTE: the local named 'EVT' shadows the type llvm::EVT for the remainder
  // of this function; kept as-is to avoid churn.
  EVT EVT = cast<VTSDNode>(N1)->getVT();
  unsigned VTBits = VT.getScalarType().getSizeInBits();
  unsigned EVTBits = EVT.getScalarType().getSizeInBits();

  // fold (sext_in_reg c1) -> c1
  if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);

  // If the input is already sign extended, just drop the extension.
  if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
    return N0;

  // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
  if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                       N0.getOperand(0), N1);

  // fold (sext_in_reg (sext x)) -> (sext x)
  // fold (sext_in_reg (aext x)) -> (sext x)
  // if x is small enough.
  if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
    SDValue N00 = N0.getOperand(0);
    if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
        (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
  }

  // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
  if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);

  // fold operands of sext_in_reg based on knowledge that the top bits are not
  // demanded.
  if (SimplifyDemandedBits(SDValue(N, 0)))
    return SDValue(N, 0);

  // fold (sext_in_reg (load x)) -> (smaller sextload x)
  // fold (sext_in_reg (srl (load x), c)) -> (smaller sextload (x+c/evtbits))
  SDValue NarrowLoad = ReduceLoadWidth(N);
  if (NarrowLoad.getNode())
    return NarrowLoad;

  // fold (sext_in_reg (srl X, 24), i8) -> (sra X, 24)
  // fold (sext_in_reg (srl X, 23), i8) -> (sra X, 23) iff possible.
  // We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
  if (N0.getOpcode() == ISD::SRL) {
    if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
      if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
        // We can turn this into an SRA iff the input to the SRL is already sign
        // extended enough.
        unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
        if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
          return DAG.getNode(ISD::SRA, SDLoc(N), VT,
                             N0.getOperand(0), N0.getOperand(1));
      }
  }

  // fold (sext_inreg (extload x)) -> (sextload x)
  if (ISD::isEXTLoad(N0.getNode()) &&
      ISD::isUNINDEXEDLoad(N0.getNode()) &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    AddToWorkList(ExtLoad.getNode());
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }
  // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use
  if (ISD::isZEXTLoad(N0.getNode()) && ISD::isUNINDEXEDLoad(N0.getNode()) &&
      N0.hasOneUse() &&
      EVT == cast<LoadSDNode>(N0)->getMemoryVT() &&
      ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
       TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) {
    LoadSDNode *LN0 = cast<LoadSDNode>(N0);
    SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT,
                                     LN0->getChain(),
                                     LN0->getBasePtr(), EVT,
                                     LN0->getMemOperand());
    CombineTo(N, ExtLoad);
    CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1));
    return SDValue(N, 0);   // Return N so it doesn't get rechecked!
  }

  // Form (sext_inreg (bswap >> 16)) or (sext_inreg (rotl (bswap) 16))
  if (EVTBits <= 16 && N0.getOpcode() == ISD::OR) {
    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
                                       N0.getOperand(1), false);
    if (BSwap.getNode())
      return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                         BSwap, N1);
  }

  // Fold a sext_inreg of a build_vector of ConstantSDNodes or undefs
  // into a build_vector.
  if (ISD::isBuildVectorOfConstantSDNodes(N0.getNode())) {
    SmallVector<SDValue, 8> Elts;
    unsigned NumElts = N0->getNumOperands();
    unsigned ShAmt = VTBits - EVTBits;

    for (unsigned i = 0; i != NumElts; ++i) {
      SDValue Op = N0->getOperand(i);
      if (Op->getOpcode() == ISD::UNDEF) {
        Elts.push_back(Op);
        continue;
      }

      // Sign-extend the element in place: shift the EVT-wide payload up to
      // the top of VT, then arithmetic-shift it back down.
      ConstantSDNode *CurrentND = cast<ConstantSDNode>(Op);
      const APInt &C = APInt(VTBits, CurrentND->getAPIntValue().getZExtValue());
      Elts.push_back(DAG.getConstant(C.shl(ShAmt).ashr(ShAmt).getZExtValue(),
                                     Op.getValueType()));
    }

    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Elts);
  }

  return SDValue();
}

SDValue DAGCombiner::visitTRUNCATE(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  EVT VT = N->getValueType(0);
  bool isLE = TLI.isLittleEndian();

  // noop truncate
  if (N0.getValueType() == N->getValueType(0))
    return N0;
  // fold (truncate c1) -> c1
  if (isa<ConstantSDNode>(N0))
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0);
  // fold (truncate (truncate x)) -> (truncate x)
  if (N0.getOpcode() == ISD::TRUNCATE)
    return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0));
  // fold (truncate (ext x)) -> (ext x) or (truncate x) or x
  if (N0.getOpcode() == ISD::ZERO_EXTEND ||
      N0.getOpcode() == ISD::SIGN_EXTEND ||
      N0.getOpcode() == ISD::ANY_EXTEND) {
    if (N0.getOperand(0).getValueType().bitsLT(VT))
      //
if the source is smaller than the dest, we still need an extend 5977 return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, 5978 N0.getOperand(0)); 5979 if (N0.getOperand(0).getValueType().bitsGT(VT)) 5980 // if the source is larger than the dest, than we just need the truncate 5981 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); 5982 // if the source and dest are the same type, we can drop both the extend 5983 // and the truncate. 5984 return N0.getOperand(0); 5985 } 5986 5987 // Fold extract-and-trunc into a narrow extract. For example: 5988 // i64 x = EXTRACT_VECTOR_ELT(v2i64 val, i32 1) 5989 // i32 y = TRUNCATE(i64 x) 5990 // -- becomes -- 5991 // v16i8 b = BITCAST (v2i64 val) 5992 // i8 x = EXTRACT_VECTOR_ELT(v16i8 b, i32 8) 5993 // 5994 // Note: We only run this optimization after type legalization (which often 5995 // creates this pattern) and before operation legalization after which 5996 // we need to be more careful about the vector instructions that we generate. 5997 if (N0.getOpcode() == ISD::EXTRACT_VECTOR_ELT && 5998 LegalTypes && !LegalOperations && N0->hasOneUse() && VT != MVT::i1) { 5999 6000 EVT VecTy = N0.getOperand(0).getValueType(); 6001 EVT ExTy = N0.getValueType(); 6002 EVT TrTy = N->getValueType(0); 6003 6004 unsigned NumElem = VecTy.getVectorNumElements(); 6005 unsigned SizeRatio = ExTy.getSizeInBits()/TrTy.getSizeInBits(); 6006 6007 EVT NVT = EVT::getVectorVT(*DAG.getContext(), TrTy, SizeRatio * NumElem); 6008 assert(NVT.getSizeInBits() == VecTy.getSizeInBits() && "Invalid Size"); 6009 6010 SDValue EltNo = N0->getOperand(1); 6011 if (isa<ConstantSDNode>(EltNo) && isTypeLegal(NVT)) { 6012 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 6013 EVT IndexTy = TLI.getVectorIdxTy(); 6014 int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); 6015 6016 SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), 6017 NVT, N0.getOperand(0)); 6018 6019 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, 6020 SDLoc(N), TrTy, V, 6021 DAG.getConstant(Index, IndexTy)); 6022 } 6023 } 6024 6025 // Fold a series of buildvector, bitcast, and truncate if possible. 6026 // For example fold 6027 // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to 6028 // (2xi32 (buildvector x, y)). 6029 if (Level == AfterLegalizeVectorOps && VT.isVector() && 6030 N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && 6031 N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && 6032 N0.getOperand(0).hasOneUse()) { 6033 6034 SDValue BuildVect = N0.getOperand(0); 6035 EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); 6036 EVT TruncVecEltTy = VT.getVectorElementType(); 6037 6038 // Check that the element types match. 6039 if (BuildVectEltTy == TruncVecEltTy) { 6040 // Now we only need to compute the offset of the truncated elements. 6041 unsigned BuildVecNumElts = BuildVect.getNumOperands(); 6042 unsigned TruncVecNumElts = VT.getVectorNumElements(); 6043 unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; 6044 6045 assert((BuildVecNumElts % TruncVecNumElts) == 0 && 6046 "Invalid number of elements"); 6047 6048 SmallVector<SDValue, 8> Opnds; 6049 for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) 6050 Opnds.push_back(BuildVect.getOperand(i)); 6051 6052 return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds); 6053 } 6054 } 6055 6056 // See if we can simplify the input to this truncate through knowledge that 6057 // only the low bits are being used. 6058 // For example "trunc (or (shl x, 8), y)" // -> trunc y 6059 // Currently we only perform this optimization on scalars because vectors 6060 // may have different active low bits. 
6061 if (!VT.isVector()) { 6062 SDValue Shorter = 6063 GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), 6064 VT.getSizeInBits())); 6065 if (Shorter.getNode()) 6066 return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); 6067 } 6068 // fold (truncate (load x)) -> (smaller load x) 6069 // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) 6070 if (!LegalTypes || TLI.isTypeDesirableForOp(N0.getOpcode(), VT)) { 6071 SDValue Reduced = ReduceLoadWidth(N); 6072 if (Reduced.getNode()) 6073 return Reduced; 6074 // Handle the case where the load remains an extending load even 6075 // after truncation. 6076 if (N0.hasOneUse() && ISD::isUNINDEXEDLoad(N0.getNode())) { 6077 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6078 if (!LN0->isVolatile() && 6079 LN0->getMemoryVT().getStoreSizeInBits() < VT.getSizeInBits()) { 6080 SDValue NewLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(LN0), 6081 VT, LN0->getChain(), LN0->getBasePtr(), 6082 LN0->getMemoryVT(), 6083 LN0->getMemOperand()); 6084 DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLoad.getValue(1)); 6085 return NewLoad; 6086 } 6087 } 6088 } 6089 // fold (trunc (concat ... x ...)) -> (concat ..., (trunc x), ...)), 6090 // where ... are all 'undef'. 6091 if (N0.getOpcode() == ISD::CONCAT_VECTORS && !LegalTypes) { 6092 SmallVector<EVT, 8> VTs; 6093 SDValue V; 6094 unsigned Idx = 0; 6095 unsigned NumDefs = 0; 6096 6097 for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) { 6098 SDValue X = N0.getOperand(i); 6099 if (X.getOpcode() != ISD::UNDEF) { 6100 V = X; 6101 Idx = i; 6102 NumDefs++; 6103 } 6104 // Stop if more than one members are non-undef. 
6105 if (NumDefs > 1) 6106 break; 6107 VTs.push_back(EVT::getVectorVT(*DAG.getContext(), 6108 VT.getVectorElementType(), 6109 X.getValueType().getVectorNumElements())); 6110 } 6111 6112 if (NumDefs == 0) 6113 return DAG.getUNDEF(VT); 6114 6115 if (NumDefs == 1) { 6116 assert(V.getNode() && "The single defined operand is empty!"); 6117 SmallVector<SDValue, 8> Opnds; 6118 for (unsigned i = 0, e = VTs.size(); i != e; ++i) { 6119 if (i != Idx) { 6120 Opnds.push_back(DAG.getUNDEF(VTs[i])); 6121 continue; 6122 } 6123 SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); 6124 AddToWorkList(NV.getNode()); 6125 Opnds.push_back(NV); 6126 } 6127 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Opnds); 6128 } 6129 } 6130 6131 // Simplify the operands using demanded-bits information. 6132 if (!VT.isVector() && 6133 SimplifyDemandedBits(SDValue(N, 0))) 6134 return SDValue(N, 0); 6135 6136 return SDValue(); 6137 } 6138 6139 static SDNode *getBuildPairElt(SDNode *N, unsigned i) { 6140 SDValue Elt = N->getOperand(i); 6141 if (Elt.getOpcode() != ISD::MERGE_VALUES) 6142 return Elt.getNode(); 6143 return Elt.getOperand(Elt.getResNo()).getNode(); 6144 } 6145 6146 /// CombineConsecutiveLoads - build_pair (load, load) -> load 6147 /// if load locations are consecutive. 6148 SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { 6149 assert(N->getOpcode() == ISD::BUILD_PAIR); 6150 6151 LoadSDNode *LD1 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 0)); 6152 LoadSDNode *LD2 = dyn_cast<LoadSDNode>(getBuildPairElt(N, 1)); 6153 if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse() || 6154 LD1->getAddressSpace() != LD2->getAddressSpace()) 6155 return SDValue(); 6156 EVT LD1VT = LD1->getValueType(0); 6157 6158 if (ISD::isNON_EXTLoad(LD2) && 6159 LD2->hasOneUse() && 6160 // If both are volatile this would reduce the number of volatile loads. 6161 // If one is volatile it might be ok, but play conservative and bail out. 
6162 !LD1->isVolatile() && 6163 !LD2->isVolatile() && 6164 DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) { 6165 unsigned Align = LD1->getAlignment(); 6166 unsigned NewAlign = TLI.getDataLayout()-> 6167 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 6168 6169 if (NewAlign <= Align && 6170 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) 6171 return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), 6172 LD1->getBasePtr(), LD1->getPointerInfo(), 6173 false, false, false, Align); 6174 } 6175 6176 return SDValue(); 6177 } 6178 6179 SDValue DAGCombiner::visitBITCAST(SDNode *N) { 6180 SDValue N0 = N->getOperand(0); 6181 EVT VT = N->getValueType(0); 6182 6183 // If the input is a BUILD_VECTOR with all constant elements, fold this now. 6184 // Only do this before legalize, since afterward the target may be depending 6185 // on the bitconvert. 6186 // First check to see if this is all constant. 6187 if (!LegalTypes && 6188 N0.getOpcode() == ISD::BUILD_VECTOR && N0.getNode()->hasOneUse() && 6189 VT.isVector()) { 6190 bool isSimple = cast<BuildVectorSDNode>(N0)->isConstant(); 6191 6192 EVT DestEltVT = N->getValueType(0).getVectorElementType(); 6193 assert(!DestEltVT.isVector() && 6194 "Element type of vector ValueType must not be vector!"); 6195 if (isSimple) 6196 return ConstantFoldBITCASTofBUILD_VECTOR(N0.getNode(), DestEltVT); 6197 } 6198 6199 // If the input is a constant, let getNode fold it. 6200 if (isa<ConstantSDNode>(N0) || isa<ConstantFPSDNode>(N0)) { 6201 SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); 6202 if (Res.getNode() != N) { 6203 if (!LegalOperations || 6204 TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) 6205 return Res; 6206 6207 // Folding it resulted in an illegal node, and it's too late to 6208 // do that. Clean up the old node and forego the transformation. 
6209 // Ideally this won't happen very often, because instcombine 6210 // and the earlier dagcombine runs (where illegal nodes are 6211 // permitted) should have folded most of them already. 6212 DAG.DeleteNode(Res.getNode()); 6213 } 6214 } 6215 6216 // (conv (conv x, t1), t2) -> (conv x, t2) 6217 if (N0.getOpcode() == ISD::BITCAST) 6218 return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, 6219 N0.getOperand(0)); 6220 6221 // fold (conv (load x)) -> (load (conv*)x) 6222 // If the resultant load doesn't need a higher alignment than the original! 6223 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 6224 // Do not change the width of a volatile load. 6225 !cast<LoadSDNode>(N0)->isVolatile() && 6226 // Do not remove the cast if the types differ in endian layout. 6227 TLI.hasBigEndianPartOrdering(N0.getValueType()) == 6228 TLI.hasBigEndianPartOrdering(VT) && 6229 (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && 6230 TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { 6231 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 6232 unsigned Align = TLI.getDataLayout()-> 6233 getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); 6234 unsigned OrigAlign = LN0->getAlignment(); 6235 6236 if (Align <= OrigAlign) { 6237 SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), 6238 LN0->getBasePtr(), LN0->getPointerInfo(), 6239 LN0->isVolatile(), LN0->isNonTemporal(), 6240 LN0->isInvariant(), OrigAlign, 6241 LN0->getTBAAInfo()); 6242 AddToWorkList(N); 6243 CombineTo(N0.getNode(), 6244 DAG.getNode(ISD::BITCAST, SDLoc(N0), 6245 N0.getValueType(), Load), 6246 Load.getValue(1)); 6247 return Load; 6248 } 6249 } 6250 6251 // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) 6252 // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) 6253 // This often reduces constant pool loads. 
6254 if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || 6255 (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && 6256 N0.getNode()->hasOneUse() && VT.isInteger() && 6257 !VT.isVector() && !N0.getValueType().isVector()) { 6258 SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, 6259 N0.getOperand(0)); 6260 AddToWorkList(NewConv.getNode()); 6261 6262 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 6263 if (N0.getOpcode() == ISD::FNEG) 6264 return DAG.getNode(ISD::XOR, SDLoc(N), VT, 6265 NewConv, DAG.getConstant(SignBit, VT)); 6266 assert(N0.getOpcode() == ISD::FABS); 6267 return DAG.getNode(ISD::AND, SDLoc(N), VT, 6268 NewConv, DAG.getConstant(~SignBit, VT)); 6269 } 6270 6271 // fold (bitconvert (fcopysign cst, x)) -> 6272 // (or (and (bitconvert x), sign), (and cst, (not sign))) 6273 // Note that we don't handle (copysign x, cst) because this can always be 6274 // folded to an fneg or fabs. 6275 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && 6276 isa<ConstantFPSDNode>(N0.getOperand(0)) && 6277 VT.isInteger() && !VT.isVector()) { 6278 unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); 6279 EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); 6280 if (isTypeLegal(IntXVT)) { 6281 SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), 6282 IntXVT, N0.getOperand(1)); 6283 AddToWorkList(X.getNode()); 6284 6285 // If X has a different width than the result/lhs, sext it or truncate it. 6286 unsigned VTWidth = VT.getSizeInBits(); 6287 if (OrigXWidth < VTWidth) { 6288 X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); 6289 AddToWorkList(X.getNode()); 6290 } else if (OrigXWidth > VTWidth) { 6291 // To get the sign bit in the right place, we have to shift it right 6292 // before truncating. 
6293 X = DAG.getNode(ISD::SRL, SDLoc(X), 6294 X.getValueType(), X, 6295 DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); 6296 AddToWorkList(X.getNode()); 6297 X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); 6298 AddToWorkList(X.getNode()); 6299 } 6300 6301 APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); 6302 X = DAG.getNode(ISD::AND, SDLoc(X), VT, 6303 X, DAG.getConstant(SignBit, VT)); 6304 AddToWorkList(X.getNode()); 6305 6306 SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), 6307 VT, N0.getOperand(0)); 6308 Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, 6309 Cst, DAG.getConstant(~SignBit, VT)); 6310 AddToWorkList(Cst.getNode()); 6311 6312 return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); 6313 } 6314 } 6315 6316 // bitconvert(build_pair(ld, ld)) -> ld iff load locations are consecutive. 6317 if (N0.getOpcode() == ISD::BUILD_PAIR) { 6318 SDValue CombineLD = CombineConsecutiveLoads(N0.getNode(), VT); 6319 if (CombineLD.getNode()) 6320 return CombineLD; 6321 } 6322 6323 return SDValue(); 6324 } 6325 6326 SDValue DAGCombiner::visitBUILD_PAIR(SDNode *N) { 6327 EVT VT = N->getValueType(0); 6328 return CombineConsecutiveLoads(N, VT); 6329 } 6330 6331 /// ConstantFoldBITCASTofBUILD_VECTOR - We know that BV is a build_vector 6332 /// node with Constant, ConstantFP or Undef operands. DstEltVT indicates the 6333 /// destination element value type. 6334 SDValue DAGCombiner:: 6335 ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { 6336 EVT SrcEltVT = BV->getValueType(0).getVectorElementType(); 6337 6338 // If this is already the right type, we're done. 6339 if (SrcEltVT == DstEltVT) return SDValue(BV, 0); 6340 6341 unsigned SrcBitSize = SrcEltVT.getSizeInBits(); 6342 unsigned DstBitSize = DstEltVT.getSizeInBits(); 6343 6344 // If this is a conversion of N elements of one type to N elements of another 6345 // type, convert each element. This handles FP<->INT cases. 
if (SrcBitSize == DstBitSize) {
    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                              BV->getValueType(0).getVectorNumElements());

    // Due to the FP element handling below calling this routine recursively,
    // we can end up with a scalar-to-vector node here.
    if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR)
      return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                         DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                     DstEltVT, BV->getOperand(0)));

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
      SDValue Op = BV->getOperand(i);
      // If the vector element type is not legal, the BUILD_VECTOR operands
      // are promoted and implicitly truncated.  Make that explicit here.
      if (Op.getValueType() != SrcEltVT)
        Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op);
      Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV),
                                DstEltVT, Op));
      AddToWorkList(Ops.back().getNode());
    }
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Otherwise, we're growing or shrinking the elements.  To avoid having to
  // handle annoying details of growing/shrinking FP values, we convert them to
  // int first.
  if (SrcEltVT.isFloatingPoint()) {
    // Convert the input float vector to a int vector where the elements are the
    // same sizes.
    assert((SrcEltVT == MVT::f32 || SrcEltVT == MVT::f64) && "Unknown FP VT!");
    EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), SrcEltVT.getSizeInBits());
    BV = ConstantFoldBITCASTofBUILD_VECTOR(BV, IntVT).getNode();
    SrcEltVT = IntVT;
  }

  // Now we know the input is an integer vector.  If the output is a FP type,
  // convert to integer first, then to FP of the right size.
  if (DstEltVT.isFloatingPoint()) {
    assert((DstEltVT == MVT::f32 || DstEltVT == MVT::f64) && "Unknown FP VT!");
    EVT TmpVT = EVT::getIntegerVT(*DAG.getContext(), DstEltVT.getSizeInBits());
    SDNode *Tmp = ConstantFoldBITCASTofBUILD_VECTOR(BV, TmpVT).getNode();

    // Next, convert to FP elements of the same size.
    return ConstantFoldBITCASTofBUILD_VECTOR(Tmp, DstEltVT);
  }

  // Okay, we know the src/dst types are both integers of differing types.
  // Handling growing first.
  assert(SrcEltVT.isInteger() && DstEltVT.isInteger());
  if (SrcBitSize < DstBitSize) {
    unsigned NumInputsPerOutput = DstBitSize/SrcBitSize;

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = BV->getNumOperands(); i != e;
         i += NumInputsPerOutput) {
      bool isLE = TLI.isLittleEndian();
      APInt NewBits = APInt(DstBitSize, 0);
      bool EltIsUndef = true;
      for (unsigned j = 0; j != NumInputsPerOutput; ++j) {
        // Shift the previously computed bits over.
        NewBits <<= SrcBitSize;
        // On little-endian the higher-indexed source elements supply the more
        // significant bits of the wide element, so consume them first.
        SDValue Op = BV->getOperand(i+ (isLE ? (NumInputsPerOutput-j-1) : j));
        if (Op.getOpcode() == ISD::UNDEF) continue;
        EltIsUndef = false;

        NewBits |= cast<ConstantSDNode>(Op)->getAPIntValue().
                   zextOrTrunc(SrcBitSize).zext(DstBitSize);
      }

      if (EltIsUndef)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      else
        Ops.push_back(DAG.getConstant(NewBits, DstEltVT));
    }

    EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size());
    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
  }

  // Finally, this must be the case where we are shrinking elements: each input
  // turns into multiple outputs.
  bool isS2V = ISD::isScalarToVector(BV);
  unsigned NumOutputsPerInput = SrcBitSize/DstBitSize;
  EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT,
                            NumOutputsPerInput*BV->getNumOperands());
  SmallVector<SDValue, 8> Ops;

  for (unsigned i = 0, e = BV->getNumOperands(); i != e; ++i) {
    if (BV->getOperand(i).getOpcode() == ISD::UNDEF) {
      for (unsigned j = 0; j != NumOutputsPerInput; ++j)
        Ops.push_back(DAG.getUNDEF(DstEltVT));
      continue;
    }

    APInt OpVal = cast<ConstantSDNode>(BV->getOperand(i))->
                  getAPIntValue().zextOrTrunc(SrcBitSize);

    for (unsigned j = 0; j != NumOutputsPerInput; ++j) {
      APInt ThisVal = OpVal.trunc(DstBitSize);
      Ops.push_back(DAG.getConstant(ThisVal, DstEltVT));
      if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal)
        // Simply turn this into a SCALAR_TO_VECTOR of the new type.
        return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT,
                           Ops[0]);
      OpVal = OpVal.lshr(DstBitSize);
    }

    // For big endian targets, swap the order of the pieces of each element.
    if (TLI.isBigEndian())
      std::reverse(Ops.end()-NumOutputsPerInput, Ops.end());
  }

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, Ops);
}

SDValue DAGCombiner::visitFADD(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fadd c1, c2) -> c1 + c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1);
  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0);
  // fold (fadd A, 0) -> A
  // (unsafe only: A + (-0.0) == A does not hold under strict FP.)
  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
      N1CFP->getValueAPF().isZero())
    return N0;
  // fold (fadd A, (fneg B)) -> (fsub A, B)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
    isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));
  // fold (fadd (fneg A), B) -> (fsub B, A)
  if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) &&
    isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1,
                       GetNegatedExpression(N0, DAG, LegalOperations));

  // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2))
  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
      N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() &&
      isa<ConstantFPSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0),
                       DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                   N0.getOperand(1), N1));

  // No FP constant should be created after legalization as Instruction
  // Selection pass has hard time in dealing with FP constant.
  //
  // We don't need test this condition for transformation like following, as
  // the DAG being transformed implies it is legal to take FP constant as
  // operand.
  //
  //  (fadd (fmul c, x), x) -> (fmul c+1, x)
  //
  bool AllowNewFpConst = (Level < AfterLegalizeDAG);

  // If allow, fold (fadd (fneg x), x) -> 0.0
  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
      N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1)
    return DAG.getConstantFP(0.0, VT);

  // If allow, fold (fadd x, (fneg x)) -> 0.0
  if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath &&
      N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0)
    return DAG.getConstantFP(0.0, VT);

  // In unsafe math mode, we can fold chains of FADD's of the same value
  // into multiplications.  This transform is not safe in general because
  // we are reducing the number of rounding steps.
  if (DAG.getTarget().Options.UnsafeFPMath &&
      TLI.isOperationLegalOrCustom(ISD::FMUL, VT) &&
      !N0CFP && !N1CFP) {
    if (N0.getOpcode() == ISD::FMUL) {
      ConstantFPSDNode *CFP00 = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
      ConstantFPSDNode *CFP01 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1));

      // (fadd (fmul c, x), x) -> (fmul x, c+1)
      if (CFP00 && !CFP01 && N0.getOperand(1) == N1) {
        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     SDValue(CFP00, 0),
                                     DAG.getConstantFP(1.0, VT));
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N1, NewCFP);
      }

      // (fadd (fmul x, c), x) -> (fmul x, c+1)
      if (CFP01 && !CFP00 && N0.getOperand(0) == N1) {
        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     SDValue(CFP01, 0),
                                     DAG.getConstantFP(1.0, VT));
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N1, NewCFP);
      }

      // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2)
      if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(1) == N1.getOperand(0)) {
        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     SDValue(CFP00, 0),
                                     DAG.getConstantFP(2.0, VT));
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N0.getOperand(1), NewCFP);
      }

      // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2)
      if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD &&
          N1.getOperand(0) == N1.getOperand(1) &&
          N0.getOperand(0) == N1.getOperand(0)) {
        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     SDValue(CFP01, 0),
                                     DAG.getConstantFP(2.0, VT));
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N0.getOperand(0), NewCFP);
      }
    }

    if (N1.getOpcode() == ISD::FMUL) {
      ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
      ConstantFPSDNode *CFP11 = dyn_cast<ConstantFPSDNode>(N1.getOperand(1));

      // (fadd x, (fmul c, x)) -> (fmul x, c+1)
      if (CFP10 && !CFP11 && N1.getOperand(1) == N0) {
        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     SDValue(CFP10, 0),
                                     DAG.getConstantFP(1.0, VT));
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N0, NewCFP);
      }

      // (fadd x, (fmul x, c)) -> (fmul x, c+1)
      if (CFP11 && !CFP10 && N1.getOperand(0) == N0) {
        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     SDValue(CFP11, 0),
                                     DAG.getConstantFP(1.0, VT));
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N0, NewCFP);
      }

      // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2)
      if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(1) == N0.getOperand(0)) {
        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     SDValue(CFP10, 0),
                                     DAG.getConstantFP(2.0, VT));
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N1.getOperand(1), NewCFP);
      }

      // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2)
      if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD &&
          N0.getOperand(0) == N0.getOperand(1) &&
          N1.getOperand(0) == N0.getOperand(0)) {
        SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT,
                                     SDValue(CFP11, 0),
                                     DAG.getConstantFP(2.0, VT));
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N1.getOperand(0), NewCFP);
      }
    }

    if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) {
      ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N0.getOperand(0));
      // (fadd (fadd x, x), x) -> (fmul x, 3.0)
      if (!CFP && N0.getOperand(0) == N0.getOperand(1) &&
          (N0.getOperand(0) == N1))
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N1, DAG.getConstantFP(3.0, VT));
    }

    if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) {
      ConstantFPSDNode *CFP10 = dyn_cast<ConstantFPSDNode>(N1.getOperand(0));
      // (fadd x, (fadd x, x)) -> (fmul x, 3.0)
      if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) &&
          N1.getOperand(0) == N0)
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           N0, DAG.getConstantFP(3.0, VT));
    }

    // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0)
    if (AllowNewFpConst &&
        N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD &&
        N0.getOperand(0) == N0.getOperand(1) &&
        N1.getOperand(0) == N1.getOperand(1) &&
        N0.getOperand(0) == N1.getOperand(0))
      return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                         N0.getOperand(0),
                         DAG.getConstantFP(4.0, VT));
  }

  // FADD -> FMA combines:
  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
       DAG.getTarget().Options.UnsafeFPMath) &&
      DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fadd (fmul x, y), z) -> (fma x, y, z)
    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N0.getOperand(0), N0.getOperand(1), N1);

    // fold (fadd x, (fmul y, z)) -> (fma y, z, x)
    // Note: Commutes FADD operands.
    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
      return DAG.getNode(ISD::FMA, SDLoc(N), VT,
                         N1.getOperand(0), N1.getOperand(1), N0);
  }

  return SDValue();
}

SDValue DAGCombiner::visitFSUB(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fsub c1, c2) -> c1-c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1);
  // fold (fsub A, 0) -> A
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N1CFP && N1CFP->getValueAPF().isZero())
    return N0;
  // fold (fsub 0, B) -> -B
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N0CFP && N0CFP->getValueAPF().isZero()) {
    if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
      return GetNegatedExpression(N1, DAG, LegalOperations);
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, dl, VT, N1);
  }
  // fold (fsub A, (fneg B)) -> (fadd A, B)
  if (isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options))
    return DAG.getNode(ISD::FADD, dl, VT, N0,
                       GetNegatedExpression(N1, DAG, LegalOperations));

  // If 'unsafe math' is enabled, fold
  //    (fsub x, x) -> 0.0 &
  //    (fsub x, (fadd x, y)) -> (fneg y) &
  //    (fsub x, (fadd y, x)) -> (fneg y)
  if (DAG.getTarget().Options.UnsafeFPMath) {
    if (N0 == N1)
      return DAG.getConstantFP(0.0f, VT);

    if (N1.getOpcode() == ISD::FADD) {
      SDValue N10 = N1->getOperand(0);
      SDValue N11 = N1->getOperand(1);

      if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI,
                                          &DAG.getTarget().Options))
        return GetNegatedExpression(N11, DAG, LegalOperations);

      if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI,
                                          &DAG.getTarget().Options))
        return GetNegatedExpression(N10, DAG, LegalOperations);
    }
  }

  // FSUB -> FMA combines:
  if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast ||
       DAG.getTarget().Options.UnsafeFPMath) &&
      DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) &&
      (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) {

    // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
    if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse())
      return DAG.getNode(ISD::FMA, dl, VT,
                         N0.getOperand(0), N0.getOperand(1),
                         DAG.getNode(ISD::FNEG, dl, VT, N1));

    // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
    // Note: Commutes FSUB operands.
    if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse())
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT,
                                     N1.getOperand(0)),
                         N1.getOperand(1), N0);

    // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z))
    if (N0.getOpcode() == ISD::FNEG &&
        N0.getOperand(0).getOpcode() == ISD::FMUL &&
        N0->hasOneUse() && N0.getOperand(0).hasOneUse()) {
      SDValue N00 = N0.getOperand(0).getOperand(0);
      SDValue N01 = N0.getOperand(0).getOperand(1);
      return DAG.getNode(ISD::FMA, dl, VT,
                         DAG.getNode(ISD::FNEG, dl, VT, N00), N01,
                         DAG.getNode(ISD::FNEG, dl, VT, N1));
    }
  }

  return SDValue();
}

SDValue DAGCombiner::visitFMUL(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fmul c1, c2) -> c1*c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1);
  // canonicalize constant to RHS
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0);
  // fold (fmul A, 0) -> 0
  // (unsafe only: A * 0 is not 0 for NaN/Inf A, and the sign can differ.)
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N1CFP && N1CFP->getValueAPF().isZero())
    return N1;
  // fold (fmul A, 0) -> 0, vector edition.
  if (DAG.getTarget().Options.UnsafeFPMath &&
      ISD::isBuildVectorAllZeros(N1.getNode()))
    return N1;
  // fold (fmul A, 1.0) -> A
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return N0;
  // fold (fmul X, 2.0) -> (fadd X, X)
  if (N1CFP && N1CFP->isExactlyValue(+2.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0);
  // fold (fmul X, -1.0) -> (fneg X)
  if (N1CFP && N1CFP->isExactlyValue(-1.0))
    if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))
      return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0);

  // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y)
  if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI,
                                       &DAG.getTarget().Options)) {
    if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI,
                                         &DAG.getTarget().Options)) {
      // Both can be negated for free, check to see if at least one is cheaper
      // negated.
      if (LHSNeg == 2 || RHSNeg == 2)
        return DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                           GetNegatedExpression(N0, DAG, LegalOperations),
                           GetNegatedExpression(N1, DAG, LegalOperations));
    }
  }

  // If allowed, fold (fmul (fmul x, c1), c2) -> (fmul x, (fmul c1, c2))
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N1CFP && N0.getOpcode() == ISD::FMUL &&
      N0.getNode()->hasOneUse() && isa<ConstantFPSDNode>(N0.getOperand(1)))
    return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0),
                       DAG.getNode(ISD::FMUL, SDLoc(N), VT,
                                   N0.getOperand(1), N1));

  return SDValue();
}

SDValue DAGCombiner::visitFMA(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  SDValue N2 = N->getOperand(2);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  SDLoc dl(N);

  // (fma 0, x, y) / (fma x, 0, y) -> y, only when rounding/NaN effects of the
  // dropped multiply may be ignored.
  if (DAG.getTarget().Options.UnsafeFPMath) {
    if (N0CFP && N0CFP->isZero())
      return N2;
    if (N1CFP && N1CFP->isZero())
      return N2;
  }
  if (N0CFP && N0CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2);
  if (N1CFP && N1CFP->isExactlyValue(1.0))
    return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2);

  // Canonicalize (fma c, x, y) -> (fma x, c, y)
  if (N0CFP && !N1CFP)
    return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2);

  // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2)
  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FMUL &&
      N0 == N2.getOperand(0) &&
      N2.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT, N1, N2.getOperand(1)));
  }

  // (fma (fmul x, c1), c2, y) -> (fma x, c1*c2, y)
  if (DAG.getTarget().Options.UnsafeFPMath &&
      N0.getOpcode() == ISD::FMUL && N1CFP &&
      N0.getOperand(1).getOpcode() == ISD::ConstantFP) {
    return DAG.getNode(ISD::FMA, dl, VT,
                       N0.getOperand(0),
                       DAG.getNode(ISD::FMUL, dl, VT, N1, N0.getOperand(1)),
                       N2);
  }

  // (fma x, 1, y) -> (fadd x, y)
  // (fma x, -1, y) -> (fadd (fneg x), y)
  if (N1CFP) {
    if (N1CFP->isExactlyValue(1.0))
      return DAG.getNode(ISD::FADD, dl, VT, N0, N2);

    if (N1CFP->isExactlyValue(-1.0) &&
        (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT))) {
      SDValue RHSNeg = DAG.getNode(ISD::FNEG, dl, VT, N0);
      AddToWorkList(RHSNeg.getNode());
      return DAG.getNode(ISD::FADD, dl, VT, N2, RHSNeg);
    }
  }

  // (fma x, c, x) -> (fmul x, (c+1))
  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(1.0, VT)));

  // (fma x, c, (fneg x)) -> (fmul x, (c-1))
  if (DAG.getTarget().Options.UnsafeFPMath && N1CFP &&
      N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0)
    return DAG.getNode(ISD::FMUL, dl, VT, N0,
                       DAG.getNode(ISD::FADD, dl, VT,
                                   N1, DAG.getConstantFP(-1.0, VT)));

  return SDValue();
}

SDValue DAGCombiner::visitFDIV(SDNode *N) {
  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0);
  ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1);
  EVT VT = N->getValueType(0);
  const TargetLowering &TLI = DAG.getTargetLoweringInfo();

  // fold vector ops
  if (VT.isVector()) {
    SDValue FoldedVOp = SimplifyVBinOp(N);
    if (FoldedVOp.getNode()) return FoldedVOp;
  }

  // fold (fdiv c1, c2) -> c1/c2
  if (N0CFP && N1CFP)
    return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1);

  // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable.
6922 if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { 6923 // Compute the reciprocal 1.0 / c2. 6924 APFloat N1APF = N1CFP->getValueAPF(); 6925 APFloat Recip(N1APF.getSemantics(), 1); // 1.0 6926 APFloat::opStatus st = Recip.divide(N1APF, APFloat::rmNearestTiesToEven); 6927 // Only do the transform if the reciprocal is a legal fp immediate that 6928 // isn't too nasty (eg NaN, denormal, ...). 6929 if ((st == APFloat::opOK || st == APFloat::opInexact) && // Not too nasty 6930 (!LegalOperations || 6931 // FIXME: custom lowering of ConstantFP might fail (see e.g. ARM 6932 // backend)... we should handle this gracefully after Legalize. 6933 // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || 6934 TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || 6935 TLI.isFPImmLegal(Recip, VT))) 6936 return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, 6937 DAG.getConstantFP(Recip, VT)); 6938 } 6939 6940 // (fdiv (fneg X), (fneg Y)) -> (fdiv X, Y) 6941 if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, 6942 &DAG.getTarget().Options)) { 6943 if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, 6944 &DAG.getTarget().Options)) { 6945 // Both can be negated for free, check to see if at least one is cheaper 6946 // negated. 
6947 if (LHSNeg == 2 || RHSNeg == 2) 6948 return DAG.getNode(ISD::FDIV, SDLoc(N), VT, 6949 GetNegatedExpression(N0, DAG, LegalOperations), 6950 GetNegatedExpression(N1, DAG, LegalOperations)); 6951 } 6952 } 6953 6954 return SDValue(); 6955 } 6956 6957 SDValue DAGCombiner::visitFREM(SDNode *N) { 6958 SDValue N0 = N->getOperand(0); 6959 SDValue N1 = N->getOperand(1); 6960 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6961 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 6962 EVT VT = N->getValueType(0); 6963 6964 // fold (frem c1, c2) -> fmod(c1,c2) 6965 if (N0CFP && N1CFP) 6966 return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); 6967 6968 return SDValue(); 6969 } 6970 6971 SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { 6972 SDValue N0 = N->getOperand(0); 6973 SDValue N1 = N->getOperand(1); 6974 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 6975 ConstantFPSDNode *N1CFP = dyn_cast<ConstantFPSDNode>(N1); 6976 EVT VT = N->getValueType(0); 6977 6978 if (N0CFP && N1CFP) // Constant fold 6979 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); 6980 6981 if (N1CFP) { 6982 const APFloat& V = N1CFP->getValueAPF(); 6983 // copysign(x, c1) -> fabs(x) iff ispos(c1) 6984 // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) 6985 if (!V.isNegative()) { 6986 if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) 6987 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 6988 } else { 6989 if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) 6990 return DAG.getNode(ISD::FNEG, SDLoc(N), VT, 6991 DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); 6992 } 6993 } 6994 6995 // copysign(fabs(x), y) -> copysign(x, y) 6996 // copysign(fneg(x), y) -> copysign(x, y) 6997 // copysign(copysign(x,z), y) -> copysign(x, y) 6998 if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || 6999 N0.getOpcode() == ISD::FCOPYSIGN) 7000 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7001 N0.getOperand(0), N1); 7002 7003 // copysign(x, abs(y)) -> abs(x) 7004 
if (N1.getOpcode() == ISD::FABS) 7005 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 7006 7007 // copysign(x, copysign(y,z)) -> copysign(x, z) 7008 if (N1.getOpcode() == ISD::FCOPYSIGN) 7009 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7010 N0, N1.getOperand(1)); 7011 7012 // copysign(x, fp_extend(y)) -> copysign(x, y) 7013 // copysign(x, fp_round(y)) -> copysign(x, y) 7014 if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) 7015 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7016 N0, N1.getOperand(0)); 7017 7018 return SDValue(); 7019 } 7020 7021 SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { 7022 SDValue N0 = N->getOperand(0); 7023 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 7024 EVT VT = N->getValueType(0); 7025 EVT OpVT = N0.getValueType(); 7026 7027 // fold (sint_to_fp c1) -> c1fp 7028 if (N0C && 7029 // ...but only if the target supports immediate floating-point values 7030 (!LegalOperations || 7031 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 7032 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); 7033 7034 // If the input is a legal type, and SINT_TO_FP is not legal on this target, 7035 // but UINT_TO_FP is legal on this target, try to convert. 7036 if (!TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT) && 7037 TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { 7038 // If the sign bit is known to be zero, we can change this to UINT_TO_FP. 7039 if (DAG.SignBitIsZero(N0)) 7040 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 7041 } 7042 7043 // The next optimizations are desirable only if SELECT_CC can be lowered. 
7044 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { 7045 // fold (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 7046 if (N0.getOpcode() == ISD::SETCC && N0.getValueType() == MVT::i1 && 7047 !VT.isVector() && 7048 (!LegalOperations || 7049 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 7050 SDValue Ops[] = 7051 { N0.getOperand(0), N0.getOperand(1), 7052 DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), 7053 N0.getOperand(2) }; 7054 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); 7055 } 7056 7057 // fold (sint_to_fp (zext (setcc x, y, cc))) -> 7058 // (select_cc x, y, 1.0, 0.0,, cc) 7059 if (N0.getOpcode() == ISD::ZERO_EXTEND && 7060 N0.getOperand(0).getOpcode() == ISD::SETCC &&!VT.isVector() && 7061 (!LegalOperations || 7062 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 7063 SDValue Ops[] = 7064 { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), 7065 DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), 7066 N0.getOperand(0).getOperand(2) }; 7067 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); 7068 } 7069 } 7070 7071 return SDValue(); 7072 } 7073 7074 SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { 7075 SDValue N0 = N->getOperand(0); 7076 ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0); 7077 EVT VT = N->getValueType(0); 7078 EVT OpVT = N0.getValueType(); 7079 7080 // fold (uint_to_fp c1) -> c1fp 7081 if (N0C && 7082 // ...but only if the target supports immediate floating-point values 7083 (!LegalOperations || 7084 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) 7085 return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); 7086 7087 // If the input is a legal type, and UINT_TO_FP is not legal on this target, 7088 // but SINT_TO_FP is legal on this target, try to convert. 
7089 if (!TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT) && 7090 TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { 7091 // If the sign bit is known to be zero, we can change this to SINT_TO_FP. 7092 if (DAG.SignBitIsZero(N0)) 7093 return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); 7094 } 7095 7096 // The next optimizations are desirable only if SELECT_CC can be lowered. 7097 if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT) || !LegalOperations) { 7098 // fold (uint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) 7099 7100 if (N0.getOpcode() == ISD::SETCC && !VT.isVector() && 7101 (!LegalOperations || 7102 TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) { 7103 SDValue Ops[] = 7104 { N0.getOperand(0), N0.getOperand(1), 7105 DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), 7106 N0.getOperand(2) }; 7107 return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops); 7108 } 7109 } 7110 7111 return SDValue(); 7112 } 7113 7114 SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { 7115 SDValue N0 = N->getOperand(0); 7116 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7117 EVT VT = N->getValueType(0); 7118 7119 // fold (fp_to_sint c1fp) -> c1 7120 if (N0CFP) 7121 return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); 7122 7123 return SDValue(); 7124 } 7125 7126 SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { 7127 SDValue N0 = N->getOperand(0); 7128 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7129 EVT VT = N->getValueType(0); 7130 7131 // fold (fp_to_uint c1fp) -> c1 7132 if (N0CFP) 7133 return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); 7134 7135 return SDValue(); 7136 } 7137 7138 SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { 7139 SDValue N0 = N->getOperand(0); 7140 SDValue N1 = N->getOperand(1); 7141 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7142 EVT VT = N->getValueType(0); 7143 7144 // fold (fp_round c1fp) -> c1fp 7145 if (N0CFP) 7146 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); 
7147 7148 // fold (fp_round (fp_extend x)) -> x 7149 if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) 7150 return N0.getOperand(0); 7151 7152 // fold (fp_round (fp_round x)) -> (fp_round x) 7153 if (N0.getOpcode() == ISD::FP_ROUND) { 7154 // This is a value preserving truncation if both round's are. 7155 bool IsTrunc = N->getConstantOperandVal(1) == 1 && 7156 N0.getNode()->getConstantOperandVal(1) == 1; 7157 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), 7158 DAG.getIntPtrConstant(IsTrunc)); 7159 } 7160 7161 // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) 7162 if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { 7163 SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, 7164 N0.getOperand(0), N1); 7165 AddToWorkList(Tmp.getNode()); 7166 return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, 7167 Tmp, N0.getOperand(1)); 7168 } 7169 7170 return SDValue(); 7171 } 7172 7173 SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { 7174 SDValue N0 = N->getOperand(0); 7175 EVT VT = N->getValueType(0); 7176 EVT EVT = cast<VTSDNode>(N->getOperand(1))->getVT(); 7177 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7178 7179 // fold (fp_round_inreg c1fp) -> c1fp 7180 if (N0CFP && isTypeLegal(EVT)) { 7181 SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); 7182 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); 7183 } 7184 7185 return SDValue(); 7186 } 7187 7188 SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { 7189 SDValue N0 = N->getOperand(0); 7190 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7191 EVT VT = N->getValueType(0); 7192 7193 // If this is fp_round(fpextend), don't fold it, allow ourselves to be folded. 
7194 if (N->hasOneUse() && 7195 N->use_begin()->getOpcode() == ISD::FP_ROUND) 7196 return SDValue(); 7197 7198 // fold (fp_extend c1fp) -> c1fp 7199 if (N0CFP) 7200 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); 7201 7202 // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the 7203 // value of X. 7204 if (N0.getOpcode() == ISD::FP_ROUND 7205 && N0.getNode()->getConstantOperandVal(1) == 1) { 7206 SDValue In = N0.getOperand(0); 7207 if (In.getValueType() == VT) return In; 7208 if (VT.bitsLT(In.getValueType())) 7209 return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, 7210 In, N0.getOperand(1)); 7211 return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); 7212 } 7213 7214 // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) 7215 if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && 7216 ((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) || 7217 TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { 7218 LoadSDNode *LN0 = cast<LoadSDNode>(N0); 7219 SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, 7220 LN0->getChain(), 7221 LN0->getBasePtr(), N0.getValueType(), 7222 LN0->getMemOperand()); 7223 CombineTo(N, ExtLoad); 7224 CombineTo(N0.getNode(), 7225 DAG.getNode(ISD::FP_ROUND, SDLoc(N0), 7226 N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), 7227 ExtLoad.getValue(1)); 7228 return SDValue(N, 0); // Return N so it doesn't get rechecked! 7229 } 7230 7231 return SDValue(); 7232 } 7233 7234 SDValue DAGCombiner::visitFNEG(SDNode *N) { 7235 SDValue N0 = N->getOperand(0); 7236 EVT VT = N->getValueType(0); 7237 7238 if (VT.isVector()) { 7239 SDValue FoldedVOp = SimplifyVUnaryOp(N); 7240 if (FoldedVOp.getNode()) return FoldedVOp; 7241 } 7242 7243 if (isNegatibleForFree(N0, LegalOperations, DAG.getTargetLoweringInfo(), 7244 &DAG.getTarget().Options)) 7245 return GetNegatedExpression(N0, DAG, LegalOperations); 7246 7247 // Transform fneg(bitconvert(x)) -> bitconvert(x^sign) to avoid loading 7248 // constant pool values. 
7249 if (!TLI.isFNegFree(VT) && N0.getOpcode() == ISD::BITCAST && 7250 !VT.isVector() && 7251 N0.getNode()->hasOneUse() && 7252 N0.getOperand(0).getValueType().isInteger()) { 7253 SDValue Int = N0.getOperand(0); 7254 EVT IntVT = Int.getValueType(); 7255 if (IntVT.isInteger() && !IntVT.isVector()) { 7256 Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, 7257 DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); 7258 AddToWorkList(Int.getNode()); 7259 return DAG.getNode(ISD::BITCAST, SDLoc(N), 7260 VT, Int); 7261 } 7262 } 7263 7264 // (fneg (fmul c, x)) -> (fmul -c, x) 7265 if (N0.getOpcode() == ISD::FMUL) { 7266 ConstantFPSDNode *CFP1 = dyn_cast<ConstantFPSDNode>(N0.getOperand(1)); 7267 if (CFP1) { 7268 APFloat CVal = CFP1->getValueAPF(); 7269 CVal.changeSign(); 7270 if (Level >= AfterLegalizeDAG && 7271 (TLI.isFPImmLegal(CVal, N->getValueType(0)) || 7272 TLI.isOperationLegal(ISD::ConstantFP, N->getValueType(0)))) 7273 return DAG.getNode( 7274 ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), 7275 DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); 7276 } 7277 } 7278 7279 return SDValue(); 7280 } 7281 7282 SDValue DAGCombiner::visitFCEIL(SDNode *N) { 7283 SDValue N0 = N->getOperand(0); 7284 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7285 EVT VT = N->getValueType(0); 7286 7287 // fold (fceil c1) -> fceil(c1) 7288 if (N0CFP) 7289 return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); 7290 7291 return SDValue(); 7292 } 7293 7294 SDValue DAGCombiner::visitFTRUNC(SDNode *N) { 7295 SDValue N0 = N->getOperand(0); 7296 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7297 EVT VT = N->getValueType(0); 7298 7299 // fold (ftrunc c1) -> ftrunc(c1) 7300 if (N0CFP) 7301 return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); 7302 7303 return SDValue(); 7304 } 7305 7306 SDValue DAGCombiner::visitFFLOOR(SDNode *N) { 7307 SDValue N0 = N->getOperand(0); 7308 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7309 EVT VT = N->getValueType(0); 7310 7311 
// fold (ffloor c1) -> ffloor(c1) 7312 if (N0CFP) 7313 return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); 7314 7315 return SDValue(); 7316 } 7317 7318 SDValue DAGCombiner::visitFABS(SDNode *N) { 7319 SDValue N0 = N->getOperand(0); 7320 ConstantFPSDNode *N0CFP = dyn_cast<ConstantFPSDNode>(N0); 7321 EVT VT = N->getValueType(0); 7322 7323 if (VT.isVector()) { 7324 SDValue FoldedVOp = SimplifyVUnaryOp(N); 7325 if (FoldedVOp.getNode()) return FoldedVOp; 7326 } 7327 7328 // fold (fabs c1) -> fabs(c1) 7329 if (N0CFP) 7330 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); 7331 // fold (fabs (fabs x)) -> (fabs x) 7332 if (N0.getOpcode() == ISD::FABS) 7333 return N->getOperand(0); 7334 // fold (fabs (fneg x)) -> (fabs x) 7335 // fold (fabs (fcopysign x, y)) -> (fabs x) 7336 if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) 7337 return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); 7338 7339 // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading 7340 // constant pool values. 7341 if (!TLI.isFAbsFree(VT) && 7342 N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && 7343 N0.getOperand(0).getValueType().isInteger() && 7344 !N0.getOperand(0).getValueType().isVector()) { 7345 SDValue Int = N0.getOperand(0); 7346 EVT IntVT = Int.getValueType(); 7347 if (IntVT.isInteger() && !IntVT.isVector()) { 7348 Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, 7349 DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); 7350 AddToWorkList(Int.getNode()); 7351 return DAG.getNode(ISD::BITCAST, SDLoc(N), 7352 N->getValueType(0), Int); 7353 } 7354 } 7355 7356 return SDValue(); 7357 } 7358 7359 SDValue DAGCombiner::visitBRCOND(SDNode *N) { 7360 SDValue Chain = N->getOperand(0); 7361 SDValue N1 = N->getOperand(1); 7362 SDValue N2 = N->getOperand(2); 7363 7364 // If N is a constant we could fold this into a fallthrough or unconditional 7365 // branch. 
However that doesn't happen very often in normal code, because 7366 // Instcombine/SimplifyCFG should have handled the available opportunities. 7367 // If we did this folding here, it would be necessary to update the 7368 // MachineBasicBlock CFG, which is awkward. 7369 7370 // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal 7371 // on the target. 7372 if (N1.getOpcode() == ISD::SETCC && 7373 TLI.isOperationLegalOrCustom(ISD::BR_CC, 7374 N1.getOperand(0).getValueType())) { 7375 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, 7376 Chain, N1.getOperand(2), 7377 N1.getOperand(0), N1.getOperand(1), N2); 7378 } 7379 7380 if ((N1.hasOneUse() && N1.getOpcode() == ISD::SRL) || 7381 ((N1.getOpcode() == ISD::TRUNCATE && N1.hasOneUse()) && 7382 (N1.getOperand(0).hasOneUse() && 7383 N1.getOperand(0).getOpcode() == ISD::SRL))) { 7384 SDNode *Trunc = nullptr; 7385 if (N1.getOpcode() == ISD::TRUNCATE) { 7386 // Look pass the truncate. 7387 Trunc = N1.getNode(); 7388 N1 = N1.getOperand(0); 7389 } 7390 7391 // Match this pattern so that we can generate simpler code: 7392 // 7393 // %a = ... 7394 // %b = and i32 %a, 2 7395 // %c = srl i32 %b, 1 7396 // brcond i32 %c ... 7397 // 7398 // into 7399 // 7400 // %a = ... 7401 // %b = and i32 %a, 2 7402 // %c = setcc eq %b, 0 7403 // brcond %c ... 7404 // 7405 // This applies only when the AND constant value has one bit set and the 7406 // SRL constant is equal to the log2 of the AND constant. The back-end is 7407 // smart enough to convert the result into a TEST/JMP sequence. 
7408 SDValue Op0 = N1.getOperand(0); 7409 SDValue Op1 = N1.getOperand(1); 7410 7411 if (Op0.getOpcode() == ISD::AND && 7412 Op1.getOpcode() == ISD::Constant) { 7413 SDValue AndOp1 = Op0.getOperand(1); 7414 7415 if (AndOp1.getOpcode() == ISD::Constant) { 7416 const APInt &AndConst = cast<ConstantSDNode>(AndOp1)->getAPIntValue(); 7417 7418 if (AndConst.isPowerOf2() && 7419 cast<ConstantSDNode>(Op1)->getAPIntValue()==AndConst.logBase2()) { 7420 SDValue SetCC = 7421 DAG.getSetCC(SDLoc(N), 7422 getSetCCResultType(Op0.getValueType()), 7423 Op0, DAG.getConstant(0, Op0.getValueType()), 7424 ISD::SETNE); 7425 7426 SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), 7427 MVT::Other, Chain, SetCC, N2); 7428 // Don't add the new BRCond into the worklist or else SimplifySelectCC 7429 // will convert it back to (X & C1) >> C2. 7430 CombineTo(N, NewBRCond, false); 7431 // Truncate is dead. 7432 if (Trunc) { 7433 removeFromWorkList(Trunc); 7434 DAG.DeleteNode(Trunc); 7435 } 7436 // Replace the uses of SRL with SETCC 7437 WorkListRemover DeadNodes(*this); 7438 DAG.ReplaceAllUsesOfValueWith(N1, SetCC); 7439 removeFromWorkList(N1.getNode()); 7440 DAG.DeleteNode(N1.getNode()); 7441 return SDValue(N, 0); // Return N so it doesn't get rechecked! 7442 } 7443 } 7444 } 7445 7446 if (Trunc) 7447 // Restore N1 if the above transformation doesn't match. 7448 N1 = N->getOperand(1); 7449 } 7450 7451 // Transform br(xor(x, y)) -> br(x != y) 7452 // Transform br(xor(xor(x,y), 1)) -> br (x == y) 7453 if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) { 7454 SDNode *TheXor = N1.getNode(); 7455 SDValue Op0 = TheXor->getOperand(0); 7456 SDValue Op1 = TheXor->getOperand(1); 7457 if (Op0.getOpcode() == Op1.getOpcode()) { 7458 // Avoid missing important xor optimizations. 
7459 SDValue Tmp = visitXOR(TheXor); 7460 if (Tmp.getNode()) { 7461 if (Tmp.getNode() != TheXor) { 7462 DEBUG(dbgs() << "\nReplacing.8 "; 7463 TheXor->dump(&DAG); 7464 dbgs() << "\nWith: "; 7465 Tmp.getNode()->dump(&DAG); 7466 dbgs() << '\n'); 7467 WorkListRemover DeadNodes(*this); 7468 DAG.ReplaceAllUsesOfValueWith(N1, Tmp); 7469 removeFromWorkList(TheXor); 7470 DAG.DeleteNode(TheXor); 7471 return DAG.getNode(ISD::BRCOND, SDLoc(N), 7472 MVT::Other, Chain, Tmp, N2); 7473 } 7474 7475 // visitXOR has changed XOR's operands or replaced the XOR completely, 7476 // bail out. 7477 return SDValue(N, 0); 7478 } 7479 } 7480 7481 if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) { 7482 bool Equal = false; 7483 if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0)) 7484 if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() && 7485 Op0.getOpcode() == ISD::XOR) { 7486 TheXor = Op0.getNode(); 7487 Equal = true; 7488 } 7489 7490 EVT SetCCVT = N1.getValueType(); 7491 if (LegalTypes) 7492 SetCCVT = getSetCCResultType(SetCCVT); 7493 SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), 7494 SetCCVT, 7495 Op0, Op1, 7496 Equal ? ISD::SETEQ : ISD::SETNE); 7497 // Replace the uses of XOR with SETCC 7498 WorkListRemover DeadNodes(*this); 7499 DAG.ReplaceAllUsesOfValueWith(N1, SetCC); 7500 removeFromWorkList(N1.getNode()); 7501 DAG.DeleteNode(N1.getNode()); 7502 return DAG.getNode(ISD::BRCOND, SDLoc(N), 7503 MVT::Other, Chain, SetCC, N2); 7504 } 7505 } 7506 7507 return SDValue(); 7508 } 7509 7510 // Operand List for BR_CC: Chain, CondCC, CondLHS, CondRHS, DestBB. 7511 // 7512 SDValue DAGCombiner::visitBR_CC(SDNode *N) { 7513 CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1)); 7514 SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3); 7515 7516 // If N is a constant we could fold this into a fallthrough or unconditional 7517 // branch. 
However that doesn't happen very often in normal code, because 7518 // Instcombine/SimplifyCFG should have handled the available opportunities. 7519 // If we did this folding here, it would be necessary to update the 7520 // MachineBasicBlock CFG, which is awkward. 7521 7522 // Use SimplifySetCC to simplify SETCC's. 7523 SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), 7524 CondLHS, CondRHS, CC->get(), SDLoc(N), 7525 false); 7526 if (Simp.getNode()) AddToWorkList(Simp.getNode()); 7527 7528 // fold to a simpler setcc 7529 if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) 7530 return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, 7531 N->getOperand(0), Simp.getOperand(2), 7532 Simp.getOperand(0), Simp.getOperand(1), 7533 N->getOperand(4)); 7534 7535 return SDValue(); 7536 } 7537 7538 /// canFoldInAddressingMode - Return true if 'Use' is a load or a store that 7539 /// uses N as its base pointer and that N may be folded in the load / store 7540 /// addressing mode. 
7541 static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, 7542 SelectionDAG &DAG, 7543 const TargetLowering &TLI) { 7544 EVT VT; 7545 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(Use)) { 7546 if (LD->isIndexed() || LD->getBasePtr().getNode() != N) 7547 return false; 7548 VT = Use->getValueType(0); 7549 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(Use)) { 7550 if (ST->isIndexed() || ST->getBasePtr().getNode() != N) 7551 return false; 7552 VT = ST->getValue().getValueType(); 7553 } else 7554 return false; 7555 7556 TargetLowering::AddrMode AM; 7557 if (N->getOpcode() == ISD::ADD) { 7558 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7559 if (Offset) 7560 // [reg +/- imm] 7561 AM.BaseOffs = Offset->getSExtValue(); 7562 else 7563 // [reg +/- reg] 7564 AM.Scale = 1; 7565 } else if (N->getOpcode() == ISD::SUB) { 7566 ConstantSDNode *Offset = dyn_cast<ConstantSDNode>(N->getOperand(1)); 7567 if (Offset) 7568 // [reg +/- imm] 7569 AM.BaseOffs = -Offset->getSExtValue(); 7570 else 7571 // [reg +/- reg] 7572 AM.Scale = 1; 7573 } else 7574 return false; 7575 7576 return TLI.isLegalAddressingMode(AM, VT.getTypeForEVT(*DAG.getContext())); 7577 } 7578 7579 /// CombineToPreIndexedLoadStore - Try turning a load / store into a 7580 /// pre-indexed load / store when the base pointer is an add or subtract 7581 /// and it has other uses besides the load / store. After the 7582 /// transformation, the new indexed load / store has effectively folded 7583 /// the add / subtract in and all of its other uses are redirected to the 7584 /// new load / store. 
7585 bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { 7586 if (Level < AfterLegalizeDAG) 7587 return false; 7588 7589 bool isLoad = true; 7590 SDValue Ptr; 7591 EVT VT; 7592 if (LoadSDNode *LD = dyn_cast<LoadSDNode>(N)) { 7593 if (LD->isIndexed()) 7594 return false; 7595 VT = LD->getMemoryVT(); 7596 if (!TLI.isIndexedLoadLegal(ISD::PRE_INC, VT) && 7597 !TLI.isIndexedLoadLegal(ISD::PRE_DEC, VT)) 7598 return false; 7599 Ptr = LD->getBasePtr(); 7600 } else if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N)) { 7601 if (ST->isIndexed()) 7602 return false; 7603 VT = ST->getMemoryVT(); 7604 if (!TLI.isIndexedStoreLegal(ISD::PRE_INC, VT) && 7605 !TLI.isIndexedStoreLegal(ISD::PRE_DEC, VT)) 7606 return false; 7607 Ptr = ST->getBasePtr(); 7608 isLoad = false; 7609 } else { 7610 return false; 7611 } 7612 7613 // If the pointer is not an add/sub, or if it doesn't have multiple uses, bail 7614 // out. There is no reason to make this a preinc/predec. 7615 if ((Ptr.getOpcode() != ISD::ADD && Ptr.getOpcode() != ISD::SUB) || 7616 Ptr.getNode()->hasOneUse()) 7617 return false; 7618 7619 // Ask the target to do addressing mode selection. 7620 SDValue BasePtr; 7621 SDValue Offset; 7622 ISD::MemIndexedMode AM = ISD::UNINDEXED; 7623 if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) 7624 return false; 7625 7626 // Backends without true r+i pre-indexed forms may need to pass a 7627 // constant base with a variable offset so that constant coercion 7628 // will work with the patterns in canonical form. 7629 bool Swapped = false; 7630 if (isa<ConstantSDNode>(BasePtr)) { 7631 std::swap(BasePtr, Offset); 7632 Swapped = true; 7633 } 7634 7635 // Don't create a indexed load / store with zero offset. 7636 if (isa<ConstantSDNode>(Offset) && 7637 cast<ConstantSDNode>(Offset)->isNullValue()) 7638 return false; 7639 7640 // Try turning it into a pre-indexed load / store except when: 7641 // 1) The new base ptr is a frame index. 
  // 2) If N is a store and the new base ptr is either the same as or is a
  //    predecessor of the value being stored.
  // 3) Another use of old base ptr is a predecessor of N. If ptr is folded
  //    that would create a cycle.
  // 4) All uses are load / store ops that use it as old base ptr.

  // Check #1.  Preinc'ing a frame index would require copying the stack pointer
  // (plus the implicit offset) to a register to preinc anyway.
  if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
    return false;

  // Check #2.  For a store, folding the add into the store would make the
  // stored value depend on the store's own result.
  if (!isLoad) {
    SDValue Val = cast<StoreSDNode>(N)->getValue();
    if (Val == BasePtr || BasePtr.getNode()->isPredecessorOf(Val.getNode()))
      return false;
  }

  // If the offset is a constant, there may be other adds of constants that
  // can be folded with this one. We should do this to avoid having to keep
  // a copy of the original base pointer.
  SmallVector<SDNode *, 16> OtherUses;
  if (isa<ConstantSDNode>(Offset))
    for (SDNode *Use : BasePtr.getNode()->uses()) {
      if (Use == Ptr.getNode())
        continue;

      // A use that is a predecessor of N cannot be rewritten in terms of the
      // indexed result without creating a cycle; leave it alone.
      if (Use->isPredecessorOf(N))
        continue;

      if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) {
        OtherUses.clear();
        break;
      }

      // Canonicalize so Op0 is the base pointer and Op1 the other operand.
      SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1);
      if (Op1.getNode() == BasePtr.getNode())
        std::swap(Op0, Op1);
      assert(Op0.getNode() == BasePtr.getNode() &&
             "Use of ADD/SUB but not an operand");

      if (!isa<ConstantSDNode>(Op1)) {
        OtherUses.clear();
        break;
      }

      // FIXME: In some cases, we can be smarter about this.
      if (Op1.getValueType() != Offset.getValueType()) {
        OtherUses.clear();
        break;
      }

      OtherUses.push_back(Use);
    }

  if (Swapped)
    std::swap(BasePtr, Offset);

  // Now check for #3 and #4.
  bool RealUse = false;

  // Caches for hasPredecessorHelper.
  SmallPtrSet<const SDNode *, 32> Visited;
  SmallVector<const SDNode *, 16> Worklist;

  for (SDNode *Use : Ptr.getNode()->uses()) {
    if (Use == N)
      continue;
    if (N->hasPredecessorHelper(Use, Visited, Worklist))
      return false;

    // If Ptr may be folded in addressing mode of other use, then it's
    // not profitable to do this transformation.
    if (!canFoldInAddressingMode(Ptr.getNode(), Use, DAG, TLI))
      RealUse = true;
  }

  if (!RealUse)
    return false;

  SDValue Result;
  if (isLoad)
    Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
  else
    Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                 BasePtr, Offset, AM);
  ++PreIndexedNodes;
  ++NodesCombined;
  DEBUG(dbgs() << "\nReplacing.4 ";
        N->dump(&DAG);
        dbgs() << "\nWith: ";
        Result.getNode()->dump(&DAG);
        dbgs() << '\n');
  WorkListRemover DeadNodes(*this);
  // An indexed load yields (value, new base, chain); an indexed store yields
  // (new base, chain).  Rewire the old node's value and chain accordingly.
  if (isLoad) {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
  } else {
    DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
  }

  // Finally, since the node is now dead, remove it from the graph.
  DAG.DeleteNode(N);

  // Undo the earlier canonicalization so BasePtr/Offset match the original
  // operand order of the add/sub being folded.
  if (Swapped)
    std::swap(BasePtr, Offset);

  // Replace other uses of BasePtr that can be updated to use Ptr.
  for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) {
    unsigned OffsetIdx = 1;
    if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode())
      OffsetIdx = 0;
    assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() ==
           BasePtr.getNode() && "Expected BasePtr operand");

    // We need to replace ptr0 in the following expression:
    //   x0 * offset0 + y0 * ptr0 = t0
    // knowing that
    //   x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store)
    //
    // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the
    // indexed load/store and the expression that needs to be re-written.
    //
    // Therefore, we have:
    //   t0 = (x0 * offset0 - x1 * y0 * y1 * offset1) + (y0 * y1) * t1

    ConstantSDNode *CN =
      cast<ConstantSDNode>(OtherUses[i]->getOperand(OffsetIdx));
    int X0, X1, Y0, Y1;
    APInt Offset0 = CN->getAPIntValue();
    APInt Offset1 = cast<ConstantSDNode>(Offset)->getAPIntValue();

    X0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) ? -1 : 1;
    Y0 = (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 0) ? -1 : 1;
    X1 = (AM == ISD::PRE_DEC && !Swapped) ? -1 : 1;
    Y1 = (AM == ISD::PRE_DEC && Swapped) ? -1 : 1;

    unsigned Opcode = (Y0 * Y1 < 0) ? ISD::SUB : ISD::ADD;

    APInt CNV = Offset0;
    if (X0 < 0) CNV = -CNV;
    if (X1 * Y0 * Y1 < 0) CNV = CNV + Offset1;
    else CNV = CNV - Offset1;

    // We can now generate the new expression.
    SDValue NewOp1 = DAG.getConstant(CNV, CN->getValueType(0));
    // The rewritten base is the indexed node's "new base" result.
    SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0);

    SDValue NewUse = DAG.getNode(Opcode,
                                 SDLoc(OtherUses[i]),
                                 OtherUses[i]->getValueType(0), NewOp1, NewOp2);
    DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse);
    removeFromWorkList(OtherUses[i]);
    DAG.DeleteNode(OtherUses[i]);
  }

  // Replace the uses of Ptr with uses of the updated base value.
  DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0));
  removeFromWorkList(Ptr.getNode());
  DAG.DeleteNode(Ptr.getNode());

  return true;
}

/// CombineToPostIndexedLoadStore - Try to combine a load / store with an
/// add / sub of the base pointer node into a post-indexed load / store.
/// The transformation folded the add / subtract into the new indexed
/// load / store effectively and all of its uses are redirected to the
/// new load / store.
bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
  if (Level < AfterLegalizeDAG)
    return false;

  bool isLoad = true;
  SDValue Ptr;
  EVT VT;
  if (LoadSDNode *LD  = dyn_cast<LoadSDNode>(N)) {
    if (LD->isIndexed())
      return false;
    VT = LD->getMemoryVT();
    if (!TLI.isIndexedLoadLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedLoadLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = LD->getBasePtr();
  } else if (StoreSDNode *ST  = dyn_cast<StoreSDNode>(N)) {
    if (ST->isIndexed())
      return false;
    VT = ST->getMemoryVT();
    if (!TLI.isIndexedStoreLegal(ISD::POST_INC, VT) &&
        !TLI.isIndexedStoreLegal(ISD::POST_DEC, VT))
      return false;
    Ptr = ST->getBasePtr();
    isLoad = false;
  } else {
    return false;
  }

  // If the pointer has only one use (this node), there is no add/sub of it
  // to fold.
  if (Ptr.getNode()->hasOneUse())
    return false;

  for (SDNode *Op : Ptr.getNode()->uses()) {
    if (Op == N ||
        (Op->getOpcode() != ISD::ADD && Op->getOpcode() != ISD::SUB))
      continue;

    SDValue BasePtr;
    SDValue Offset;
    ISD::MemIndexedMode AM = ISD::UNINDEXED;
    if (TLI.getPostIndexedAddressParts(N, Op, BasePtr, Offset, AM, DAG)) {
      // Don't create an indexed load / store with zero offset.
      if (isa<ConstantSDNode>(Offset) &&
          cast<ConstantSDNode>(Offset)->isNullValue())
        continue;

      // Try turning it into a post-indexed load / store except when
      // 1) All uses are load / store ops that use it as base ptr (and
      //    it may be folded as addressing mode).
      // 2) Op must be independent of N, i.e. Op is neither a predecessor
      //    nor a successor of N. Otherwise, if Op is folded that would
      //    create a cycle.

      // Post-inc'ing a frame index or register would require copying it to a
      // register first.
      if (isa<FrameIndexSDNode>(BasePtr) || isa<RegisterSDNode>(BasePtr))
        continue;

      // Check for #1.
      bool TryNext = false;
      for (SDNode *Use : BasePtr.getNode()->uses()) {
        if (Use == Ptr.getNode())
          continue;

        // If all the uses are load / store addresses, then don't do the
        // transformation.
        if (Use->getOpcode() == ISD::ADD || Use->getOpcode() == ISD::SUB){
          bool RealUse = false;
          for (SDNode *UseUse : Use->uses()) {
            // A use that cannot be folded into a load/store addressing mode
            // counts as a "real" use of the add/sub result.
            if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI))
              RealUse = true;
          }

          if (!RealUse) {
            TryNext = true;
            break;
          }
        }
      }

      if (TryNext)
        continue;

      // Check for #2.
      if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) {
        SDValue Result = isLoad
          ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N),
                               BasePtr, Offset, AM)
          : DAG.getIndexedStore(SDValue(N,0), SDLoc(N),
                                BasePtr, Offset, AM);
        ++PostIndexedNodes;
        ++NodesCombined;
        DEBUG(dbgs() << "\nReplacing.5 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Result.getNode()->dump(&DAG);
              dbgs() << '\n');
        WorkListRemover DeadNodes(*this);
        // Indexed load results: (value, new base, chain); indexed store
        // results: (new base, chain).
        if (isLoad) {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(0));
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Result.getValue(2));
        } else {
          DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Result.getValue(1));
        }

        // Finally, since the node is now dead, remove it from the graph.
        DAG.DeleteNode(N);

        // Replace the uses of Use with uses of the updated base value.
        DAG.ReplaceAllUsesOfValueWith(SDValue(Op, 0),
                                      Result.getValue(isLoad ? 1 : 0));
        removeFromWorkList(Op);
        DAG.DeleteNode(Op);
        return true;
      }
    }
  }

  return false;
}

SDValue DAGCombiner::visitLOAD(SDNode *N) {
  LoadSDNode *LD  = cast<LoadSDNode>(N);
  SDValue Chain = LD->getChain();
  SDValue Ptr   = LD->getBasePtr();

  // If load is not volatile and there are no uses of the loaded value (and
  // the updated indexed value in case of indexed loads), change uses of the
  // chain value into uses of the chain input (i.e. delete the dead load).
  if (!LD->isVolatile()) {
    if (N->getValueType(1) == MVT::Other) {
      // Unindexed loads.
      if (!N->hasAnyUseOfValue(0)) {
        // It's not safe to use the two value CombineTo variant here. e.g.
        // v1, chain2 = load chain1, loc
        // v2, chain3 = load chain2, loc
        // v3         = add v2, c
        // Now we replace use of chain2 with chain1.  This makes the second load
        // isomorphic to the one we are deleting, and thus makes this load live.
        DEBUG(dbgs() << "\nReplacing.6 ";
              N->dump(&DAG);
              dbgs() << "\nWith chain: ";
              Chain.getNode()->dump(&DAG);
              dbgs() << "\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);

        if (N->use_empty()) {
          removeFromWorkList(N);
          DAG.DeleteNode(N);
        }

        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    } else {
      // Indexed loads.
      assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
      // An indexed load is dead if neither the value (result 0) nor the
      // updated base (result 1) is used; forward the chain and replace the
      // other results with undef.
      if (!N->hasAnyUseOfValue(0) && !N->hasAnyUseOfValue(1)) {
        SDValue Undef = DAG.getUNDEF(N->getValueType(0));
        DEBUG(dbgs() << "\nReplacing.7 ";
              N->dump(&DAG);
              dbgs() << "\nWith: ";
              Undef.getNode()->dump(&DAG);
              dbgs() << " and 2 other values\n");
        WorkListRemover DeadNodes(*this);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 0), Undef);
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1),
                                      DAG.getUNDEF(N->getValueType(1)));
        DAG.ReplaceAllUsesOfValueWith(SDValue(N, 2), Chain);
        removeFromWorkList(N);
        DAG.DeleteNode(N);
        return SDValue(N, 0);   // Return N so it doesn't get rechecked!
      }
    }
  }

  // If this load is directly stored, replace the load value with the stored
  // value.
  // TODO: Handle store large -> read small portion.
  // TODO: Handle TRUNCSTORE/LOADEXT
  if (ISD::isNormalLoad(N) && !LD->isVolatile()) {
    if (ISD::isNON_TRUNCStore(Chain.getNode())) {
      StoreSDNode *PrevST = cast<StoreSDNode>(Chain);
      // Only forward when the store writes the same address with the same
      // type; operand 1 of the store is the stored value.
      if (PrevST->getBasePtr() == Ptr &&
          PrevST->getValue().getValueType() == N->getValueType(0))
        return CombineTo(N, Chain.getOperand(1), Chain);
    }
  }

  // Try to infer better alignment information than the load already has.
  if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > LD->getMemOperand()->getBaseAlignment()) {
        SDValue NewLoad =
               DAG.getExtLoad(LD->getExtensionType(), SDLoc(N),
                              LD->getValueType(0),
                              Chain, Ptr, LD->getPointerInfo(),
                              LD->getMemoryVT(),
                              LD->isVolatile(), LD->isNonTemporal(), Align,
                              LD->getTBAAInfo());
        return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true);
      }
    }
  }

  // The command-line flag, when present, overrides the subtarget's default.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
    TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && LD->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplLoad;

      // Replace the chain to avoid dependency.
      if (LD->getExtensionType() == ISD::NON_EXTLOAD) {
        ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD),
                               BetterChain, Ptr, LD->getMemOperand());
      } else {
        ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD),
                                  LD->getValueType(0),
                                  BetterChain, Ptr, LD->getMemoryVT(),
                                  LD->getMemOperand());
      }

      // Create token factor to keep old chain connected.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplLoad.getValue(1));

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Replace uses with load result and token factor. Don't add users
      // to work list.
      return CombineTo(N, ReplLoad.getValue(0), Token, false);
    }
  }

  // Try transforming N to an indexed load.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // Try to slice up N to more direct loads if the slices are mapped to
  // different register banks or pairing can take place.
  if (SliceUpLoad(N))
    return SDValue(N, 0);

  return SDValue();
}

namespace {
/// \brief Helper structure used to slice a load in smaller loads.
/// Basically a slice is obtained from the following sequence:
/// Origin = load Ty1, Base
/// Shift = srl Ty1 Origin, CstTy Amount
/// Inst = trunc Shift to Ty2
///
/// Then, it will be rewritten into:
/// Slice = load SliceTy, Base + SliceOffset
/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2
///
/// SliceTy is deduced from the number of bits that are actually used to
/// build Inst.
struct LoadedSlice {
  /// \brief Helper structure used to compute the cost of a slice.
  struct Cost {
    /// Are we optimizing for code size.
    bool ForCodeSize;
    /// Various cost counters (number of each kind of operation).
    unsigned Loads;
    unsigned Truncates;
    unsigned CrossRegisterBanksCopies;
    unsigned ZExts;
    unsigned Shift;

    Cost(bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(0), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {}

    /// \brief Get the cost of one isolated slice: one load, plus a zext
    /// when the loaded type must be widened to the truncated type and the
    /// target does not consider that zext free.
    Cost(const LoadedSlice &LS, bool ForCodeSize = false)
        : ForCodeSize(ForCodeSize), Loads(1), Truncates(0),
          CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {
      EVT TruncType = LS.Inst->getValueType(0);
      EVT LoadedType = LS.getLoadedType();
      if (TruncType != LoadedType &&
          !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType))
        ZExts = 1;
    }

    /// \brief Account for slicing gain in the current cost.
    /// Slicing provides a few gains like removing a shift or a
    /// truncate. This method allows to grow the cost of the original
    /// load with the gain from this slice.
    void addSliceGain(const LoadedSlice &LS) {
      // Each slice saves a truncate.
      const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo();
      if (!TLI.isTruncateFree(LS.Inst->getValueType(0),
                              LS.Inst->getOperand(0).getValueType()))
        ++Truncates;
      // If there is a shift amount, this slice gets rid of it.
      if (LS.Shift)
        ++Shift;
      // If this slice can merge a cross register bank copy, account for it.
      if (LS.canMergeExpensiveCrossRegisterBankCopy())
        ++CrossRegisterBanksCopies;
    }

    Cost &operator+=(const Cost &RHS) {
      Loads += RHS.Loads;
      Truncates += RHS.Truncates;
      CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies;
      ZExts += RHS.ZExts;
      Shift += RHS.Shift;
      return *this;
    }

    bool operator==(const Cost &RHS) const {
      return Loads == RHS.Loads && Truncates == RHS.Truncates &&
             CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies &&
             ZExts == RHS.ZExts && Shift == RHS.Shift;
    }

    bool operator!=(const Cost &RHS) const { return !(*this == RHS); }

    bool operator<(const Cost &RHS) const {
      // Assume cross register banks copies are as expensive as loads.
      // FIXME: Do we want some more target hooks?
      unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies;
      unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies;
      // Unless we are optimizing for code size, consider the
      // expensive operation first.
      if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS)
        return ExpensiveOpsLHS < ExpensiveOpsRHS;
      // Tie-break (or code-size mode): compare the total operation count.
      return (Truncates + ZExts + Shift + ExpensiveOpsLHS) <
             (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS);
    }

    bool operator>(const Cost &RHS) const { return RHS < *this; }

    bool operator<=(const Cost &RHS) const { return !(RHS < *this); }

    bool operator>=(const Cost &RHS) const { return !(*this < RHS); }
  };
  // The last instruction that represent the slice. This should be a
  // truncate instruction.
  SDNode *Inst;
  // The original load instruction.
  LoadSDNode *Origin;
  // The right shift amount in bits from the original load.
  unsigned Shift;
  // The DAG from which Origin came from.
  // This is used to get some contextual information about legal types, etc.
  SelectionDAG *DAG;

  LoadedSlice(SDNode *Inst = nullptr, LoadSDNode *Origin = nullptr,
              unsigned Shift = 0, SelectionDAG *DAG = nullptr)
      : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {}

  LoadedSlice(const LoadedSlice &LS)
      : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {}

  /// \brief Get the bits used in a chunk of bits \p BitWidth large.
  /// \return Result is \p BitWidth and has used bits set to 1 and
  ///         not used bits set to 0.
  APInt getUsedBits() const {
    // Reproduce the trunc(lshr) sequence:
    // - Start from the truncated value.
    // - Zero extend to the desired bit width.
    // - Shift left.
    assert(Origin && "No original load to compare against.");
    unsigned BitWidth = Origin->getValueSizeInBits(0);
    assert(Inst && "This slice is not bound to an instruction");
    assert(Inst->getValueSizeInBits(0) <= BitWidth &&
           "Extracted slice is bigger than the whole type!");
    APInt UsedBits(Inst->getValueSizeInBits(0), 0);
    UsedBits.setAllBits();
    UsedBits = UsedBits.zext(BitWidth);
    UsedBits <<= Shift;
    return UsedBits;
  }

  /// \brief Get the size of the slice to be loaded in bytes.
  unsigned getLoadedSize() const {
    unsigned SliceSize = getUsedBits().countPopulation();
    assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte.");
    return SliceSize / 8;
  }

  /// \brief Get the type that will be loaded for this slice.
  /// Note: This may not be the final type for the slice.
  EVT getLoadedType() const {
    assert(DAG && "Missing context");
    LLVMContext &Ctxt = *DAG->getContext();
    return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8);
  }

  /// \brief Get the alignment of the load used for this slice.
  unsigned getAlignment() const {
    unsigned Alignment = Origin->getAlignment();
    unsigned Offset = getOffsetFromBase();
    // The slice's address is Base + Offset, so its guaranteed alignment is
    // the minimum of the original alignment and the alignment of the offset.
    if (Offset != 0)
      Alignment = MinAlign(Alignment, Alignment + Offset);
    return Alignment;
  }

  /// \brief Check if this slice can be rewritten with legal operations.
  bool isLegal() const {
    // An invalid slice is not legal.
    if (!Origin || !Inst || !DAG)
      return false;

    // Offsets are for indexed load only, we do not handle that.
    if (Origin->getOffset().getOpcode() != ISD::UNDEF)
      return false;

    const TargetLowering &TLI = DAG->getTargetLoweringInfo();

    // Check that the type is legal.
    EVT SliceType = getLoadedType();
    if (!TLI.isTypeLegal(SliceType))
      return false;

    // Check that the load is legal for this type.
    if (!TLI.isOperationLegal(ISD::LOAD, SliceType))
      return false;

    // Check that the offset can be computed.
    // 1. Check its type.
    EVT PtrType = Origin->getBasePtr().getValueType();
    if (PtrType == MVT::Untyped || PtrType.isExtended())
      return false;

    // 2. Check that it fits in the immediate.
    if (!TLI.isLegalAddImmediate(getOffsetFromBase()))
      return false;

    // 3. Check that the computation is legal.
    if (!TLI.isOperationLegal(ISD::ADD, PtrType))
      return false;

    // Check that the zext is legal if it needs one.
    EVT TruncateType = Inst->getValueType(0);
    if (TruncateType != SliceType &&
        !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType))
      return false;

    return true;
  }

  /// \brief Get the offset in bytes of this slice in the original chunk of
  /// bits.
  /// \pre DAG != nullptr.
  uint64_t getOffsetFromBase() const {
    assert(DAG && "Missing context.");
    bool IsBigEndian =
        DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian();
    assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported.");
    uint64_t Offset = Shift / 8;
    unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8;
    assert(!(Origin->getValueSizeInBits(0) & 0x7) &&
           "The size of the original loaded type is not a multiple of a"
           " byte.");
    // If Offset is bigger than TySizeInBytes, it means we are loading all
    // zeros. This should have been optimized before in the process.
    assert(TySizeInBytes > Offset &&
           "Invalid shift amount for given loaded size");
    // On big-endian targets the low bits live at the high addresses, so the
    // byte offset is counted from the other end of the value.
    if (IsBigEndian)
      Offset = TySizeInBytes - Offset - getLoadedSize();
    return Offset;
  }

  /// \brief Generate the sequence of instructions to load the slice
  /// represented by this object and redirect the uses of this slice to
  /// this new sequence of instructions.
  /// \pre this->Inst && this->Origin are valid Instructions and this
  /// object passed the legal check: LoadedSlice::isLegal returned true.
  /// \return The last instruction of the sequence used to load the slice.
  SDValue loadSlice() const {
    assert(Inst && Origin && "Unable to replace a non-existing slice.");
    const SDValue &OldBaseAddr = Origin->getBasePtr();
    SDValue BaseAddr = OldBaseAddr;
    // Get the offset in that chunk of bytes w.r.t. the endianness.
    int64_t Offset = static_cast<int64_t>(getOffsetFromBase());
    assert(Offset >= 0 && "Offset too big to fit in int64_t!");
    if (Offset) {
      // BaseAddr = BaseAddr + Offset.
      EVT ArithType = BaseAddr.getValueType();
      BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr,
                              DAG->getConstant(Offset, ArithType));
    }

    // Create the type of the loaded slice according to its size.
    EVT SliceType = getLoadedType();

    // Create the load for the slice.
    SDValue LastInst = DAG->getLoad(
        SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr,
        Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(),
        Origin->isNonTemporal(), Origin->isInvariant(), getAlignment());
    // If the final type is not the same as the loaded type, this means that
    // we have to pad with zero. Create a zero extend for that.
    EVT FinalType = Inst->getValueType(0);
    if (SliceType != FinalType)
      LastInst =
          DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst);
    return LastInst;
  }

  /// \brief Check if this slice can be merged with an expensive cross register
  /// bank copy. E.g.,
  /// i = load i32
  /// f = bitcast i32 i to float
  bool canMergeExpensiveCrossRegisterBankCopy() const {
    if (!Inst || !Inst->hasOneUse())
      return false;
    SDNode *Use = *Inst->use_begin();
    if (Use->getOpcode() != ISD::BITCAST)
      return false;
    assert(DAG && "Missing context");
    const TargetLowering &TLI = DAG->getTargetLoweringInfo();
    EVT ResVT = Use->getValueType(0);
    const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT());
    const TargetRegisterClass *ArgRC =
        TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT());
    if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // At this point, we know that we perform a cross-register-bank copy.
    // Check if it is expensive.
    const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo();
    // Assume bitcasts are cheap, unless both register classes do not
    // explicitly share a common sub class.
    if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC))
      return false;

    // Check if it will be merged with the load.
    // 1. Check the alignment constraint.
    unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment(
        ResVT.getTypeForEVT(*DAG->getContext()));

    if (RequiredAlignment > getAlignment())
      return false;

    // 2. Check that the load is a legal operation for that type.
    if (!TLI.isOperationLegal(ISD::LOAD, ResVT))
      return false;

    // 3. Check that we do not have a zext in the way.
    if (Inst->getValueType(0) != getLoadedType())
      return false;

    return true;
  }
};
}

/// \brief Check that all bits set in \p UsedBits form a dense region, i.e.,
/// \p UsedBits looks like 0..0 1..1 0..0.
static bool areUsedBitsDense(const APInt &UsedBits) {
  // If all the bits are one, this is dense!
  if (UsedBits.isAllOnesValue())
    return true;

  // Get rid of the unused bits on the right.
  APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros());
  // Get rid of the unused bits on the left.
  if (NarrowedUsedBits.countLeadingZeros())
    NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits());
  // Check that the chunk of bits is completely used.
  return NarrowedUsedBits.isAllOnesValue();
}

/// \brief Check whether or not \p First and \p Second are next to each other
/// in memory. This means that there is no hole between the bits loaded
/// by \p First and the bits loaded by \p Second.
static bool areSlicesNextToEachOther(const LoadedSlice &First,
                                     const LoadedSlice &Second) {
  assert(First.Origin == Second.Origin && First.Origin &&
         "Unable to match different memory origins.");
  APInt UsedBits = First.getUsedBits();
  assert((UsedBits & Second.getUsedBits()) == 0 &&
         "Slices are not supposed to overlap.");
  UsedBits |= Second.getUsedBits();
  return areUsedBitsDense(UsedBits);
}

/// \brief Adjust the \p GlobalLSCost according to the target
/// pairing capabilities and the layout of the slices.
/// \pre \p GlobalLSCost should account for at least as many loads as
/// there is in the slices in \p LoadedSlices.
static void adjustCostForPairing(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                 LoadedSlice::Cost &GlobalLSCost) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // If there is less than 2 elements, no pairing is possible.
  if (NumberOfSlices < 2)
    return;

  // Sort the slices so that elements that are likely to be next to each
  // other in memory are next to each other in the list.
  std::sort(LoadedSlices.begin(), LoadedSlices.end(),
            [](const LoadedSlice &LHS, const LoadedSlice &RHS) {
    assert(LHS.Origin == RHS.Origin && "Different bases not implemented.");
    return LHS.getOffsetFromBase() < RHS.getOffsetFromBase();
  });
  const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo();
  // First (resp. Second) is the first (resp. Second) potentially candidate
  // to be placed in a paired load.
  const LoadedSlice *First = nullptr;
  const LoadedSlice *Second = nullptr;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice,
                // Set the beginning of the pair.
                First = Second) {

    Second = &LoadedSlices[CurrSlice];

    // If First is NULL, it means we start a new pair.
    // Get to the next slice.
    if (!First)
      continue;

    EVT LoadedType = First->getLoadedType();

    // If the types of the slices are different, we cannot pair them.
    if (LoadedType != Second->getLoadedType())
      continue;

    // Check if the target supplies paired loads for this type.
    unsigned RequiredAlignment = 0;
    if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) {
      // move to the next pair, this type is hopeless.
      Second = nullptr;
      continue;
    }
    // Check if we meet the alignment requirement.
    if (RequiredAlignment > First->getAlignment())
      continue;

    // Check that both loads are next to each other in memory.
    if (!areSlicesNextToEachOther(*First, *Second))
      continue;

    // A pair was found: one load is saved.
    assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!");
    --GlobalLSCost.Loads;
    // Move to the next pair.
    Second = nullptr;
  }
}

/// \brief Check the profitability of all involved LoadedSlice.
/// Currently, it is considered profitable if there is exactly two
/// involved slices (1) which are (2) next to each other in memory, and
/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3).
///
/// Note: The order of the elements in \p LoadedSlices may be modified, but not
/// the elements themselves.
///
/// FIXME: When the cost model will be mature enough, we can relax
/// constraints (1) and (2).
static bool isSlicingProfitable(SmallVectorImpl<LoadedSlice> &LoadedSlices,
                                const APInt &UsedBits, bool ForCodeSize) {
  unsigned NumberOfSlices = LoadedSlices.size();
  // In stress mode, bypass the profitability model entirely.
  if (StressLoadSlicing)
    return NumberOfSlices > 1;

  // Check (1).
  if (NumberOfSlices != 2)
    return false;

  // Check (2).
  if (!areUsedBitsDense(UsedBits))
    return false;

  // Check (3).
  LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize);
  // The original code has one big load.
  OrigCost.Loads = 1;
  for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) {
    const LoadedSlice &LS = LoadedSlices[CurrSlice];
    // Accumulate the cost of all the slices.
    LoadedSlice::Cost SliceCost(LS, ForCodeSize);
    GlobalSlicingCost += SliceCost;

    // Account as cost in the original configuration the gain obtained
    // with the current slices.
    OrigCost.addSliceGain(LS);
  }

  // If the target supports paired load, adjust the cost accordingly.
  adjustCostForPairing(LoadedSlices, GlobalSlicingCost);
  return OrigCost > GlobalSlicingCost;
}

/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr)
/// operations, split it in the various pieces being extracted.
///
/// This sort of thing is introduced by SROA.
/// This slicing takes care not to insert overlapping loads.
/// \pre LI is a simple load (i.e., not an atomic or volatile load).
8514 bool DAGCombiner::SliceUpLoad(SDNode *N) { 8515 if (Level < AfterLegalizeDAG) 8516 return false; 8517 8518 LoadSDNode *LD = cast<LoadSDNode>(N); 8519 if (LD->isVolatile() || !ISD::isNormalLoad(LD) || 8520 !LD->getValueType(0).isInteger()) 8521 return false; 8522 8523 // Keep track of already used bits to detect overlapping values. 8524 // In that case, we will just abort the transformation. 8525 APInt UsedBits(LD->getValueSizeInBits(0), 0); 8526 8527 SmallVector<LoadedSlice, 4> LoadedSlices; 8528 8529 // Check if this load is used as several smaller chunks of bits. 8530 // Basically, look for uses in trunc or trunc(lshr) and record a new chain 8531 // of computation for each trunc. 8532 for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); 8533 UI != UIEnd; ++UI) { 8534 // Skip the uses of the chain. 8535 if (UI.getUse().getResNo() != 0) 8536 continue; 8537 8538 SDNode *User = *UI; 8539 unsigned Shift = 0; 8540 8541 // Check if this is a trunc(lshr). 8542 if (User->getOpcode() == ISD::SRL && User->hasOneUse() && 8543 isa<ConstantSDNode>(User->getOperand(1))) { 8544 Shift = cast<ConstantSDNode>(User->getOperand(1))->getZExtValue(); 8545 User = *User->use_begin(); 8546 } 8547 8548 // At this point, User is a Truncate, iff we encountered, trunc or 8549 // trunc(lshr). 8550 if (User->getOpcode() != ISD::TRUNCATE) 8551 return false; 8552 8553 // The width of the type must be a power of 2 and greater than 8-bits. 8554 // Otherwise the load cannot be represented in LLVM IR. 8555 // Moreover, if we shifted with a non-8-bits multiple, the slice 8556 // will be across several bytes. We do not support that. 8557 unsigned Width = User->getValueSizeInBits(0); 8558 if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) 8559 return 0; 8560 8561 // Build the slice for this chain of computations. 8562 LoadedSlice LS(User, LD, Shift, &DAG); 8563 APInt CurrentUsedBits = LS.getUsedBits(); 8564 8565 // Check if this slice overlaps with another. 
8566 if ((CurrentUsedBits & UsedBits) != 0) 8567 return false; 8568 // Update the bits used globally. 8569 UsedBits |= CurrentUsedBits; 8570 8571 // Check if the new slice would be legal. 8572 if (!LS.isLegal()) 8573 return false; 8574 8575 // Record the slice. 8576 LoadedSlices.push_back(LS); 8577 } 8578 8579 // Abort slicing if it does not seem to be profitable. 8580 if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) 8581 return false; 8582 8583 ++SlicedLoads; 8584 8585 // Rewrite each chain to use an independent load. 8586 // By construction, each chain can be represented by a unique load. 8587 8588 // Prepare the argument for the new token factor for all the slices. 8589 SmallVector<SDValue, 8> ArgChains; 8590 for (SmallVectorImpl<LoadedSlice>::const_iterator 8591 LSIt = LoadedSlices.begin(), 8592 LSItEnd = LoadedSlices.end(); 8593 LSIt != LSItEnd; ++LSIt) { 8594 SDValue SliceInst = LSIt->loadSlice(); 8595 CombineTo(LSIt->Inst, SliceInst, true); 8596 if (SliceInst.getNode()->getOpcode() != ISD::LOAD) 8597 SliceInst = SliceInst.getOperand(0); 8598 assert(SliceInst->getOpcode() == ISD::LOAD && 8599 "It takes more than a zext to get to the loaded slice!!"); 8600 ArgChains.push_back(SliceInst.getValue(1)); 8601 } 8602 8603 SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, 8604 ArgChains); 8605 DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain); 8606 return true; 8607 } 8608 8609 /// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the 8610 /// load is having specific bytes cleared out. If so, return the byte size 8611 /// being masked out and the shift amount. 8612 static std::pair<unsigned, unsigned> 8613 CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) { 8614 std::pair<unsigned, unsigned> Result(0, 0); 8615 8616 // Check for the structure we're looking for. 
8617 if (V->getOpcode() != ISD::AND || 8618 !isa<ConstantSDNode>(V->getOperand(1)) || 8619 !ISD::isNormalLoad(V->getOperand(0).getNode())) 8620 return Result; 8621 8622 // Check the chain and pointer. 8623 LoadSDNode *LD = cast<LoadSDNode>(V->getOperand(0)); 8624 if (LD->getBasePtr() != Ptr) return Result; // Not from same pointer. 8625 8626 // The store should be chained directly to the load or be an operand of a 8627 // tokenfactor. 8628 if (LD == Chain.getNode()) 8629 ; // ok. 8630 else if (Chain->getOpcode() != ISD::TokenFactor) 8631 return Result; // Fail. 8632 else { 8633 bool isOk = false; 8634 for (unsigned i = 0, e = Chain->getNumOperands(); i != e; ++i) 8635 if (Chain->getOperand(i).getNode() == LD) { 8636 isOk = true; 8637 break; 8638 } 8639 if (!isOk) return Result; 8640 } 8641 8642 // This only handles simple types. 8643 if (V.getValueType() != MVT::i16 && 8644 V.getValueType() != MVT::i32 && 8645 V.getValueType() != MVT::i64) 8646 return Result; 8647 8648 // Check the constant mask. Invert it so that the bits being masked out are 8649 // 0 and the bits being kept are 1. Use getSExtValue so that leading bits 8650 // follow the sign bit for uniformity. 8651 uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue(); 8652 unsigned NotMaskLZ = countLeadingZeros(NotMask); 8653 if (NotMaskLZ & 7) return Result; // Must be multiple of a byte. 8654 unsigned NotMaskTZ = countTrailingZeros(NotMask); 8655 if (NotMaskTZ & 7) return Result; // Must be multiple of a byte. 8656 if (NotMaskLZ == 64) return Result; // All zero mask. 8657 8658 // See if we have a continuous run of bits. If so, we have 0*1+0* 8659 if (CountTrailingOnes_64(NotMask >> NotMaskTZ)+NotMaskTZ+NotMaskLZ != 64) 8660 return Result; 8661 8662 // Adjust NotMaskLZ down to be from the actual size of the int instead of i64. 
8663 if (V.getValueType() != MVT::i64 && NotMaskLZ) 8664 NotMaskLZ -= 64-V.getValueSizeInBits(); 8665 8666 unsigned MaskedBytes = (V.getValueSizeInBits()-NotMaskLZ-NotMaskTZ)/8; 8667 switch (MaskedBytes) { 8668 case 1: 8669 case 2: 8670 case 4: break; 8671 default: return Result; // All one mask, or 5-byte mask. 8672 } 8673 8674 // Verify that the first bit starts at a multiple of mask so that the access 8675 // is aligned the same as the access width. 8676 if (NotMaskTZ && NotMaskTZ/8 % MaskedBytes) return Result; 8677 8678 Result.first = MaskedBytes; 8679 Result.second = NotMaskTZ/8; 8680 return Result; 8681 } 8682 8683 8684 /// ShrinkLoadReplaceStoreWithStore - Check to see if IVal is something that 8685 /// provides a value as specified by MaskInfo. If so, replace the specified 8686 /// store with a narrower store of truncated IVal. 8687 static SDNode * 8688 ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo, 8689 SDValue IVal, StoreSDNode *St, 8690 DAGCombiner *DC) { 8691 unsigned NumBytes = MaskInfo.first; 8692 unsigned ByteShift = MaskInfo.second; 8693 SelectionDAG &DAG = DC->getDAG(); 8694 8695 // Check to see if IVal is all zeros in the part being masked in by the 'or' 8696 // that uses this. If not, this is not a replacement. 8697 APInt Mask = ~APInt::getBitsSet(IVal.getValueSizeInBits(), 8698 ByteShift*8, (ByteShift+NumBytes)*8); 8699 if (!DAG.MaskedValueIsZero(IVal, Mask)) return nullptr; 8700 8701 // Check that it is legal on the target to do this. It is legal if the new 8702 // VT we're shrinking to (i8/i16/i32) is legal or we're still before type 8703 // legalization. 8704 MVT VT = MVT::getIntegerVT(NumBytes*8); 8705 if (!DC->isTypeLegal(VT)) 8706 return nullptr; 8707 8708 // Okay, we can do this! Replace the 'St' store with a store of IVal that is 8709 // shifted by ByteShift and truncated down to NumBytes. 
8710 if (ByteShift) 8711 IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal, 8712 DAG.getConstant(ByteShift*8, 8713 DC->getShiftAmountTy(IVal.getValueType()))); 8714 8715 // Figure out the offset for the store and the alignment of the access. 8716 unsigned StOffset; 8717 unsigned NewAlign = St->getAlignment(); 8718 8719 if (DAG.getTargetLoweringInfo().isLittleEndian()) 8720 StOffset = ByteShift; 8721 else 8722 StOffset = IVal.getValueType().getStoreSize() - ByteShift - NumBytes; 8723 8724 SDValue Ptr = St->getBasePtr(); 8725 if (StOffset) { 8726 Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(), 8727 Ptr, DAG.getConstant(StOffset, Ptr.getValueType())); 8728 NewAlign = MinAlign(NewAlign, StOffset); 8729 } 8730 8731 // Truncate down to the new size. 8732 IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal); 8733 8734 ++OpsNarrowed; 8735 return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr, 8736 St->getPointerInfo().getWithOffset(StOffset), 8737 false, false, NewAlign).getNode(); 8738 } 8739 8740 8741 /// ReduceLoadOpStoreWidth - Look for sequence of load / op / store where op is 8742 /// one of 'or', 'xor', and 'and' of immediates. If 'op' is only touching some 8743 /// of the loaded bits, try narrowing the load and store if it would end up 8744 /// being a win for performance or code size. 
SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
  StoreSDNode *ST = cast<StoreSDNode>(N);
  if (ST->isVolatile())
    return SDValue();

  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr = ST->getBasePtr();
  EVT VT = Value.getValueType();

  // Only plain, full-width integer/scalar stores of a single-use value.
  if (ST->isTruncatingStore() || VT.isVector() || !Value.hasOneUse())
    return SDValue();

  unsigned Opc = Value.getOpcode();

  // If this is "store (or X, Y), P" and X is "(and (load P), cst)", where cst
  // is a byte mask indicating a consecutive number of bytes, check to see if
  // Y is known to provide just those bytes. If so, we try to replace the
  // load + replace + store sequence with a single (narrower) store, which makes
  // the load dead.
  if (Opc == ISD::OR) {
    std::pair<unsigned, unsigned> MaskedLoad;
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(0), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(1), ST,this))
        return SDValue(NewST, 0);

    // Or is commutative, so try swapping X and Y.
    MaskedLoad = CheckForMaskedLoad(Value.getOperand(1), Ptr, Chain);
    if (MaskedLoad.first)
      if (SDNode *NewST = ShrinkLoadReplaceStoreWithStore(MaskedLoad,
                                                  Value.getOperand(0), ST,this))
        return SDValue(NewST, 0);
  }

  // Below here we handle the general "load P; op cst; store P" narrowing.
  if ((Opc != ISD::OR && Opc != ISD::XOR && Opc != ISD::AND) ||
      Value.getOperand(1).getOpcode() != ISD::Constant)
    return SDValue();

  SDValue N0 = Value.getOperand(0);
  // The op's other operand must be a plain load from the same pointer whose
  // chain feeds this store directly (so the pair can be rewritten together).
  if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() &&
      Chain == SDValue(N0.getNode(), 1)) {
    LoadSDNode *LD = cast<LoadSDNode>(N0);
    if (LD->getBasePtr() != Ptr ||
        LD->getPointerInfo().getAddrSpace() !=
        ST->getPointerInfo().getAddrSpace())
      return SDValue();

    // Find the type to narrow it the load / op / store to.
    SDValue N1 = Value.getOperand(1);
    unsigned BitWidth = N1.getValueSizeInBits();
    APInt Imm = cast<ConstantSDNode>(N1)->getAPIntValue();
    // For AND, invert the constant so that set bits mark the bits the op
    // actually changes, matching OR/XOR semantics below.
    if (Opc == ISD::AND)
      Imm ^= APInt::getAllOnesValue(BitWidth);
    // No-op or full-width change: nothing to narrow.
    if (Imm == 0 || Imm.isAllOnesValue())
      return SDValue();
    // [ShAmt, MSB] bounds the range of bits the op touches; round the span
    // up to the next power-of-2 width that is legal and profitable.
    unsigned ShAmt = Imm.countTrailingZeros();
    unsigned MSB = BitWidth - Imm.countLeadingZeros() - 1;
    unsigned NewBW = NextPowerOf2(MSB - ShAmt);
    EVT NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    while (NewBW < BitWidth &&
           !(TLI.isOperationLegalOrCustom(Opc, NewVT) &&
             TLI.isNarrowingProfitable(VT, NewVT))) {
      NewBW = NextPowerOf2(NewBW);
      NewVT = EVT::getIntegerVT(*DAG.getContext(), NewBW);
    }
    if (NewBW >= BitWidth)
      return SDValue();

    // If the lsb changed does not start at the type bitwidth boundary,
    // start at the previous one.
    if (ShAmt % NewBW)
      ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW;
    APInt Mask = APInt::getBitsSet(BitWidth, ShAmt,
                                   std::min(BitWidth, ShAmt + NewBW));
    // Only proceed if every changed bit fits inside the narrow window.
    if ((Imm & Mask) == Imm) {
      APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW);
      // Undo the earlier inversion so the narrow AND uses the real mask.
      if (Opc == ISD::AND)
        NewImm ^= APInt::getAllOnesValue(NewBW);
      uint64_t PtrOff = ShAmt / 8;
      // For big endian targets, we need to adjust the offset to the pointer to
      // load the correct bytes.
      if (TLI.isBigEndian())
        PtrOff = (BitWidth + 7 - NewBW) / 8 - PtrOff;

      // The narrowed access must still meet the ABI alignment for NewVT.
      unsigned NewAlign = MinAlign(LD->getAlignment(), PtrOff);
      Type *NewVTTy = NewVT.getTypeForEVT(*DAG.getContext());
      if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
        return SDValue();

      SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                   Ptr.getValueType(), Ptr,
                                   DAG.getConstant(PtrOff, Ptr.getValueType()));
      SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                  LD->getChain(), NewPtr,
                                  LD->getPointerInfo().getWithOffset(PtrOff),
                                  LD->isVolatile(), LD->isNonTemporal(),
                                  LD->isInvariant(), NewAlign,
                                  LD->getTBAAInfo());
      SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                   DAG.getConstant(NewImm, NewVT));
      SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                   NewVal, NewPtr,
                                   ST->getPointerInfo().getWithOffset(PtrOff),
                                   false, false, NewAlign);

      AddToWorkList(NewPtr.getNode());
      AddToWorkList(NewLD.getNode());
      AddToWorkList(NewVal.getNode());
      WorkListRemover DeadNodes(*this);
      // Redirect the old load's chain users to the narrow load's chain.
      DAG.ReplaceAllUsesOfValueWith(N0.getValue(1), NewLD.getValue(1));
      ++OpsNarrowed;
      return NewST;
    }
  }

  return SDValue();
}

/// TransformFPLoadStorePair - For a given floating point load / store pair,
/// if the load value isn't used by any other operations, then consider
/// transforming the pair to integer load / store operations if the target
/// deems the transformation profitable.
8869 SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { 8870 StoreSDNode *ST = cast<StoreSDNode>(N); 8871 SDValue Chain = ST->getChain(); 8872 SDValue Value = ST->getValue(); 8873 if (ISD::isNormalStore(ST) && ISD::isNormalLoad(Value.getNode()) && 8874 Value.hasOneUse() && 8875 Chain == SDValue(Value.getNode(), 1)) { 8876 LoadSDNode *LD = cast<LoadSDNode>(Value); 8877 EVT VT = LD->getMemoryVT(); 8878 if (!VT.isFloatingPoint() || 8879 VT != ST->getMemoryVT() || 8880 LD->isNonTemporal() || 8881 ST->isNonTemporal() || 8882 LD->getPointerInfo().getAddrSpace() != 0 || 8883 ST->getPointerInfo().getAddrSpace() != 0) 8884 return SDValue(); 8885 8886 EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits()); 8887 if (!TLI.isOperationLegal(ISD::LOAD, IntVT) || 8888 !TLI.isOperationLegal(ISD::STORE, IntVT) || 8889 !TLI.isDesirableToTransformToIntegerOp(ISD::LOAD, VT) || 8890 !TLI.isDesirableToTransformToIntegerOp(ISD::STORE, VT)) 8891 return SDValue(); 8892 8893 unsigned LDAlign = LD->getAlignment(); 8894 unsigned STAlign = ST->getAlignment(); 8895 Type *IntVTTy = IntVT.getTypeForEVT(*DAG.getContext()); 8896 unsigned ABIAlign = TLI.getDataLayout()->getABITypeAlignment(IntVTTy); 8897 if (LDAlign < ABIAlign || STAlign < ABIAlign) 8898 return SDValue(); 8899 8900 SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value), 8901 LD->getChain(), LD->getBasePtr(), 8902 LD->getPointerInfo(), 8903 false, false, false, LDAlign); 8904 8905 SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N), 8906 NewLD, ST->getBasePtr(), 8907 ST->getPointerInfo(), 8908 false, false, STAlign); 8909 8910 AddToWorkList(NewLD.getNode()); 8911 AddToWorkList(NewST.getNode()); 8912 WorkListRemover DeadNodes(*this); 8913 DAG.ReplaceAllUsesOfValueWith(Value.getValue(1), NewLD.getValue(1)); 8914 ++LdStFP2Int; 8915 return NewST; 8916 } 8917 8918 return SDValue(); 8919 } 8920 8921 /// Helper struct to parse and store a memory address as base + index + offset. 
/// We ignore sign extensions when it is safe to do so.
/// The following two expressions are not equivalent. To differentiate we need
/// to store whether there was a sign extension involved in the index
/// computation.
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (add (i8 load %index)
///                                     (i8 1))))
/// vs
///
/// (load (i64 add (i64 copyfromreg %c)
///                (i64 signextend (i32 add (i32 signextend (i8 load %index))
///                                         (i32 1)))))
struct BaseIndexOffset {
  SDValue Base;
  SDValue Index;
  int64_t Offset;
  bool IsIndexSignExt;

  BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {}

  BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset,
                  bool IsIndexSignExt) :
    Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {}

  /// Returns true if the two addresses share the same base and index
  /// (and sign-extension flag). Note: Offset is deliberately NOT compared;
  /// callers use the offsets separately to test for consecutiveness.
  bool equalBaseIndex(const BaseIndexOffset &Other) {
    return Other.Base == Base && Other.Index == Index &&
           Other.IsIndexSignExt == IsIndexSignExt;
  }

  /// Parses tree in Ptr for base, index, offset addresses.
  static BaseIndexOffset match(SDValue Ptr) {
    bool IsIndexSignExt = false;

    // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
    // instruction, then it could be just the BASE or everything else we don't
    // know how to handle. Just use Ptr as BASE and give up.
    if (Ptr->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // We know that we have at least an ADD instruction. Try to pattern match
    // the simple case of BASE + OFFSET.
    if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
      int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
      return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
                             IsIndexSignExt);
    }

    // Inside a loop the current BASE pointer is calculated using an ADD and a
    // MUL instruction. In this case Ptr is the actual BASE pointer.
    // (i64 add (i64 %array_ptr)
    //          (i64 mul (i64 %induction_var)
    //                   (i64 %element_size)))
    if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Look at Base + Index + Offset cases.
    SDValue Base = Ptr->getOperand(0);
    SDValue IndexOffset = Ptr->getOperand(1);

    // Skip signextends.
    if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) {
      IndexOffset = IndexOffset->getOperand(0);
      IsIndexSignExt = true;
    }

    // Either the case of Base + Index (no offset) or something else.
    if (IndexOffset->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt);

    // Now we have the case of Base + Index + offset.
    SDValue Index = IndexOffset->getOperand(0);
    SDValue Offset = IndexOffset->getOperand(1);

    // A non-constant offset means we cannot decompose further; fall back to
    // treating the whole expression as the base.
    if (!isa<ConstantSDNode>(Offset))
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);

    // Ignore signextends.
    if (Index->getOpcode() == ISD::SIGN_EXTEND) {
      Index = Index->getOperand(0);
      IsIndexSignExt = true;
    } else IsIndexSignExt = false;

    int64_t Off = cast<ConstantSDNode>(Offset)->getSExtValue();
    return BaseIndexOffset(Base, Index, Off, IsIndexSignExt);
  }
};

/// Holds a pointer to an LSBaseSDNode as well as information on where it
/// is located in a sequence of memory operations connected by a chain.
struct MemOpLink {
  MemOpLink (LSBaseSDNode *N, int64_t Offset, unsigned Seq):
    MemNode(N), OffsetFromBase(Offset), SequenceNum(Seq) { }
  // Ptr to the mem node.
  LSBaseSDNode *MemNode;
  // Offset from the base ptr.
  int64_t OffsetFromBase;
  // What is the sequence number of this mem node.
  // Lowest mem operand in the DAG starts at zero.
  unsigned SequenceNum;
};

/// Try to replace a run of consecutive stores (of constants, or of values
/// produced by consecutive loads) with a single wider store. Returns true if
/// the DAG was changed.
bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
  EVT MemVT = St->getMemoryVT();
  int64_t ElementSizeBytes = MemVT.getSizeInBits()/8;
  // Respect noimplicitfloat: it forbids introducing vector (FP-register) ops.
  bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes().
    hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat);

  // Don't merge vectors into wider inputs.
  if (MemVT.isVector() || !MemVT.isSimple())
    return false;

  // Perform an early exit check. Do not bother looking at stored values that
  // are not constants or loads.
  SDValue StoredVal = St->getValue();
  bool IsLoadSrc = isa<LoadSDNode>(StoredVal);
  if (!isa<ConstantSDNode>(StoredVal) && !isa<ConstantFPSDNode>(StoredVal) &&
      !IsLoadSrc)
    return false;

  // Only look at ends of store sequences.
  SDValue Chain = SDValue(St, 1);
  if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE)
    return false;

  // This holds the base pointer, index, and the offset in bytes from the base
  // pointer.
  BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr());

  // We must have a base and an offset.
  if (!BasePtr.Base.getNode())
    return false;

  // Do not handle stores to undef base pointers.
  if (BasePtr.Base.getOpcode() == ISD::UNDEF)
    return false;

  // Save the LoadSDNodes that we find in the chain.
  // We need to make sure that these nodes do not interfere with
  // any of the store nodes.
  SmallVector<LSBaseSDNode*, 8> AliasLoadNodes;

  // Save the StoreSDNodes that we find in the chain.
  SmallVector<MemOpLink, 8> StoreNodes;

  // Walk up the chain and look for nodes with offsets from the same
  // base pointer. Stop when reaching an instruction with a different kind
  // or instruction which has a different base pointer.
  unsigned Seq = 0;
  StoreSDNode *Index = St;
  while (Index) {
    // If the chain has more than one use, then we can't reorder the mem ops.
    if (Index != St && !SDValue(Index, 1)->hasOneUse())
      break;

    // Find the base pointer and offset for this memory node.
    BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr());

    // Check that the base pointer is the same as the original one.
    if (!Ptr.equalBaseIndex(BasePtr))
      break;

    // Check that the alignment is the same.
    if (Index->getAlignment() != St->getAlignment())
      break;

    // The memory operands must not be volatile.
    if (Index->isVolatile() || Index->isIndexed())
      break;

    // No truncation.
    // NOTE: this inner 'St' shadows the outer parameter; it is only used for
    // this truncating-store check.
    if (StoreSDNode *St = dyn_cast<StoreSDNode>(Index))
      if (St->isTruncatingStore())
        break;

    // The stored memory type must be the same.
    if (Index->getMemoryVT() != MemVT)
      break;

    // We do not allow unaligned stores because we want to prevent overriding
    // stores.
    if (Index->getAlignment()*8 != MemVT.getSizeInBits())
      break;

    // We found a potential memory operand to merge. Seq records the order in
    // which candidates were discovered (0 = farthest down the chain).
    StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++));

    // Find the next memory operand in the chain. If the next operand in the
    // chain is a store then move up and continue the scan with the next
    // memory operand. If the next operand is a load save it and use alias
    // information to check if it interferes with anything.
    SDNode *NextInChain = Index->getChain().getNode();
    while (1) {
      if (StoreSDNode *STn = dyn_cast<StoreSDNode>(NextInChain)) {
        // We found a store node. Use it for the next iteration.
        Index = STn;
        break;
      } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
        if (Ldn->isVolatile()) {
          Index = nullptr;
          break;
        }

        // Save the load node for later. Continue the scan.
        AliasLoadNodes.push_back(Ldn);
        NextInChain = Ldn->getChain().getNode();
        continue;
      } else {
        // Anything else on the chain ends the scan.
        Index = nullptr;
        break;
      }
    }
  }

  // Check if there is anything to merge.
  if (StoreNodes.size() < 2)
    return false;

  // Sort the memory operands according to their distance from the base pointer.
  std::sort(StoreNodes.begin(), StoreNodes.end(),
            [](MemOpLink LHS, MemOpLink RHS) {
    return LHS.OffsetFromBase < RHS.OffsetFromBase ||
           (LHS.OffsetFromBase == RHS.OffsetFromBase &&
            LHS.SequenceNum > RHS.SequenceNum);
  });

  // Scan the memory operations on the chain and find the first non-consecutive
  // store memory address.
  unsigned LastConsecutiveStore = 0;
  int64_t StartAddress = StoreNodes[0].OffsetFromBase;
  for (unsigned i = 0, e = StoreNodes.size(); i < e; ++i) {

    // Check that the addresses are consecutive starting from the second
    // element in the list of stores.
    if (i > 0) {
      int64_t CurrAddress = StoreNodes[i].OffsetFromBase;
      if (CurrAddress - StartAddress != (ElementSizeBytes * i))
        break;
    }

    bool Alias = false;
    // Check if this store interferes with any of the loads that we found.
    for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld)
      if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) {
        Alias = true;
        break;
      }
    // We found a load that aliases with this store. Stop the sequence.
    if (Alias)
      break;

    // Mark this node as useful.
    LastConsecutiveStore = i;
  }

  // The node with the lowest store address.
  LSBaseSDNode *FirstInChain = StoreNodes[0].MemNode;

  // Store the constants into memory as one consecutive store.
  // Constant-source path: combine the constants into one wide integer (or a
  // zero vector) and emit a single store.
  if (!IsLoadSrc) {
    unsigned LastLegalType = 0;
    unsigned LastLegalVectorType = 0;
    bool NonZero = false;
    for (unsigned i=0; i<LastConsecutiveStore+1; ++i) {
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      SDValue StoredVal = St->getValue();

      if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(StoredVal)) {
        NonZero |= !C->isNullValue();
      } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(StoredVal)) {
        NonZero |= !C->getConstantFPValue()->isNullValue();
      } else {
        // Non-constant.
        break;
      }

      // Find a legal type for the constant store.
      unsigned StoreBW = (i+1) * ElementSizeBytes * 8;
      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
      if (TLI.isTypeLegal(StoreTy))
        LastLegalType = i+1;
      // Or check whether a truncstore is legal.
      else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) ==
               TargetLowering::TypePromoteInteger) {
        EVT LegalizedStoredValueTy =
          TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType());
        if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy))
          LastLegalType = i+1;
      }

      // Find a legal type for the vector store.
      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1);
      if (TLI.isTypeLegal(Ty))
        LastLegalVectorType = i + 1;
    }

    // We only use vectors if the constant is known to be zero and the
    // function is not marked with the noimplicitfloat attribute.
    if (NonZero || NoVectors)
      LastLegalVectorType = 0;

    // Check if we found a legal integer type to store.
    if (LastLegalType == 0 && LastLegalVectorType == 0)
      return false;

    bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors;
    unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType;

    // Make sure we have something to merge.
    if (NumElem < 2)
      return false;

    unsigned EarliestNodeUsed = 0;
    for (unsigned i=0; i < NumElem; ++i) {
      // Find a chain for the new wide-store operand. Notice that some
      // of the store nodes that we found may not be selected for inclusion
      // in the wide store. The chain we use needs to be the chain of the
      // earliest store node which is *used* and replaced by the wide store.
      if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum)
        EarliestNodeUsed = i;
    }

    // The earliest Node in the DAG.
    LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
    SDLoc DL(StoreNodes[0].MemNode);

    SDValue StoredVal;
    if (UseVector) {
      // Find a legal type for the vector store.
      // Vector path is only taken when every constant is zero (see above),
      // so a single zero vector constant suffices.
      EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem);
      assert(TLI.isTypeLegal(Ty) && "Illegal vector store");
      StoredVal = DAG.getConstant(0, Ty);
    } else {
      unsigned StoreBW = NumElem * ElementSizeBytes * 8;
      APInt StoreInt(StoreBW, 0);

      // Construct a single integer constant which is made of the smaller
      // constant inputs.
      bool IsLE = TLI.isLittleEndian();
      for (unsigned i = 0; i < NumElem ; ++i) {
        unsigned Idx = IsLE ?(NumElem - 1 - i) : i;
        StoreSDNode *St = cast<StoreSDNode>(StoreNodes[Idx].MemNode);
        SDValue Val = St->getValue();
        StoreInt<<=ElementSizeBytes*8;
        if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Val)) {
          StoreInt|=C->getAPIntValue().zext(StoreBW);
        } else if (ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Val)) {
          StoreInt|= C->getValueAPF().bitcastToAPInt().zext(StoreBW);
        } else {
          assert(false && "Invalid constant element type");
        }
      }

      // Create the new Load and Store operations.
      EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
      StoredVal = DAG.getConstant(StoreInt, StoreTy);
    }

    SDValue NewStore = DAG.getStore(EarliestOp->getChain(), DL, StoredVal,
                                    FirstInChain->getBasePtr(),
                                    FirstInChain->getPointerInfo(),
                                    false, false,
                                    FirstInChain->getAlignment());

    // Replace the first store with the new store
    CombineTo(EarliestOp, NewStore);
    // Erase all other stores.
    for (unsigned i = 0; i < NumElem ; ++i) {
      if (StoreNodes[i].MemNode == EarliestOp)
        continue;
      StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
      // ReplaceAllUsesWith will replace all uses that existed when it was
      // called, but graph optimizations may cause new ones to appear. For
      // example, the case in pr14333 looks like
      //
      //  St's chain -> St -> another store -> X
      //
      // And the only difference from St to the other store is the chain.
      // When we change it's chain to be St's chain they become identical,
      // get CSEed and the net result is that X is now a use of St.
      // Since we know that St is redundant, just iterate.
      while (!St->use_empty())
        DAG.ReplaceAllUsesWith(SDValue(St, 0), St->getChain());
      removeFromWorkList(St);
      DAG.DeleteNode(St);
    }

    return true;
  }

  // Below we handle the case of multiple consecutive stores that
  // come from multiple consecutive loads. We merge them into a single
  // wide load and a single wide store.

  // Look for load nodes which are used by the stored values.
  SmallVector<MemOpLink, 8> LoadNodes;

  // Find acceptable loads. Loads need to have the same chain (token factor),
  // must not be zext, volatile, indexed, and they must be consecutive.
9320 BaseIndexOffset LdBasePtr; 9321 for (unsigned i=0; i<LastConsecutiveStore+1; ++i) { 9322 StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode); 9323 LoadSDNode *Ld = dyn_cast<LoadSDNode>(St->getValue()); 9324 if (!Ld) break; 9325 9326 // Loads must only have one use. 9327 if (!Ld->hasNUsesOfValue(1, 0)) 9328 break; 9329 9330 // Check that the alignment is the same as the stores. 9331 if (Ld->getAlignment() != St->getAlignment()) 9332 break; 9333 9334 // The memory operands must not be volatile. 9335 if (Ld->isVolatile() || Ld->isIndexed()) 9336 break; 9337 9338 // We do not accept ext loads. 9339 if (Ld->getExtensionType() != ISD::NON_EXTLOAD) 9340 break; 9341 9342 // The stored memory type must be the same. 9343 if (Ld->getMemoryVT() != MemVT) 9344 break; 9345 9346 BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); 9347 // If this is not the first ptr that we check. 9348 if (LdBasePtr.Base.getNode()) { 9349 // The base ptr must be the same. 9350 if (!LdPtr.equalBaseIndex(LdBasePtr)) 9351 break; 9352 } else { 9353 // Check that all other base pointers are the same as this one. 9354 LdBasePtr = LdPtr; 9355 } 9356 9357 // We found a potential memory operand to merge. 9358 LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); 9359 } 9360 9361 if (LoadNodes.size() < 2) 9362 return false; 9363 9364 // Scan the memory operations on the chain and find the first non-consecutive 9365 // load memory address. These variables hold the index in the store node 9366 // array. 9367 unsigned LastConsecutiveLoad = 0; 9368 // This variable refers to the size and not index in the array. 9369 unsigned LastLegalVectorType = 0; 9370 unsigned LastLegalIntegerType = 0; 9371 StartAddress = LoadNodes[0].OffsetFromBase; 9372 SDValue FirstChain = LoadNodes[0].MemNode->getChain(); 9373 for (unsigned i = 1; i < LoadNodes.size(); ++i) { 9374 // All loads much share the same chain. 
9375 if (LoadNodes[i].MemNode->getChain() != FirstChain) 9376 break; 9377 9378 int64_t CurrAddress = LoadNodes[i].OffsetFromBase; 9379 if (CurrAddress - StartAddress != (ElementSizeBytes * i)) 9380 break; 9381 LastConsecutiveLoad = i; 9382 9383 // Find a legal type for the vector store. 9384 EVT StoreTy = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); 9385 if (TLI.isTypeLegal(StoreTy)) 9386 LastLegalVectorType = i + 1; 9387 9388 // Find a legal type for the integer store. 9389 unsigned StoreBW = (i+1) * ElementSizeBytes * 8; 9390 StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9391 if (TLI.isTypeLegal(StoreTy)) 9392 LastLegalIntegerType = i + 1; 9393 // Or check whether a truncstore and extload is legal. 9394 else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == 9395 TargetLowering::TypePromoteInteger) { 9396 EVT LegalizedStoredValueTy = 9397 TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); 9398 if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && 9399 TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) && 9400 TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) && 9401 TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy)) 9402 LastLegalIntegerType = i+1; 9403 } 9404 } 9405 9406 // Only use vector types if the vector type is larger than the integer type. 9407 // If they are the same, use integers. 9408 bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; 9409 unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); 9410 9411 // We add +1 here because the LastXXX variables refer to location while 9412 // the NumElem refers to array/index size. 9413 unsigned NumElem = std::min(LastConsecutiveStore, LastConsecutiveLoad) + 1; 9414 NumElem = std::min(LastLegalType, NumElem); 9415 9416 if (NumElem < 2) 9417 return false; 9418 9419 // The earliest Node in the DAG. 
9420 unsigned EarliestNodeUsed = 0; 9421 LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode; 9422 for (unsigned i=1; i<NumElem; ++i) { 9423 // Find a chain for the new wide-store operand. Notice that some 9424 // of the store nodes that we found may not be selected for inclusion 9425 // in the wide store. The chain we use needs to be the chain of the 9426 // earliest store node which is *used* and replaced by the wide store. 9427 if (StoreNodes[i].SequenceNum > StoreNodes[EarliestNodeUsed].SequenceNum) 9428 EarliestNodeUsed = i; 9429 } 9430 9431 // Find if it is better to use vectors or integers to load and store 9432 // to memory. 9433 EVT JointMemOpVT; 9434 if (UseVectorTy) { 9435 JointMemOpVT = EVT::getVectorVT(*DAG.getContext(), MemVT, NumElem); 9436 } else { 9437 unsigned StoreBW = NumElem * ElementSizeBytes * 8; 9438 JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW); 9439 } 9440 9441 SDLoc LoadDL(LoadNodes[0].MemNode); 9442 SDLoc StoreDL(StoreNodes[0].MemNode); 9443 9444 LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode); 9445 SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL, 9446 FirstLoad->getChain(), 9447 FirstLoad->getBasePtr(), 9448 FirstLoad->getPointerInfo(), 9449 false, false, false, 9450 FirstLoad->getAlignment()); 9451 9452 SDValue NewStore = DAG.getStore(EarliestOp->getChain(), StoreDL, NewLoad, 9453 FirstInChain->getBasePtr(), 9454 FirstInChain->getPointerInfo(), false, false, 9455 FirstInChain->getAlignment()); 9456 9457 // Replace one of the loads with the new load. 9458 LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[0].MemNode); 9459 DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), 9460 SDValue(NewLoad.getNode(), 1)); 9461 9462 // Remove the rest of the load chains. 9463 for (unsigned i = 1; i < NumElem ; ++i) { 9464 // Replace all chain users of the old load nodes with the chain of the new 9465 // load node. 
    LoadSDNode *Ld = cast<LoadSDNode>(LoadNodes[i].MemNode);
    DAG.ReplaceAllUsesOfValueWith(SDValue(Ld, 1), Ld->getChain());
  }

  // Replace the first store with the new store.
  CombineTo(EarliestOp, NewStore);
  // Erase all other stores.
  for (unsigned i = 0; i < NumElem ; ++i) {
    // Remove all Store nodes.
    if (StoreNodes[i].MemNode == EarliestOp)
      continue;
    StoreSDNode *St = cast<StoreSDNode>(StoreNodes[i].MemNode);
    // Route chain users around the dead store before deleting it.
    DAG.ReplaceAllUsesOfValueWith(SDValue(St, 0), St->getChain());
    removeFromWorkList(St);
    DAG.DeleteNode(St);
  }

  return true;
}

// Visit a STORE node: try a series of store simplifications and combines.
// Returns the replacement value (possibly SDValue(N, 0) to signal an in-place
// change) or a null SDValue if no combine applied.
SDValue DAGCombiner::visitSTORE(SDNode *N) {
  StoreSDNode *ST  = cast<StoreSDNode>(N);
  SDValue Chain = ST->getChain();
  SDValue Value = ST->getValue();
  SDValue Ptr   = ST->getBasePtr();

  // If this is a store of a bit convert, store the input value if the
  // resultant store does not need a higher alignment than the original.
  if (Value.getOpcode() == ISD::BITCAST && !ST->isTruncatingStore() &&
      ST->isUnindexed()) {
    unsigned OrigAlign = ST->getAlignment();
    EVT SVT = Value.getOperand(0).getValueType();
    unsigned Align = TLI.getDataLayout()->
      getABITypeAlignment(SVT.getTypeForEVT(*DAG.getContext()));
    if (Align <= OrigAlign &&
        ((!LegalOperations && !ST->isVolatile()) ||
         TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
                          Ptr, ST->getPointerInfo(), ST->isVolatile(),
                          ST->isNonTemporal(), OrigAlign,
                          ST->getTBAAInfo());
  }

  // Turn 'store undef, Ptr' -> nothing.
  if (Value.getOpcode() == ISD::UNDEF && ST->isUnindexed())
    return Chain;

  // Turn 'store float 1.0, Ptr' -> 'store int 0x12345678, Ptr'
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(Value)) {
    // NOTE: If the original store is volatile, this transform must not
    // increase the number of stores.  For example, on x86-32 an f64 can be
    // stored in one processor operation but an i64 (which is not legal)
    // requires two.  So the transform should not be done in this case.
    if (Value.getOpcode() != ISD::TargetConstantFP) {
      SDValue Tmp;
      switch (CFP->getSimpleValueType(0).SimpleTy) {
      default: llvm_unreachable("Unknown FP type");
      case MVT::f16:    // We don't do this for these yet.
      case MVT::f80:
      case MVT::f128:
      case MVT::ppcf128:
        break;
      case MVT::f32:
        // Bitcast the f32 constant to i32 and store that instead.
        if ((isTypeLegal(MVT::i32) && !LegalOperations && !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                bitcastToAPInt().getZExtValue(), MVT::i32);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }
        break;
      case MVT::f64:
        // Prefer a single i64 store when legal.
        if ((TLI.isTypeLegal(MVT::i64) && !LegalOperations &&
             !ST->isVolatile()) ||
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
          Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                getZExtValue(), MVT::i64);
          return DAG.getStore(Chain, SDLoc(N), Tmp,
                              Ptr, ST->getMemOperand());
        }

        if (!ST->isVolatile() &&
            TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
          // Many FP stores are not made apparent until after legalize, e.g. for
          // argument passing.  Since this is so common, custom legalize the
          // 64-bit integer store into two 32-bit stores.
          uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue();
          SDValue Lo = DAG.getConstant(Val & 0xFFFFFFFF, MVT::i32);
          SDValue Hi = DAG.getConstant(Val >> 32, MVT::i32);
          // Lo goes at the lower address only on little-endian targets.
          if (TLI.isBigEndian()) std::swap(Lo, Hi);

          unsigned Alignment = ST->getAlignment();
          bool isVolatile = ST->isVolatile();
          bool isNonTemporal = ST->isNonTemporal();
          const MDNode *TBAAInfo = ST->getTBAAInfo();

          SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
                                     Ptr, ST->getPointerInfo(),
                                     isVolatile, isNonTemporal,
                                     ST->getAlignment(), TBAAInfo);
          Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
                            DAG.getConstant(4, Ptr.getValueType()));
          // The high half is 4 bytes in; its alignment can't exceed that.
          Alignment = MinAlign(Alignment, 4U);
          SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
                                     Ptr, ST->getPointerInfo().getWithOffset(4),
                                     isVolatile, isNonTemporal,
                                     Alignment, TBAAInfo);
          // Both halves chain off the original chain; tie them together.
          return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                             St0, St1);
        }

        break;
      }
    }
  }

  // Try to infer better alignment information than the store already has.
  if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
    if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
      if (Align > ST->getAlignment())
        return DAG.getTruncStore(Chain, SDLoc(N), Value,
                                 Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
                                 ST->isVolatile(), ST->isNonTemporal(), Align,
                                 ST->getTBAAInfo());
    }
  }

  // Try transforming a pair floating point load / store ops to integer
  // load / store ops.
  SDValue NewST = TransformFPLoadStorePair(N);
  if (NewST.getNode())
    return NewST;

  // The command-line flag overrides the subtarget's UseAA preference.
  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
    TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
#ifndef NDEBUG
  if (CombinerAAOnlyFunc.getNumOccurrences() &&
      CombinerAAOnlyFunc != DAG.getMachineFunction().getName())
    UseAA = false;
#endif
  if (UseAA && ST->isUnindexed()) {
    // Walk up chain skipping non-aliasing memory nodes.
    SDValue BetterChain = FindBetterChain(N, Chain);

    // If there is a better chain.
    if (Chain != BetterChain) {
      SDValue ReplStore;

      // Replace the chain to avoid dependency.
      if (ST->isTruncatingStore()) {
        ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
                                      ST->getMemoryVT(), ST->getMemOperand());
      } else {
        ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
                                 ST->getMemOperand());
      }

      // Create token to keep both nodes around.
      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                  MVT::Other, Chain, ReplStore);

      // Make sure the new and old chains are cleaned up.
      AddToWorkList(Token.getNode());

      // Don't add users to work list.
      return CombineTo(N, Token, false);
    }
  }

  // Try transforming N to an indexed store.
  if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N))
    return SDValue(N, 0);

  // FIXME: is there such a thing as a truncating indexed store?
  if (ST->isTruncatingStore() && ST->isUnindexed() &&
      Value.getValueType().isInteger()) {
    // See if we can simplify the input to this truncstore with knowledge that
    // only the low bits are being used.  For example:
    // "truncstore (or (shl x, 8), y), i8"  -> "truncstore y, i8"
    SDValue Shorter =
      GetDemandedBits(Value,
                      APInt::getLowBitsSet(
                        Value.getValueType().getScalarType().getSizeInBits(),
                        ST->getMemoryVT().getScalarType().getSizeInBits()));
    AddToWorkList(Value.getNode());
    if (Shorter.getNode())
      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
                               Ptr, ST->getMemoryVT(), ST->getMemOperand());

    // Otherwise, see if we can simplify the operation with
    // SimplifyDemandedBits, which only works if the value has a single use.
    if (SimplifyDemandedBits(Value,
                             APInt::getLowBitsSet(
                               Value.getValueType().getScalarType().getSizeInBits(),
                               ST->getMemoryVT().getScalarType().getSizeInBits())))
      return SDValue(N, 0);
  }

  // If this is a load followed by a store to the same location, then the store
  // is dead/noop.
  if (LoadSDNode *Ld = dyn_cast<LoadSDNode>(Value)) {
    if (Ld->getBasePtr() == Ptr && ST->getMemoryVT() == Ld->getMemoryVT() &&
        ST->isUnindexed() && !ST->isVolatile() &&
        // There can't be any side effects between the load and store, such as
        // a call or store.
        Chain.reachesChainWithoutSideEffects(SDValue(Ld, 1))) {
      // The store is dead, remove it.
      return Chain;
    }
  }

  // If this is an FP_ROUND or TRUNC followed by a store, fold this into a
  // truncating store.  We can do this even if this is already a truncstore.
  if ((Value.getOpcode() == ISD::FP_ROUND || Value.getOpcode() == ISD::TRUNCATE)
      && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
      TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                            ST->getMemoryVT())) {
    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
  }

  // Only perform this optimization before the types are legal, because we
  // don't want to perform this optimization on every DAGCombine invocation.
  if (!LegalTypes) {
    bool EverChanged = false;

    do {
      // There can be multiple store sequences on the same chain.
      // Keep trying to merge store sequences until we are unable to do so
      // or until we merge the last store on the chain.
      bool Changed = MergeConsecutiveStores(ST);
      EverChanged |= Changed;
      if (!Changed) break;
      // NOTE: MergeConsecutiveStores may delete ST itself; the loop
      // condition below detects that via the DELETED_NODE opcode.
    } while (ST->getOpcode() != ISD::DELETED_NODE);

    if (EverChanged)
      return SDValue(N, 0);
  }

  return ReduceLoadOpStoreWidth(N);
}

// Visit an INSERT_VECTOR_ELT node: fold trivial inserts, canonicalize chains
// of inserts, and try to turn insert-into-BUILD_VECTOR/UNDEF into a new
// BUILD_VECTOR.
SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
  SDValue InVec = N->getOperand(0);
  SDValue InVal = N->getOperand(1);
  SDValue EltNo = N->getOperand(2);
  SDLoc dl(N);

  // If the inserted element is an UNDEF, just use the input vector.
  if (InVal.getOpcode() == ISD::UNDEF)
    return InVec;

  EVT VT = InVec.getValueType();

  // If we can't generate a legal BUILD_VECTOR, exit
  if (LegalOperations && !TLI.isOperationLegal(ISD::BUILD_VECTOR, VT))
    return SDValue();

  // Check that we know which element is being inserted
  if (!isa<ConstantSDNode>(EltNo))
    return SDValue();
  unsigned Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();

  // Canonicalize insert_vector_elt dag nodes.
  // Example:
  // (insert_vector_elt (insert_vector_elt A, Idx0), Idx1)
  // -> (insert_vector_elt (insert_vector_elt A, Idx1), Idx0)
  //
  // Do this only if the child insert_vector node has one use; also
  // do this only if indices are both constants and Idx1 < Idx0.
  if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT && InVec.hasOneUse()
      && isa<ConstantSDNode>(InVec.getOperand(2))) {
    unsigned OtherElt =
      cast<ConstantSDNode>(InVec.getOperand(2))->getZExtValue();
    if (Elt < OtherElt) {
      // Swap nodes.
      SDValue NewOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), VT,
                                  InVec.getOperand(0), InVal, EltNo);
      AddToWorkList(NewOp.getNode());
      return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(InVec.getNode()),
                         VT, NewOp, InVec.getOperand(1), InVec.getOperand(2));
    }
  }

  // Check that the operand is a BUILD_VECTOR (or UNDEF, which can essentially
  // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
  // vector elements.
  SmallVector<SDValue, 8> Ops;
  // Do not combine these two vectors if the output vector will not replace
  // the input vector.
  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
    Ops.append(InVec.getNode()->op_begin(),
               InVec.getNode()->op_end());
  } else if (InVec.getOpcode() == ISD::UNDEF) {
    unsigned NElts = VT.getVectorNumElements();
    Ops.append(NElts, DAG.getUNDEF(InVal.getValueType()));
  } else {
    return SDValue();
  }

  // Insert the element.  Out-of-range constant indices fall through and the
  // vector is rebuilt unchanged.
  if (Elt < Ops.size()) {
    // All the operands of BUILD_VECTOR must have the same type;
    // we enforce that here.
    EVT OpVT = Ops[0].getValueType();
    if (InVal.getValueType() != OpVT)
      InVal = OpVT.bitsGT(InVal.getValueType()) ?
                DAG.getNode(ISD::ANY_EXTEND, dl, OpVT, InVal) :
                DAG.getNode(ISD::TRUNCATE, dl, OpVT, InVal);
    Ops[Elt] = InVal;
  }

  // Return the new vector
  return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, Ops);
}

// Replace (extract_vector_elt (load V), Idx) with a narrow scalar load of
// just the addressed element.  EVE is the EXTRACT_VECTOR_ELT node, InVecVT
// the loaded vector type, EltNo the (constant or variable) element index,
// and OriginalLoad the vector load being narrowed.  Returns SDValue(EVE, 0)
// on success (the replacement is done in place via RAUW), or a null SDValue.
SDValue DAGCombiner::ReplaceExtractVectorEltOfLoadWithNarrowedLoad(
    SDNode *EVE, EVT InVecVT, SDValue EltNo, LoadSDNode *OriginalLoad) {
  EVT ResultVT = EVE->getValueType(0);
  EVT VecEltVT = InVecVT.getVectorElementType();
  unsigned Align = OriginalLoad->getAlignment();
  unsigned NewAlign = TLI.getDataLayout()->getABITypeAlignment(
      VecEltVT.getTypeForEVT(*DAG.getContext()));

  // Bail out if the scalar load would need more alignment than the vector
  // load provides, or if scalar loads of this type aren't available.
  if (NewAlign > Align || !TLI.isOperationLegalOrCustom(ISD::LOAD, VecEltVT))
    return SDValue();

  Align = NewAlign;

  // Compute the address of the extracted element: base + Elt * eltsize
  // (mirrored from the end of the vector on big-endian targets).
  SDValue NewPtr = OriginalLoad->getBasePtr();
  SDValue Offset;
  EVT PtrType = NewPtr.getValueType();
  MachinePointerInfo MPI;
  if (auto *ConstEltNo = dyn_cast<ConstantSDNode>(EltNo)) {
    int Elt = ConstEltNo->getZExtValue();
    unsigned PtrOff = VecEltVT.getSizeInBits() * Elt / 8;
    if (TLI.isBigEndian())
      PtrOff = InVecVT.getSizeInBits() / 8 - PtrOff;
    Offset = DAG.getConstant(PtrOff, PtrType);
    MPI = OriginalLoad->getPointerInfo().getWithOffset(PtrOff);
  } else {
    // Variable index: the offset has to be computed in the DAG.
    Offset = DAG.getNode(
        ISD::MUL, SDLoc(EVE), EltNo.getValueType(), EltNo,
        DAG.getConstant(VecEltVT.getStoreSize(), EltNo.getValueType()));
    if (TLI.isBigEndian())
      Offset = DAG.getNode(
          ISD::SUB, SDLoc(EVE), EltNo.getValueType(),
          DAG.getConstant(InVecVT.getStoreSize(), EltNo.getValueType()), Offset);
    MPI = OriginalLoad->getPointerInfo();
  }
  NewPtr = DAG.getNode(ISD::ADD, SDLoc(EVE), PtrType, NewPtr, Offset);

  // The replacement we need to do here is a little tricky: we need to
  // replace an extractelement of a load with a load.
  // Use ReplaceAllUsesOfValuesWith to do the replacement.
  // Note that this replacement assumes that the extractvalue is the only
  // use of the load; that's okay because we don't want to perform this
  // transformation in other cases anyway.
  SDValue Load;
  SDValue Chain;
  if (ResultVT.bitsGT(VecEltVT)) {
    // If the result type of vextract is wider than the load, then issue an
    // extending load instead.
    ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, VecEltVT)
                                   ? ISD::ZEXTLOAD
                                   : ISD::EXTLOAD;
    Load = DAG.getExtLoad(ExtType, SDLoc(EVE), ResultVT, OriginalLoad->getChain(),
                          NewPtr, MPI, VecEltVT, OriginalLoad->isVolatile(),
                          OriginalLoad->isNonTemporal(), Align,
                          OriginalLoad->getTBAAInfo());
    Chain = Load.getValue(1);
  } else {
    Load = DAG.getLoad(
        VecEltVT, SDLoc(EVE), OriginalLoad->getChain(), NewPtr, MPI,
        OriginalLoad->isVolatile(), OriginalLoad->isNonTemporal(),
        OriginalLoad->isInvariant(), Align, OriginalLoad->getTBAAInfo());
    Chain = Load.getValue(1);
    // Narrower result: truncate.  Same width but different type: bitcast.
    if (ResultVT.bitsLT(VecEltVT))
      Load = DAG.getNode(ISD::TRUNCATE, SDLoc(EVE), ResultVT, Load);
    else
      Load = DAG.getNode(ISD::BITCAST, SDLoc(EVE), ResultVT, Load);
  }
  WorkListRemover DeadNodes(*this);
  // Replace both the extracted value and the old load's chain in one shot.
  SDValue From[] = { SDValue(EVE, 0), SDValue(OriginalLoad, 1) };
  SDValue To[] = { Load, Chain };
  DAG.ReplaceAllUsesOfValuesWith(From, To, 2);
  // Since we're explicitly calling ReplaceAllUses, add the new node to the
  // worklist explicitly as well.
  AddToWorkList(Load.getNode());
  AddUsersToWorkList(Load.getNode()); // Add users too
  // Make sure to revisit this node to clean it up; it will usually be dead.
  AddToWorkList(EVE);
  ++OpsNarrowed;
  return SDValue(EVE, 0);
}

// Visit an EXTRACT_VECTOR_ELT node: fold extracts of scalar_to_vector,
// shuffles, build_vectors and loads down to scalar values where possible.
SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
  // (vextract (scalar_to_vector val, 0) -> val
  SDValue InVec = N->getOperand(0);
  EVT VT = InVec.getValueType();
  EVT NVT = N->getValueType(0);

  if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR) {
    // Check if the result type doesn't match the inserted element type. A
    // SCALAR_TO_VECTOR may truncate the inserted element and the
    // EXTRACT_VECTOR_ELT may widen the extracted vector.
    SDValue InOp = InVec.getOperand(0);
    if (InOp.getValueType() != NVT) {
      assert(InOp.getValueType().isInteger() && NVT.isInteger());
      return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
    }
    return InOp;
  }

  SDValue EltNo = N->getOperand(1);
  bool ConstEltNo = isa<ConstantSDNode>(EltNo);

  // Transform: (EXTRACT_VECTOR_ELT( VECTOR_SHUFFLE )) -> EXTRACT_VECTOR_ELT.
  // We only perform this optimization before the op legalization phase because
  // we may introduce new vector instructions which are not backed by TD
  // patterns. For example on AVX, extracting elements from a wide vector
  // without using extract_subvector. However, if we can find an underlying
  // scalar value, then we can always use that.
  if (InVec.getOpcode() == ISD::VECTOR_SHUFFLE
      && ConstEltNo) {
    int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue();
    int NumElem = VT.getVectorNumElements();
    ShuffleVectorSDNode *SVOp = cast<ShuffleVectorSDNode>(InVec);
    // Find the new index to extract from.
    int OrigElt = SVOp->getMaskElt(Elt);

    // Extracting an undef index is undef.
    if (OrigElt == -1)
      return DAG.getUNDEF(NVT);

    // Select the right vector half to extract from.
9902 SDValue SVInVec; 9903 if (OrigElt < NumElem) { 9904 SVInVec = InVec->getOperand(0); 9905 } else { 9906 SVInVec = InVec->getOperand(1); 9907 OrigElt -= NumElem; 9908 } 9909 9910 if (SVInVec.getOpcode() == ISD::BUILD_VECTOR) { 9911 SDValue InOp = SVInVec.getOperand(OrigElt); 9912 if (InOp.getValueType() != NVT) { 9913 assert(InOp.getValueType().isInteger() && NVT.isInteger()); 9914 InOp = DAG.getSExtOrTrunc(InOp, SDLoc(SVInVec), NVT); 9915 } 9916 9917 return InOp; 9918 } 9919 9920 // FIXME: We should handle recursing on other vector shuffles and 9921 // scalar_to_vector here as well. 9922 9923 if (!LegalOperations) { 9924 EVT IndexTy = TLI.getVectorIdxTy(); 9925 return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT, 9926 SVInVec, DAG.getConstant(OrigElt, IndexTy)); 9927 } 9928 } 9929 9930 bool BCNumEltsChanged = false; 9931 EVT ExtVT = VT.getVectorElementType(); 9932 EVT LVT = ExtVT; 9933 9934 // If the result of load has to be truncated, then it's not necessarily 9935 // profitable. 9936 if (NVT.bitsLT(LVT) && !TLI.isTruncateFree(LVT, NVT)) 9937 return SDValue(); 9938 9939 if (InVec.getOpcode() == ISD::BITCAST) { 9940 // Don't duplicate a load with other uses. 
9941 if (!InVec.hasOneUse()) 9942 return SDValue(); 9943 9944 EVT BCVT = InVec.getOperand(0).getValueType(); 9945 if (!BCVT.isVector() || ExtVT.bitsGT(BCVT.getVectorElementType())) 9946 return SDValue(); 9947 if (VT.getVectorNumElements() != BCVT.getVectorNumElements()) 9948 BCNumEltsChanged = true; 9949 InVec = InVec.getOperand(0); 9950 ExtVT = BCVT.getVectorElementType(); 9951 } 9952 9953 // (vextract (vN[if]M load $addr), i) -> ([if]M load $addr + i * size) 9954 if (!LegalOperations && !ConstEltNo && InVec.hasOneUse() && 9955 ISD::isNormalLoad(InVec.getNode())) { 9956 SDValue Index = N->getOperand(1); 9957 if (LoadSDNode *OrigLoad = dyn_cast<LoadSDNode>(InVec)) 9958 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, Index, 9959 OrigLoad); 9960 } 9961 9962 // Perform only after legalization to ensure build_vector / vector_shuffle 9963 // optimizations have already been done. 9964 if (!LegalOperations) return SDValue(); 9965 9966 // (vextract (v4f32 load $addr), c) -> (f32 load $addr+c*size) 9967 // (vextract (v4f32 s2v (f32 load $addr)), c) -> (f32 load $addr+c*size) 9968 // (vextract (v4f32 shuffle (load $addr), <1,u,u,u>), 0) -> (f32 load $addr) 9969 9970 if (ConstEltNo) { 9971 int Elt = cast<ConstantSDNode>(EltNo)->getZExtValue(); 9972 9973 LoadSDNode *LN0 = nullptr; 9974 const ShuffleVectorSDNode *SVN = nullptr; 9975 if (ISD::isNormalLoad(InVec.getNode())) { 9976 LN0 = cast<LoadSDNode>(InVec); 9977 } else if (InVec.getOpcode() == ISD::SCALAR_TO_VECTOR && 9978 InVec.getOperand(0).getValueType() == ExtVT && 9979 ISD::isNormalLoad(InVec.getOperand(0).getNode())) { 9980 // Don't duplicate a load with other uses. 9981 if (!InVec.hasOneUse()) 9982 return SDValue(); 9983 9984 LN0 = cast<LoadSDNode>(InVec.getOperand(0)); 9985 } else if ((SVN = dyn_cast<ShuffleVectorSDNode>(InVec))) { 9986 // (vextract (vector_shuffle (load $addr), v2, <1, u, u, u>), 1) 9987 // => 9988 // (load $addr+1*size) 9989 9990 // Don't duplicate a load with other uses. 
9991 if (!InVec.hasOneUse()) 9992 return SDValue(); 9993 9994 // If the bit convert changed the number of elements, it is unsafe 9995 // to examine the mask. 9996 if (BCNumEltsChanged) 9997 return SDValue(); 9998 9999 // Select the input vector, guarding against out of range extract vector. 10000 unsigned NumElems = VT.getVectorNumElements(); 10001 int Idx = (Elt > (int)NumElems) ? -1 : SVN->getMaskElt(Elt); 10002 InVec = (Idx < (int)NumElems) ? InVec.getOperand(0) : InVec.getOperand(1); 10003 10004 if (InVec.getOpcode() == ISD::BITCAST) { 10005 // Don't duplicate a load with other uses. 10006 if (!InVec.hasOneUse()) 10007 return SDValue(); 10008 10009 InVec = InVec.getOperand(0); 10010 } 10011 if (ISD::isNormalLoad(InVec.getNode())) { 10012 LN0 = cast<LoadSDNode>(InVec); 10013 Elt = (Idx < (int)NumElems) ? Idx : Idx - (int)NumElems; 10014 EltNo = DAG.getConstant(Elt, EltNo.getValueType()); 10015 } 10016 } 10017 10018 // Make sure we found a non-volatile load and the extractelement is 10019 // the only use. 10020 if (!LN0 || !LN0->hasNUsesOfValue(1,0) || LN0->isVolatile()) 10021 return SDValue(); 10022 10023 // If Idx was -1 above, Elt is going to be -1, so just return undef. 10024 if (Elt == -1) 10025 return DAG.getUNDEF(LVT); 10026 10027 return ReplaceExtractVectorEltOfLoadWithNarrowedLoad(N, VT, EltNo, LN0); 10028 } 10029 10030 return SDValue(); 10031 } 10032 10033 // Simplify (build_vec (ext )) to (bitcast (build_vec )) 10034 SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { 10035 // We perform this optimization post type-legalization because 10036 // the type-legalizer often scalarizes integer-promoted vectors. 10037 // Performing this optimization before may create bit-casts which 10038 // will be type-legalized to complex code sequences. 10039 // We perform this optimization only before the operation legalizer because we 10040 // may introduce illegal operations. 
  if (Level != AfterLegalizeVectorOps && Level != AfterLegalizeTypes)
    return SDValue();

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // Check to see if this is a BUILD_VECTOR of a bunch of values
  // which come from any_extend or zero_extend nodes. If so, we can create
  // a new BUILD_VECTOR using bit-casts which may enable other BUILD_VECTOR
  // optimizations. We do not handle sign-extend because we can't fill the sign
  // using shuffles.
  EVT SourceType = MVT::Other;
  bool AllAnyExt = true;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    // Ignore undef inputs.
    if (In.getOpcode() == ISD::UNDEF) continue;

    bool AnyExt  = In.getOpcode() == ISD::ANY_EXTEND;
    bool ZeroExt = In.getOpcode() == ISD::ZERO_EXTEND;

    // Abort if the element is not an extension.
    if (!ZeroExt && !AnyExt) {
      SourceType = MVT::Other;
      break;
    }

    // The input is a ZeroExt or AnyExt. Check the original type.
    EVT InTy = In.getOperand(0).getValueType();

    // Check that all of the widened source types are the same.
    if (SourceType == MVT::Other)
      // First time.
      SourceType = InTy;
    else if (InTy != SourceType) {
      // Multiple income types. Abort.
      SourceType = MVT::Other;
      break;
    }

    // Check if all of the extends are ANY_EXTENDs.
    AllAnyExt &= AnyExt;
  }

  // In order to have valid types, all of the inputs must be extended from the
  // same source type and all of the inputs must be any or zero extend.
  // Scalar sizes must be a power of two.
  EVT OutScalarTy = VT.getScalarType();
  bool ValidTypes = SourceType != MVT::Other &&
                    isPowerOf2_32(OutScalarTy.getSizeInBits()) &&
                    isPowerOf2_32(SourceType.getSizeInBits());

  // Create a new simpler BUILD_VECTOR sequence which other optimizations can
  // turn into a single shuffle instruction.
  if (!ValidTypes)
    return SDValue();

  bool isLE = TLI.isLittleEndian();
  unsigned ElemRatio = OutScalarTy.getSizeInBits()/SourceType.getSizeInBits();
  assert(ElemRatio > 1 && "Invalid element size ratio");
  // If every extension was ANY_EXTEND the padding lanes may be undef;
  // otherwise they must be zero to preserve zero-extension semantics.
  SDValue Filler = AllAnyExt ? DAG.getUNDEF(SourceType):
                               DAG.getConstant(0, SourceType);

  unsigned NewBVElems = ElemRatio * VT.getVectorNumElements();
  SmallVector<SDValue, 8> Ops(NewBVElems, Filler);

  // Populate the new build_vector
  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) {
    SDValue Cast = N->getOperand(i);
    assert((Cast.getOpcode() == ISD::ANY_EXTEND ||
            Cast.getOpcode() == ISD::ZERO_EXTEND ||
            Cast.getOpcode() == ISD::UNDEF) && "Invalid cast opcode");
    SDValue In;
    if (Cast.getOpcode() == ISD::UNDEF)
      In = DAG.getUNDEF(SourceType);
    else
      In = Cast->getOperand(0);
    // On little-endian the payload is the lowest-addressed sub-element of
    // each group; on big-endian it is the highest.
    unsigned Index = isLE ? (i * ElemRatio) :
                            (i * ElemRatio + (ElemRatio - 1));

    assert(Index < Ops.size() && "Invalid index");
    Ops[Index] = In;
  }

  // The type of the new BUILD_VECTOR node.
  EVT VecVT = EVT::getVectorVT(*DAG.getContext(), SourceType, NewBVElems);
  assert(VecVT.getSizeInBits() == VT.getSizeInBits() &&
         "Invalid vector size");
  // Check if the new vector type is legal.
  if (!isTypeLegal(VecVT)) return SDValue();

  // Make the new BUILD_VECTOR.
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, VecVT, Ops);

  // The new BUILD_VECTOR node has the potential to be further optimized.
  AddToWorkList(BV.getNode());
  // Bitcast to the desired type.
  return DAG.getNode(ISD::BITCAST, dl, VT, BV);
}

// Fold (build_vector (Xint_to_fp a), (Xint_to_fp b), ...) into
// (Xint_to_fp (build_vector a, b, ...)) when the vector conversion is
// legal or custom for the source vector type.
SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) {
  EVT VT = N->getValueType(0);

  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);

  EVT SrcVT = MVT::Other;
  // ISD::DELETED_NODE is used as a sentinel for "no conversion seen yet".
  unsigned Opcode = ISD::DELETED_NODE;
  unsigned NumDefs = 0;

  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);
    unsigned Opc = In.getOpcode();

    if (Opc == ISD::UNDEF)
      continue;

    // If all scalar values are floats and converted from integers.
    if (Opcode == ISD::DELETED_NODE &&
        (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) {
      Opcode = Opc;
    }

    // All defined operands must use the same conversion opcode.
    if (Opc != Opcode)
      return SDValue();

    EVT InVT = In.getOperand(0).getValueType();

    // If all scalar values are typed differently, bail out. It's chosen to
    // simplify BUILD_VECTOR of integer types.
    if (SrcVT == MVT::Other)
      SrcVT = InVT;
    if (SrcVT != InVT)
      return SDValue();
    NumDefs++;
  }

  // If the vector has just one element defined, it's not worth to fold it into
  // a vectorized one.
  if (NumDefs < 2)
    return SDValue();

  assert((Opcode == ISD::UINT_TO_FP || Opcode == ISD::SINT_TO_FP)
         && "Should only handle conversion from integer to float.");
  assert(SrcVT != MVT::Other && "Cannot determine source type!");

  EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars);

  if (!TLI.isOperationLegalOrCustom(Opcode, NVT))
    return SDValue();

  // Build the integer vector, filling undef lanes with undef of the
  // source type.
  SmallVector<SDValue, 8> Opnds;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    SDValue In = N->getOperand(i);

    if (In.getOpcode() == ISD::UNDEF)
      Opnds.push_back(DAG.getUNDEF(SrcVT));
    else
      Opnds.push_back(In.getOperand(0));
  }
  SDValue BV = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Opnds);
  AddToWorkList(BV.getNode());

  return DAG.getNode(Opcode, dl, VT, BV);
}

// Visit a BUILD_VECTOR node: fold all-undef vectors, try the two reductions
// above, then try to turn a build_vector of extracts from at most two source
// vectors into a VECTOR_SHUFFLE.
SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) {
  unsigned NumInScalars = N->getNumOperands();
  SDLoc dl(N);
  EVT VT = N->getValueType(0);

  // A vector built entirely of undefs is undef.
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  SDValue V = reduceBuildVecExtToExtBuildVec(N);
  if (V.getNode())
    return V;

  V = reduceBuildVecConvertToConvertBuildVec(N);
  if (V.getNode())
    return V;

  // Check to see if this is a BUILD_VECTOR of a bunch of EXTRACT_VECTOR_ELT
  // operations.  If so, and if the EXTRACT_VECTOR_ELT vector inputs come from
  // at most two distinct vectors, turn this into a shuffle node.

  // May only combine to shuffle after legalize if shuffle is legal.
  if (LegalOperations &&
      !TLI.isOperationLegalOrCustom(ISD::VECTOR_SHUFFLE, VT))
    return SDValue();

  SDValue VecIn1, VecIn2;
  for (unsigned i = 0; i != NumInScalars; ++i) {
    // Ignore undef inputs.
10238 if (N->getOperand(i).getOpcode() == ISD::UNDEF) continue; 10239 10240 // If this input is something other than a EXTRACT_VECTOR_ELT with a 10241 // constant index, bail out. 10242 if (N->getOperand(i).getOpcode() != ISD::EXTRACT_VECTOR_ELT || 10243 !isa<ConstantSDNode>(N->getOperand(i).getOperand(1))) { 10244 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10245 break; 10246 } 10247 10248 // We allow up to two distinct input vectors. 10249 SDValue ExtractedFromVec = N->getOperand(i).getOperand(0); 10250 if (ExtractedFromVec == VecIn1 || ExtractedFromVec == VecIn2) 10251 continue; 10252 10253 if (!VecIn1.getNode()) { 10254 VecIn1 = ExtractedFromVec; 10255 } else if (!VecIn2.getNode()) { 10256 VecIn2 = ExtractedFromVec; 10257 } else { 10258 // Too many inputs. 10259 VecIn1 = VecIn2 = SDValue(nullptr, 0); 10260 break; 10261 } 10262 } 10263 10264 // If everything is good, we can make a shuffle operation. 10265 if (VecIn1.getNode()) { 10266 SmallVector<int, 8> Mask; 10267 for (unsigned i = 0; i != NumInScalars; ++i) { 10268 if (N->getOperand(i).getOpcode() == ISD::UNDEF) { 10269 Mask.push_back(-1); 10270 continue; 10271 } 10272 10273 // If extracting from the first vector, just use the index directly. 10274 SDValue Extract = N->getOperand(i); 10275 SDValue ExtVal = Extract.getOperand(1); 10276 if (Extract.getOperand(0) == VecIn1) { 10277 unsigned ExtIndex = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 10278 if (ExtIndex > VT.getVectorNumElements()) 10279 return SDValue(); 10280 10281 Mask.push_back(ExtIndex); 10282 continue; 10283 } 10284 10285 // Otherwise, use InIdx + VecSize 10286 unsigned Idx = cast<ConstantSDNode>(ExtVal)->getZExtValue(); 10287 Mask.push_back(Idx+NumInScalars); 10288 } 10289 10290 // We can't generate a shuffle node with mismatched input and output types. 10291 // Attempt to transform a single input vector to the correct type. 10292 if ((VT != VecIn1.getValueType())) { 10293 // We don't support shuffeling between TWO values of different types. 
      // A VECTOR_SHUFFLE has exactly two inputs; we can widen a single
      // mismatched input below, but not two distinct ones. Bail out.
      if (VecIn2.getNode())
        return SDValue();

      // We only support widening of vectors which are half the size of the
      // output registers. For example XMM->YMM widening on X86 with AVX.
      if (VecIn1.getValueType().getSizeInBits()*2 != VT.getSizeInBits())
        return SDValue();

      // If the input vector type has a different base type to the output
      // vector type, bail out.
      if (VecIn1.getValueType().getVectorElementType() !=
          VT.getVectorElementType())
        return SDValue();

      // Widen the input vector by adding undef values.
      VecIn1 = DAG.getNode(ISD::CONCAT_VECTORS, dl, VT,
                           VecIn1, DAG.getUNDEF(VecIn1.getValueType()));
    }

    // If VecIn2 is unused then change it to undef.
    VecIn2 = VecIn2.getNode() ? VecIn2 : DAG.getUNDEF(VT);

    // Check that we were able to transform all incoming values to the same
    // type.
    if (VecIn2.getValueType() != VecIn1.getValueType() ||
        VecIn1.getValueType() != VT)
      return SDValue();

    // Only type-legal BUILD_VECTOR nodes are converted to shuffle nodes.
    if (!isTypeLegal(VT))
      return SDValue();

    // Return the new VECTOR_SHUFFLE node.
    SDValue Ops[2];
    Ops[0] = VecIn1;
    Ops[1] = VecIn2;
    return DAG.getVectorShuffle(VT, dl, Ops[0], Ops[1], &Mask[0]);
  }

  return SDValue();
}

/// Visit a CONCAT_VECTORS node and try to simplify or fold it away.
SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
  // TODO: Check to see if this is a CONCAT_VECTORS of a bunch of
  // EXTRACT_SUBVECTOR operations. If so, and if the EXTRACT_SUBVECTOR vector
  // inputs come from at most two distinct vectors, turn this into a shuffle
  // node.

  // If we only have one input vector, we don't need to do any concatenation.
  if (N->getNumOperands() == 1)
    return N->getOperand(0);

  // Check if all of the operands are undefs.
  EVT VT = N->getValueType(0);
  if (ISD::allOperandsUndef(N))
    return DAG.getUNDEF(VT);

  // Optimize concat_vectors where one of the vectors is undef.
  if (N->getNumOperands() == 2 &&
      N->getOperand(1)->getOpcode() == ISD::UNDEF) {
    SDValue In = N->getOperand(0);
    assert(In.getValueType().isVector() && "Must concat vectors");

    // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr).
    if (In->getOpcode() == ISD::BITCAST &&
        !In->getOperand(0)->getValueType(0).isVector()) {
      SDValue Scalar = In->getOperand(0);
      EVT SclTy = Scalar->getValueType(0);

      if (!SclTy.isFloatingPoint() && !SclTy.isInteger())
        return SDValue();

      // Build the vector type whose elements are the scalar's type and whose
      // total width matches the concat result.
      EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy,
                                 VT.getSizeInBits() / SclTy.getSizeInBits());
      if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType()))
        return SDValue();

      SDLoc dl = SDLoc(N);
      SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar);
      return DAG.getNode(ISD::BITCAST, dl, VT, Res);
    }
  }

  // fold (concat_vectors (BUILD_VECTOR A, B, ...), (BUILD_VECTOR C, D, ...))
  // -> (BUILD_VECTOR A, B, ..., C, D, ...)
  if (N->getNumOperands() == 2 &&
      N->getOperand(0).getOpcode() == ISD::BUILD_VECTOR &&
      N->getOperand(1).getOpcode() == ISD::BUILD_VECTOR) {
    EVT VT = N->getValueType(0);
    SDValue N0 = N->getOperand(0);
    SDValue N1 = N->getOperand(1);
    SmallVector<SDValue, 8> Opnds;
    // Both concat operands have the same vector type, so the same count.
    unsigned BuildVecNumElts = N0.getNumOperands();

    EVT SclTy0 = N0.getOperand(0)->getValueType(0);
    EVT SclTy1 = N1.getOperand(0)->getValueType(0);
    if (SclTy0.isFloatingPoint()) {
      for (unsigned i = 0; i != BuildVecNumElts; ++i)
        Opnds.push_back(N0.getOperand(i));
      for (unsigned i = 0; i != BuildVecNumElts; ++i)
        Opnds.push_back(N1.getOperand(i));
    } else {
      // If the BUILD_VECTORs are built from integers, they may have different
      // operand types. Get the smaller type and truncate all operands to it.
      EVT MinTy = SclTy0.bitsLE(SclTy1) ? SclTy0 : SclTy1;
      for (unsigned i = 0; i != BuildVecNumElts; ++i)
        Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
                        N0.getOperand(i)));
      for (unsigned i = 0; i != BuildVecNumElts; ++i)
        Opnds.push_back(DAG.getNode(ISD::TRUNCATE, SDLoc(N), MinTy,
                        N1.getOperand(i)));
    }

    return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, Opnds);
  }

  // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR
  // nodes often generate nop CONCAT_VECTOR nodes.
  // Scan the CONCAT_VECTOR operands and look for a CONCAT operations that
  // place the incoming vectors at the exact same location.
10414 SDValue SingleSource = SDValue(); 10415 unsigned PartNumElem = N->getOperand(0).getValueType().getVectorNumElements(); 10416 10417 for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { 10418 SDValue Op = N->getOperand(i); 10419 10420 if (Op.getOpcode() == ISD::UNDEF) 10421 continue; 10422 10423 // Check if this is the identity extract: 10424 if (Op.getOpcode() != ISD::EXTRACT_SUBVECTOR) 10425 return SDValue(); 10426 10427 // Find the single incoming vector for the extract_subvector. 10428 if (SingleSource.getNode()) { 10429 if (Op.getOperand(0) != SingleSource) 10430 return SDValue(); 10431 } else { 10432 SingleSource = Op.getOperand(0); 10433 10434 // Check the source type is the same as the type of the result. 10435 // If not, this concat may extend the vector, so we can not 10436 // optimize it away. 10437 if (SingleSource.getValueType() != N->getValueType(0)) 10438 return SDValue(); 10439 } 10440 10441 unsigned IdentityIndex = i * PartNumElem; 10442 ConstantSDNode *CS = dyn_cast<ConstantSDNode>(Op.getOperand(1)); 10443 // The extract index must be constant. 10444 if (!CS) 10445 return SDValue(); 10446 10447 // Check that we are reading from the identity index. 10448 if (CS->getZExtValue() != IdentityIndex) 10449 return SDValue(); 10450 } 10451 10452 if (SingleSource.getNode()) 10453 return SingleSource; 10454 10455 return SDValue(); 10456 } 10457 10458 SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { 10459 EVT NVT = N->getValueType(0); 10460 SDValue V = N->getOperand(0); 10461 10462 if (V->getOpcode() == ISD::CONCAT_VECTORS) { 10463 // Combine: 10464 // (extract_subvec (concat V1, V2, ...), i) 10465 // Into: 10466 // Vi if possible 10467 // Only operand 0 is checked as 'concat' assumes all inputs of the same 10468 // type. 
10469 if (V->getOperand(0).getValueType() != NVT) 10470 return SDValue(); 10471 unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue(); 10472 unsigned NumElems = NVT.getVectorNumElements(); 10473 assert((Idx % NumElems) == 0 && 10474 "IDX in concat is not a multiple of the result vector length."); 10475 return V->getOperand(Idx / NumElems); 10476 } 10477 10478 // Skip bitcasting 10479 if (V->getOpcode() == ISD::BITCAST) 10480 V = V.getOperand(0); 10481 10482 if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { 10483 SDLoc dl(N); 10484 // Handle only simple case where vector being inserted and vector 10485 // being extracted are of same type, and are half size of larger vectors. 10486 EVT BigVT = V->getOperand(0).getValueType(); 10487 EVT SmallVT = V->getOperand(1).getValueType(); 10488 if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) 10489 return SDValue(); 10490 10491 // Only handle cases where both indexes are constants with the same type. 10492 ConstantSDNode *ExtIdx = dyn_cast<ConstantSDNode>(N->getOperand(1)); 10493 ConstantSDNode *InsIdx = dyn_cast<ConstantSDNode>(V->getOperand(2)); 10494 10495 if (InsIdx && ExtIdx && 10496 InsIdx->getValueType(0).getSizeInBits() <= 64 && 10497 ExtIdx->getValueType(0).getSizeInBits() <= 64) { 10498 // Combine: 10499 // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) 10500 // Into: 10501 // indices are equal or bit offsets are equal => V1 10502 // otherwise => (extract_subvec V1, ExtIdx) 10503 if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == 10504 ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) 10505 return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); 10506 return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, 10507 DAG.getNode(ISD::BITCAST, dl, 10508 N->getOperand(0).getValueType(), 10509 V->getOperand(0)), N->getOperand(1)); 10510 } 10511 } 10512 10513 return SDValue(); 10514 } 10515 10516 // Tries to turn a shuffle of two 
// CONCAT_VECTORS into a single concat. Succeeds only when every
// subvector-sized slice of the shuffle mask is either all-undef or an exact,
// contiguous, aligned copy of one concat operand.
static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);
  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  SmallVector<SDValue, 4> Ops;
  EVT ConcatVT = N0.getOperand(0).getValueType();
  unsigned NumElemsPerConcat = ConcatVT.getVectorNumElements();
  unsigned NumConcats = NumElts / NumElemsPerConcat;

  // Look at every vector that's inserted. We're looking for exact
  // subvector-sized copies from a concatenated vector
  for (unsigned I = 0; I != NumConcats; ++I) {
    // Make sure we're dealing with a copy.
    unsigned Begin = I * NumElemsPerConcat;
    bool AllUndef = true, NoUndef = true;
    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
      if (SVN->getMaskElt(J) >= 0)
        AllUndef = false;
      else
        NoUndef = false;
    }

    if (NoUndef) {
      // The slice must start on a concat-operand boundary ...
      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
        return SDValue();

      // ... and be strictly consecutive, i.e. an unmodified copy.
      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
          return SDValue();

      // Mask indices >= N0's width address N1's concat operands.
      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
      if (FirstElt < N0.getNumOperands())
        Ops.push_back(N0.getOperand(FirstElt));
      else
        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));

    } else if (AllUndef) {
      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
    } else { // Mixed with general masks and undefs, can't do optimization.
      return SDValue();
    }
  }

  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops);
}

/// Visit a VECTOR_SHUFFLE node: canonicalize operands and fold away shuffles
/// that reduce to one of their inputs or to a simpler shuffle.
SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
  EVT VT = N->getValueType(0);
  unsigned NumElts = VT.getVectorNumElements();

  SDValue N0 = N->getOperand(0);
  SDValue N1 = N->getOperand(1);

  assert(N0.getValueType() == VT && "Vector shuffle must be normalized in DAG");

  // Canonicalize shuffle undef, undef -> undef
  if (N0.getOpcode() == ISD::UNDEF && N1.getOpcode() == ISD::UNDEF)
    return DAG.getUNDEF(VT);

  ShuffleVectorSDNode *SVN = cast<ShuffleVectorSDNode>(N);

  // Canonicalize shuffle v, v -> v, undef
  if (N0 == N1) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      // Remap second-operand indices onto the (identical) first operand.
      if (Idx >= (int)NumElts) Idx -= NumElts;
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Canonicalize shuffle undef, v -> v, undef. Commute the shuffle mask.
  if (N0.getOpcode() == ISD::UNDEF) {
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= 0) {
        if (Idx >= (int)NumElts)
          Idx -= NumElts;
        else
          Idx = -1; // remove reference to lhs
      }
      NewMask.push_back(Idx);
    }
    return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
                                &NewMask[0]);
  }

  // Remove references to rhs if it is undef
  if (N1.getOpcode() == ISD::UNDEF) {
    bool Changed = false;
    SmallVector<int, 8> NewMask;
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      if (Idx >= (int)NumElts) {
        // Element comes from the undef rhs; mark it undef in the mask.
        Idx = -1;
        Changed = true;
      }
      NewMask.push_back(Idx);
    }
    if (Changed)
      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
  }

  // If it is a splat, check if the argument vector is another splat or a
  // build_vector with all scalar elements the same.
  if (SVN->isSplat() && SVN->getSplatIndex() < (int)NumElts) {
    SDNode *V = N0.getNode();

    // If this is a bit convert that changes the element type of the vector but
    // not the number of vector elements, look through it.  Be careful not to
    // look though conversions that change things like v4f32 to v2f64.
    if (V->getOpcode() == ISD::BITCAST) {
      SDValue ConvInput = V->getOperand(0);
      if (ConvInput.getValueType().isVector() &&
          ConvInput.getValueType().getVectorNumElements() == NumElts)
        V = ConvInput.getNode();
    }

    if (V->getOpcode() == ISD::BUILD_VECTOR) {
      assert(V->getNumOperands() == NumElts &&
             "BUILD_VECTOR has wrong number of operands");
      SDValue Base;
      bool AllSame = true;
      // Find the first non-undef element to compare the rest against.
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i).getOpcode() != ISD::UNDEF) {
          Base = V->getOperand(i);
          break;
        }
      }
      // Splat of <u, u, u, u>, return <u, u, u, u>
      if (!Base.getNode())
        return N0;
      for (unsigned i = 0; i != NumElts; ++i) {
        if (V->getOperand(i) != Base) {
          AllSame = false;
          break;
        }
      }
      // Splat of <x, x, x, x>, return <x, x, x, x>
      if (AllSame)
        return N0;
    }
  }

  // Try to fold shuffle-of-concats into a single concat; see
  // partitionShuffleOfConcats above.
  if (N0.getOpcode() == ISD::CONCAT_VECTORS &&
      Level < AfterLegalizeVectorOps &&
      (N1.getOpcode() == ISD::UNDEF ||
      (N1.getOpcode() == ISD::CONCAT_VECTORS &&
       N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType()))) {
    SDValue V = partitionShuffleOfConcats(N, DAG);

    if (V.getNode())
      return V;
  }

  // If this shuffle node is simply a swizzle of another shuffle node,
  // then try to simplify it.
  if (N0.getOpcode() == ISD::VECTOR_SHUFFLE && Level < AfterLegalizeDAG &&
      N1.getOpcode() == ISD::UNDEF) {

    ShuffleVectorSDNode *OtherSV = cast<ShuffleVectorSDNode>(N0);

    // The incoming shuffle must be of the same type as the result of the
    // current shuffle.
    assert(OtherSV->getOperand(0).getValueType() == VT &&
           "Shuffle types don't match");

    SmallVector<int, 4> Mask;
    // Compute the combined shuffle mask.
    for (unsigned i = 0; i != NumElts; ++i) {
      int Idx = SVN->getMaskElt(i);
      assert(Idx < (int)NumElts && "Index references undef operand");
      // Next, this index comes from the first value, which is the incoming
      // shuffle. Adopt the incoming index.
      if (Idx >= 0)
        Idx = OtherSV->getMaskElt(Idx);
      Mask.push_back(Idx);
    }

    bool CommuteOperands = false;
    if (N0.getOperand(1).getOpcode() != ISD::UNDEF) {
      // To be valid, the combine shuffle mask should only reference elements
      // from one of the two vectors in input to the inner shufflevector.
      bool IsValidMask = true;
      for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
        // See if the combined mask only reference undefs or elements coming
        // from the first shufflevector operand.
        IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] < NumElts;

      if (!IsValidMask) {
        IsValidMask = true;
        for (unsigned i = 0; i != NumElts && IsValidMask; ++i)
          // Check that all the elements come from the second shuffle operand.
          IsValidMask = Mask[i] < 0 || (unsigned)Mask[i] >= NumElts;
        CommuteOperands = IsValidMask;
      }

      // Early exit if the combined shuffle mask is not valid.
      if (!IsValidMask)
        return SDValue();
    }

    // See if this pair of shuffles can be safely folded according to either
    // of the following rules:
    //   shuffle(shuffle(x, y), undef) -> x
    //   shuffle(shuffle(x, undef), undef) -> x
    //   shuffle(shuffle(x, y), undef) -> y
    bool IsIdentityMask = true;
    unsigned BaseMaskIndex = CommuteOperands ? NumElts : 0;
    for (unsigned i = 0; i != NumElts && IsIdentityMask; ++i) {
      // Skip Undefs.
      if (Mask[i] < 0)
        continue;

      // The combined shuffle must map each index to itself.
10739 IsIdentityMask = (unsigned)Mask[i] == i + BaseMaskIndex; 10740 } 10741 10742 if (IsIdentityMask) { 10743 if (CommuteOperands) 10744 // optimize shuffle(shuffle(x, y), undef) -> y. 10745 return OtherSV->getOperand(1); 10746 10747 // optimize shuffle(shuffle(x, undef), undef) -> x 10748 // optimize shuffle(shuffle(x, y), undef) -> x 10749 return OtherSV->getOperand(0); 10750 } 10751 10752 // It may still be beneficial to combine the two shuffles if the 10753 // resulting shuffle is legal. 10754 if (TLI.isShuffleMaskLegal(Mask, VT)) { 10755 if (!CommuteOperands) 10756 // shuffle(shuffle(x, undef, M1), undef, M2) -> shuffle(x, undef, M3). 10757 // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(x, undef, M3) 10758 return DAG.getVectorShuffle(VT, SDLoc(N), N0->getOperand(0), N1, 10759 &Mask[0]); 10760 10761 // shuffle(shuffle(x, y, M1), undef, M2) -> shuffle(undef, y, M3) 10762 return DAG.getVectorShuffle(VT, SDLoc(N), N1, N0->getOperand(1), 10763 &Mask[0]); 10764 } 10765 } 10766 10767 return SDValue(); 10768 } 10769 10770 SDValue DAGCombiner::visitINSERT_SUBVECTOR(SDNode *N) { 10771 SDValue N0 = N->getOperand(0); 10772 SDValue N2 = N->getOperand(2); 10773 10774 // If the input vector is a concatenation, and the insert replaces 10775 // one of the halves, we can optimize into a single concat_vectors. 
10776 if (N0.getOpcode() == ISD::CONCAT_VECTORS && 10777 N0->getNumOperands() == 2 && N2.getOpcode() == ISD::Constant) { 10778 APInt InsIdx = cast<ConstantSDNode>(N2)->getAPIntValue(); 10779 EVT VT = N->getValueType(0); 10780 10781 // Lower half: fold (insert_subvector (concat_vectors X, Y), Z) -> 10782 // (concat_vectors Z, Y) 10783 if (InsIdx == 0) 10784 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 10785 N->getOperand(1), N0.getOperand(1)); 10786 10787 // Upper half: fold (insert_subvector (concat_vectors X, Y), Z) -> 10788 // (concat_vectors X, Z) 10789 if (InsIdx == VT.getVectorNumElements()/2) 10790 return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, 10791 N0.getOperand(0), N->getOperand(1)); 10792 } 10793 10794 return SDValue(); 10795 } 10796 10797 /// XformToShuffleWithZero - Returns a vector_shuffle if it able to transform 10798 /// an AND to a vector_shuffle with the destination vector and a zero vector. 10799 /// e.g. AND V, <0xffffffff, 0, 0xffffffff, 0>. ==> 10800 /// vector_shuffle V, Zero, <0, 4, 2, 4> 10801 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { 10802 EVT VT = N->getValueType(0); 10803 SDLoc dl(N); 10804 SDValue LHS = N->getOperand(0); 10805 SDValue RHS = N->getOperand(1); 10806 if (N->getOpcode() == ISD::AND) { 10807 if (RHS.getOpcode() == ISD::BITCAST) 10808 RHS = RHS.getOperand(0); 10809 if (RHS.getOpcode() == ISD::BUILD_VECTOR) { 10810 SmallVector<int, 8> Indices; 10811 unsigned NumElts = RHS.getNumOperands(); 10812 for (unsigned i = 0; i != NumElts; ++i) { 10813 SDValue Elt = RHS.getOperand(i); 10814 if (!isa<ConstantSDNode>(Elt)) 10815 return SDValue(); 10816 10817 if (cast<ConstantSDNode>(Elt)->isAllOnesValue()) 10818 Indices.push_back(i); 10819 else if (cast<ConstantSDNode>(Elt)->isNullValue()) 10820 Indices.push_back(NumElts); 10821 else 10822 return SDValue(); 10823 } 10824 10825 // Let's see if the target supports this vector_shuffle. 
      EVT RVT = RHS.getValueType();
      if (!TLI.isVectorClearMaskLegal(Indices, RVT))
        return SDValue();

      // Return the new VECTOR_SHUFFLE node.
      EVT EltVT = RVT.getVectorElementType();
      SmallVector<SDValue,8> ZeroOps(RVT.getVectorNumElements(),
                                     DAG.getConstant(0, EltVT));
      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), RVT, ZeroOps);
      LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
      SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
      return DAG.getNode(ISD::BITCAST, dl, VT, Shuf);
    }
  }

  return SDValue();
}

/// SimplifyVBinOp - Visit a binary vector operation, like ADD.
SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVBinOp only works on vectors!");

  SDValue LHS = N->getOperand(0);
  SDValue RHS = N->getOperand(1);
  SDValue Shuffle = XformToShuffleWithZero(N);
  if (Shuffle.getNode()) return Shuffle;

  // If the LHS and RHS are BUILD_VECTOR nodes, see if we can constant fold
  // this operation.
  if (LHS.getOpcode() == ISD::BUILD_VECTOR &&
      RHS.getOpcode() == ISD::BUILD_VECTOR) {
    // Check if both vectors are constants. If not bail out.
    if (!(cast<BuildVectorSDNode>(LHS)->isConstant() &&
          cast<BuildVectorSDNode>(RHS)->isConstant()))
      return SDValue();

    SmallVector<SDValue, 8> Ops;
    for (unsigned i = 0, e = LHS.getNumOperands(); i != e; ++i) {
      SDValue LHSOp = LHS.getOperand(i);
      SDValue RHSOp = RHS.getOperand(i);

      // Can't fold divide by zero. Breaking out leaves Ops short, so the
      // size check below rejects the whole fold.
      if (N->getOpcode() == ISD::SDIV || N->getOpcode() == ISD::UDIV ||
          N->getOpcode() == ISD::FDIV) {
        if ((RHSOp.getOpcode() == ISD::Constant &&
             cast<ConstantSDNode>(RHSOp.getNode())->isNullValue()) ||
            (RHSOp.getOpcode() == ISD::ConstantFP &&
             cast<ConstantFPSDNode>(RHSOp.getNode())->getValueAPF().isZero()))
          break;
      }

      EVT VT = LHSOp.getValueType();
      EVT RVT = RHSOp.getValueType();
      if (RVT != VT) {
        // Integer BUILD_VECTOR operands may have types larger than the element
        // size (e.g., when the element type is not legal).  Prior to type
        // legalization, the types may not match between the two BUILD_VECTORS.
        // Truncate one of the operands to make them match.
        if (RVT.getSizeInBits() > VT.getSizeInBits()) {
          RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp);
        } else {
          LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp);
          VT = RVT;
        }
      }
      SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT,
                                   LHSOp, RHSOp);
      // The fold is only useful if it produced a constant (or undef); a
      // non-constant result means the elements didn't actually fold.
      if (FoldOp.getOpcode() != ISD::UNDEF &&
          FoldOp.getOpcode() != ISD::Constant &&
          FoldOp.getOpcode() != ISD::ConstantFP)
        break;
      Ops.push_back(FoldOp);
      AddToWorkList(FoldOp.getNode());
    }

    if (Ops.size() == LHS.getNumOperands())
      return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), Ops);
  }

  // Type legalization might introduce new shuffles in the DAG.
  // Fold (VBinOp (shuffle (A, Undef, Mask)), (shuffle (B, Undef, Mask)))
  //   -> (shuffle (VBinOp (A, B)), Undef, Mask).
  if (LegalTypes && isa<ShuffleVectorSDNode>(LHS) &&
      isa<ShuffleVectorSDNode>(RHS) && LHS.hasOneUse() && RHS.hasOneUse() &&
      LHS.getOperand(1).getOpcode() == ISD::UNDEF &&
      RHS.getOperand(1).getOpcode() == ISD::UNDEF) {
    ShuffleVectorSDNode *SVN0 = cast<ShuffleVectorSDNode>(LHS);
    ShuffleVectorSDNode *SVN1 = cast<ShuffleVectorSDNode>(RHS);

    if (SVN0->getMask().equals(SVN1->getMask())) {
      EVT VT = N->getValueType(0);
      SDValue UndefVector = LHS.getOperand(1);
      SDValue NewBinOp = DAG.getNode(N->getOpcode(), SDLoc(N), VT,
                                     LHS.getOperand(0), RHS.getOperand(0));
      AddUsersToWorkList(N);
      return DAG.getVectorShuffle(VT, SDLoc(N), NewBinOp, UndefVector,
                                  &SVN0->getMask()[0]);
    }
  }

  return SDValue();
}

/// SimplifyVUnaryOp - Visit a unary vector operation, like FABS/FNEG.
SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) {
  assert(N->getValueType(0).isVector() &&
         "SimplifyVUnaryOp only works on vectors!");

  SDValue N0 = N->getOperand(0);

  if (N0.getOpcode() != ISD::BUILD_VECTOR)
    return SDValue();

  // Operand is a BUILD_VECTOR node, see if we can constant fold it.
  SmallVector<SDValue, 8> Ops;
  for (unsigned i = 0, e = N0.getNumOperands(); i != e; ++i) {
    SDValue Op = N0.getOperand(i);
    // Only undef and FP-constant elements can be folded per-element.
    if (Op.getOpcode() != ISD::UNDEF &&
        Op.getOpcode() != ISD::ConstantFP)
      break;
    EVT EltVT = Op.getValueType();
    SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op);
    if (FoldOp.getOpcode() != ISD::UNDEF &&
        FoldOp.getOpcode() != ISD::ConstantFP)
      break;
    Ops.push_back(FoldOp);
    AddToWorkList(FoldOp.getNode());
  }

  // Reject the fold unless every element folded to a constant/undef.
  if (Ops.size() != N0.getNumOperands())
    return SDValue();

  return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), Ops);
}

/// Simplify (select (setcc ...), N1, N2) by delegating to SimplifySelectCC
/// and, if that produced a select_cc, re-expanding it into setcc + select.
SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0,
                                    SDValue N1, SDValue N2){
  assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!");

  SDValue SCC = SimplifySelectCC(DL, N0.getOperand(0), N0.getOperand(1), N1, N2,
                                 cast<CondCodeSDNode>(N0.getOperand(2))->get());

  // If we got a simplified select_cc node back from SimplifySelectCC, then
  // break it down into a new SETCC node, and a new SELECT node, and then return
  // the SELECT node, since we were called with a SELECT node.
  if (SCC.getNode()) {
    // Check to see if we got a select_cc back (to turn into setcc/select).
    // Otherwise, just return whatever node we got back, like fabs.
    if (SCC.getOpcode() == ISD::SELECT_CC) {
      SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0),
                                  N0.getValueType(),
                                  SCC.getOperand(0), SCC.getOperand(1),
                                  SCC.getOperand(4));
      AddToWorkList(SETCC.getNode());
      return DAG.getSelect(SDLoc(SCC), SCC.getValueType(),
                           SCC.getOperand(2), SCC.getOperand(3), SETCC);
    }

    return SCC;
  }
  return SDValue();
}

/// SimplifySelectOps - Given a SELECT or a SELECT_CC node, where LHS and RHS
/// are the two values being selected between, see if we can simplify the
/// select.  Callers of this should assume that TheSelect is deleted if this
/// returns true.  As such, they should return the appropriate thing (e.g. the
/// node) back to the top-level of the DAG combiner loop to avoid it being
/// looked at.
bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS,
                                    SDValue RHS) {

  // Cannot simplify select with vector condition
  if (TheSelect->getOperand(0).getValueType().isVector()) return false;

  // If this is a select from two identical things, try to pull the operation
  // through the select.
  if (LHS.getOpcode() != RHS.getOpcode() ||
      !LHS.hasOneUse() || !RHS.hasOneUse())
    return false;

  // If this is a load and the token chain is identical, replace the select
  // of two loads with a load through a select of the address to load from.
  // This triggers in things like "select bool X, 10.0, 123.0" after the FP
  // constants have been dropped into the constant pool.
  if (LHS.getOpcode() == ISD::LOAD) {
    LoadSDNode *LLD = cast<LoadSDNode>(LHS);
    LoadSDNode *RLD = cast<LoadSDNode>(RHS);

    // Token chains must be identical.
    if (LHS.getOperand(0) != RHS.getOperand(0) ||
        // Do not let this transformation reduce the number of volatile loads.
        LLD->isVolatile() || RLD->isVolatile() ||
        // If this is an EXTLOAD, the VT's must match.
        LLD->getMemoryVT() != RLD->getMemoryVT() ||
        // If this is an EXTLOAD, the kind of extension must match.
        (LLD->getExtensionType() != RLD->getExtensionType() &&
         // The only exception is if one of the extensions is anyext.
         LLD->getExtensionType() != ISD::EXTLOAD &&
         RLD->getExtensionType() != ISD::EXTLOAD) ||
        // FIXME: this discards src value information.  This is
        // over-conservative. It would be beneficial to be able to remember
        // both potential memory locations.  Since we are discarding
        // src value info, don't do the transformation if the memory
        // locations are not in the default address space.
        LLD->getPointerInfo().getAddrSpace() != 0 ||
        RLD->getPointerInfo().getAddrSpace() != 0 ||
        // The resulting select of base pointers must be legal.
        !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(),
                                      LLD->getBasePtr().getValueType()))
      return false;

    // Check that the select condition doesn't reach either load.  If so,
    // folding this will induce a cycle into the DAG.  If not, this is safe to
    // xform, so create a select of the addresses.
    SDValue Addr;
    if (TheSelect->getOpcode() == ISD::SELECT) {
      SDNode *CondNode = TheSelect->getOperand(0).getNode();
      if ((LLD->hasAnyUseOfValue(1) && LLD->isPredecessorOf(CondNode)) ||
          (RLD->hasAnyUseOfValue(1) && RLD->isPredecessorOf(CondNode)))
        return false;
      // The loads must not depend on one another.
      if (LLD->isPredecessorOf(RLD) ||
          RLD->isPredecessorOf(LLD))
        return false;
      Addr = DAG.getSelect(SDLoc(TheSelect),
                           LLD->getBasePtr().getValueType(),
                           TheSelect->getOperand(0), LLD->getBasePtr(),
                           RLD->getBasePtr());
    } else {  // Otherwise SELECT_CC
      // For SELECT_CC the condition is operands 0 and 1; neither load may
      // feed either comparison operand, or folding would create a cycle.
      SDNode *CondLHS = TheSelect->getOperand(0).getNode();
      SDNode *CondRHS = TheSelect->getOperand(1).getNode();

      if ((LLD->hasAnyUseOfValue(1) &&
           (LLD->isPredecessorOf(CondLHS) || LLD->isPredecessorOf(CondRHS))) ||
          (RLD->hasAnyUseOfValue(1) &&
           (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS))))
        return false;

      Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect),
                         LLD->getBasePtr().getValueType(),
                         TheSelect->getOperand(0),
                         TheSelect->getOperand(1),
                         LLD->getBasePtr(), RLD->getBasePtr(),
                         TheSelect->getOperand(4));
    }

    SDValue Load;
    if (LLD->getExtensionType() == ISD::NON_EXTLOAD) {
      Load = DAG.getLoad(TheSelect->getValueType(0),
                         SDLoc(TheSelect),
                         // FIXME: Discards pointer and TBAA info.
                         LLD->getChain(), Addr, MachinePointerInfo(),
                         LLD->isVolatile(), LLD->isNonTemporal(),
                         LLD->isInvariant(), LLD->getAlignment());
    } else {
      // If exactly one side is an anyext, use the other side's extension
      // kind (the guard above rejected any other mismatch).
      Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ?
                            RLD->getExtensionType() : LLD->getExtensionType(),
                            SDLoc(TheSelect),
                            TheSelect->getValueType(0),
                            // FIXME: Discards pointer and TBAA info.
                            LLD->getChain(), Addr, MachinePointerInfo(),
                            LLD->getMemoryVT(), LLD->isVolatile(),
                            LLD->isNonTemporal(), LLD->getAlignment());
    }

    // Users of the select now use the result of the load.
    CombineTo(TheSelect, Load);

    // Users of the old loads now use the new load's chain.  We know the
    // old-load value is dead now.
    CombineTo(LHS.getNode(), Load.getValue(0), Load.getValue(1));
    CombineTo(RHS.getNode(), Load.getValue(0), Load.getValue(1));
    return true;
  }

  return false;
}

/// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3
/// where 'cond' is the comparison specified by CC.
SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1,
                                      SDValue N2, SDValue N3,
                                      ISD::CondCode CC, bool NotExtCompare) {
  // (x ? y : y) -> y.
  if (N2 == N3) return N2;

  EVT VT = N2.getValueType();
  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode());
  ConstantSDNode *N2C = dyn_cast<ConstantSDNode>(N2.getNode());
  ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());

  // Determine if the condition we're dealing with is constant
  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                              N0, N1, CC, DL, false);
  if (SCC.getNode()) AddToWorkList(SCC.getNode());
  ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());

  // fold select_cc true, x, y -> x
  if (SCCC && !SCCC->isNullValue())
    return N2;
  // fold select_cc false, x, y -> y
  if (SCCC && SCCC->isNullValue())
    return N3;

  // Check to see if we can simplify the select into an fabs node
  if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N1)) {
    // Allow either -0.0 or 0.0
    if (CFP->getValueAPF().isZero()) {
      // select (setg[te] X, +/-0.0), X, fneg(X) -> fabs
      if ((CC == ISD::SETGE || CC == ISD::SETGT) &&
          N0 == N2 && N3.getOpcode() == ISD::FNEG &&
          N2 == N3.getOperand(0))
        return DAG.getNode(ISD::FABS, DL, VT, N0);

      // select (setl[te] X, +/-0.0), fneg(X), X -> fabs
      if ((CC == ISD::SETLT || CC == ISD::SETLE) &&
          N0 == N3 && N2.getOpcode() == ISD::FNEG &&
          N2.getOperand(0) == N3)
        return DAG.getNode(ISD::FABS,
DL, VT, N3); 11146 } 11147 } 11148 11149 // Turn "(a cond b) ? 1.0f : 2.0f" into "load (tmp + ((a cond b) ? 0 : 4)" 11150 // where "tmp" is a constant pool entry containing an array with 1.0 and 2.0 11151 // in it. This is a win when the constant is not otherwise available because 11152 // it replaces two constant pool loads with one. We only do this if the FP 11153 // type is known to be legal, because if it isn't, then we are before legalize 11154 // types an we want the other legalization to happen first (e.g. to avoid 11155 // messing with soft float) and if the ConstantFP is not legal, because if 11156 // it is legal, we may not need to store the FP constant in a constant pool. 11157 if (ConstantFPSDNode *TV = dyn_cast<ConstantFPSDNode>(N2)) 11158 if (ConstantFPSDNode *FV = dyn_cast<ConstantFPSDNode>(N3)) { 11159 if (TLI.isTypeLegal(N2.getValueType()) && 11160 (TLI.getOperationAction(ISD::ConstantFP, N2.getValueType()) != 11161 TargetLowering::Legal && 11162 !TLI.isFPImmLegal(TV->getValueAPF(), TV->getValueType(0)) && 11163 !TLI.isFPImmLegal(FV->getValueAPF(), FV->getValueType(0))) && 11164 // If both constants have multiple uses, then we won't need to do an 11165 // extra load, they are likely around in registers for other users. 11166 (TV->hasOneUse() || FV->hasOneUse())) { 11167 Constant *Elts[] = { 11168 const_cast<ConstantFP*>(FV->getConstantFPValue()), 11169 const_cast<ConstantFP*>(TV->getConstantFPValue()) 11170 }; 11171 Type *FPTy = Elts[0]->getType(); 11172 const DataLayout &TD = *TLI.getDataLayout(); 11173 11174 // Create a ConstantArray of the two constants. 11175 Constant *CA = ConstantArray::get(ArrayType::get(FPTy, 2), Elts); 11176 SDValue CPIdx = DAG.getConstantPool(CA, TLI.getPointerTy(), 11177 TD.getPrefTypeAlignment(FPTy)); 11178 unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment(); 11179 11180 // Get the offsets to the 0 and 1 element of the array so that we can 11181 // select between them. 
11182 SDValue Zero = DAG.getIntPtrConstant(0); 11183 unsigned EltSize = (unsigned)TD.getTypeAllocSize(Elts[0]->getType()); 11184 SDValue One = DAG.getIntPtrConstant(EltSize); 11185 11186 SDValue Cond = DAG.getSetCC(DL, 11187 getSetCCResultType(N0.getValueType()), 11188 N0, N1, CC); 11189 AddToWorkList(Cond.getNode()); 11190 SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(), 11191 Cond, One, Zero); 11192 AddToWorkList(CstOffset.getNode()); 11193 CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx, 11194 CstOffset); 11195 AddToWorkList(CPIdx.getNode()); 11196 return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx, 11197 MachinePointerInfo::getConstantPool(), false, 11198 false, false, Alignment); 11199 11200 } 11201 } 11202 11203 // Check to see if we can perform the "gzip trick", transforming 11204 // (select_cc setlt X, 0, A, 0) -> (and (sra X, (sub size(X), 1), A) 11205 if (N1C && N3C && N3C->isNullValue() && CC == ISD::SETLT && 11206 (N1C->isNullValue() || // (a < 0) ? b : 0 11207 (N1C->getAPIntValue() == 1 && N0 == N2))) { // (a < 1) ? a : 0 11208 EVT XType = N0.getValueType(); 11209 EVT AType = N2.getValueType(); 11210 if (XType.bitsGE(AType)) { 11211 // and (sra X, size(X)-1, A) -> "and (srl X, C2), A" iff A is a 11212 // single-bit constant. 
11213 if (N2C && ((N2C->getAPIntValue() & (N2C->getAPIntValue()-1)) == 0)) { 11214 unsigned ShCtV = N2C->getAPIntValue().logBase2(); 11215 ShCtV = XType.getSizeInBits()-ShCtV-1; 11216 SDValue ShCt = DAG.getConstant(ShCtV, 11217 getShiftAmountTy(N0.getValueType())); 11218 SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), 11219 XType, N0, ShCt); 11220 AddToWorkList(Shift.getNode()); 11221 11222 if (XType.bitsGT(AType)) { 11223 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 11224 AddToWorkList(Shift.getNode()); 11225 } 11226 11227 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 11228 } 11229 11230 SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), 11231 XType, N0, 11232 DAG.getConstant(XType.getSizeInBits()-1, 11233 getShiftAmountTy(N0.getValueType()))); 11234 AddToWorkList(Shift.getNode()); 11235 11236 if (XType.bitsGT(AType)) { 11237 Shift = DAG.getNode(ISD::TRUNCATE, DL, AType, Shift); 11238 AddToWorkList(Shift.getNode()); 11239 } 11240 11241 return DAG.getNode(ISD::AND, DL, AType, Shift, N2); 11242 } 11243 } 11244 11245 // fold (select_cc seteq (and x, y), 0, 0, A) -> (and (shr (shl x)) A) 11246 // where y is has a single bit set. 11247 // A plaintext description would be, we can turn the SELECT_CC into an AND 11248 // when the condition can be materialized as an all-ones register. Any 11249 // single bit-test can be materialized as an all-ones register with 11250 // shift-left and shift-right-arith. 11251 if (CC == ISD::SETEQ && N0->getOpcode() == ISD::AND && 11252 N0->getValueType(0) == VT && 11253 N1C && N1C->isNullValue() && 11254 N2C && N2C->isNullValue()) { 11255 SDValue AndLHS = N0->getOperand(0); 11256 ConstantSDNode *ConstAndRHS = dyn_cast<ConstantSDNode>(N0->getOperand(1)); 11257 if (ConstAndRHS && ConstAndRHS->getAPIntValue().countPopulation() == 1) { 11258 // Shift the tested bit over the sign bit. 
11259 APInt AndMask = ConstAndRHS->getAPIntValue(); 11260 SDValue ShlAmt = 11261 DAG.getConstant(AndMask.countLeadingZeros(), 11262 getShiftAmountTy(AndLHS.getValueType())); 11263 SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt); 11264 11265 // Now arithmetic right shift it all the way over, so the result is either 11266 // all-ones, or zero. 11267 SDValue ShrAmt = 11268 DAG.getConstant(AndMask.getBitWidth()-1, 11269 getShiftAmountTy(Shl.getValueType())); 11270 SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt); 11271 11272 return DAG.getNode(ISD::AND, DL, VT, Shr, N3); 11273 } 11274 } 11275 11276 // fold select C, 16, 0 -> shl C, 4 11277 if (N2C && N3C && N3C->isNullValue() && N2C->getAPIntValue().isPowerOf2() && 11278 TLI.getBooleanContents(N0.getValueType()) == 11279 TargetLowering::ZeroOrOneBooleanContent) { 11280 11281 // If the caller doesn't want us to simplify this into a zext of a compare, 11282 // don't do it. 11283 if (NotExtCompare && N2C->getAPIntValue() == 1) 11284 return SDValue(); 11285 11286 // Get a SetCC of the condition 11287 // NOTE: Don't create a SETCC if it's not legal on this target. 11288 if (!LegalOperations || 11289 TLI.isOperationLegal(ISD::SETCC, 11290 LegalTypes ? 
getSetCCResultType(N0.getValueType()) : MVT::i1)) { 11291 SDValue Temp, SCC; 11292 // cast from setcc result type to select result type 11293 if (LegalTypes) { 11294 SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), 11295 N0, N1, CC); 11296 if (N2.getValueType().bitsLT(SCC.getValueType())) 11297 Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), 11298 N2.getValueType()); 11299 else 11300 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), 11301 N2.getValueType(), SCC); 11302 } else { 11303 SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); 11304 Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), 11305 N2.getValueType(), SCC); 11306 } 11307 11308 AddToWorkList(SCC.getNode()); 11309 AddToWorkList(Temp.getNode()); 11310 11311 if (N2C->getAPIntValue() == 1) 11312 return Temp; 11313 11314 // shl setcc result by log2 n2c 11315 return DAG.getNode( 11316 ISD::SHL, DL, N2.getValueType(), Temp, 11317 DAG.getConstant(N2C->getAPIntValue().logBase2(), 11318 getShiftAmountTy(Temp.getValueType()))); 11319 } 11320 } 11321 11322 // Check to see if this is the equivalent of setcc 11323 // FIXME: Turn all of these into setcc if setcc if setcc is legal 11324 // otherwise, go ahead with the folds. 
11325 if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { 11326 EVT XType = N0.getValueType(); 11327 if (!LegalOperations || 11328 TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { 11329 SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); 11330 if (Res.getValueType() != VT) 11331 Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); 11332 return Res; 11333 } 11334 11335 // fold (seteq X, 0) -> (srl (ctlz X, log2(size(X)))) 11336 if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && 11337 (!LegalOperations || 11338 TLI.isOperationLegal(ISD::CTLZ, XType))) { 11339 SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); 11340 return DAG.getNode(ISD::SRL, DL, XType, Ctlz, 11341 DAG.getConstant(Log2_32(XType.getSizeInBits()), 11342 getShiftAmountTy(Ctlz.getValueType()))); 11343 } 11344 // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) 11345 if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { 11346 SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), 11347 XType, DAG.getConstant(0, XType), N0); 11348 SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); 11349 return DAG.getNode(ISD::SRL, DL, XType, 11350 DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), 11351 DAG.getConstant(XType.getSizeInBits()-1, 11352 getShiftAmountTy(XType))); 11353 } 11354 // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) 11355 if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { 11356 SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, 11357 DAG.getConstant(XType.getSizeInBits()-1, 11358 getShiftAmountTy(N0.getValueType()))); 11359 return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); 11360 } 11361 } 11362 11363 // Check to see if this is an integer abs. 
11364 // select_cc setg[te] X, 0, X, -X -> 11365 // select_cc setgt X, -1, X, -X -> 11366 // select_cc setl[te] X, 0, -X, X -> 11367 // select_cc setlt X, 1, -X, X -> 11368 // Y = sra (X, size(X)-1); xor (add (X, Y), Y) 11369 if (N1C) { 11370 ConstantSDNode *SubC = nullptr; 11371 if (((N1C->isNullValue() && (CC == ISD::SETGT || CC == ISD::SETGE)) || 11372 (N1C->isAllOnesValue() && CC == ISD::SETGT)) && 11373 N0 == N2 && N3.getOpcode() == ISD::SUB && N0 == N3.getOperand(1)) 11374 SubC = dyn_cast<ConstantSDNode>(N3.getOperand(0)); 11375 else if (((N1C->isNullValue() && (CC == ISD::SETLT || CC == ISD::SETLE)) || 11376 (N1C->isOne() && CC == ISD::SETLT)) && 11377 N0 == N3 && N2.getOpcode() == ISD::SUB && N0 == N2.getOperand(1)) 11378 SubC = dyn_cast<ConstantSDNode>(N2.getOperand(0)); 11379 11380 EVT XType = N0.getValueType(); 11381 if (SubC && SubC->isNullValue() && XType.isInteger()) { 11382 SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType, 11383 N0, 11384 DAG.getConstant(XType.getSizeInBits()-1, 11385 getShiftAmountTy(N0.getValueType()))); 11386 SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0), 11387 XType, N0, Shift); 11388 AddToWorkList(Shift.getNode()); 11389 AddToWorkList(Add.getNode()); 11390 return DAG.getNode(ISD::XOR, DL, XType, Add, Shift); 11391 } 11392 } 11393 11394 return SDValue(); 11395 } 11396 11397 /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC. 11398 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, 11399 SDValue N1, ISD::CondCode Cond, 11400 SDLoc DL, bool foldBooleans) { 11401 TargetLowering::DAGCombinerInfo 11402 DagCombineInfo(DAG, Level, false, this); 11403 return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); 11404 } 11405 11406 /// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant, 11407 /// return a DAG expression to select that will generate the same value by 11408 /// multiplying by a magic number. 
See: 11409 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 11410 SDValue DAGCombiner::BuildSDIV(SDNode *N) { 11411 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 11412 if (!C) 11413 return SDValue(); 11414 11415 // Avoid division by zero. 11416 if (!C->getAPIntValue()) 11417 return SDValue(); 11418 11419 std::vector<SDNode*> Built; 11420 SDValue S = 11421 TLI.BuildSDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); 11422 11423 for (SDNode *N : Built) 11424 AddToWorkList(N); 11425 return S; 11426 } 11427 11428 /// BuildUDIV - Given an ISD::UDIV node expressing a divide by constant, 11429 /// return a DAG expression to select that will generate the same value by 11430 /// multiplying by a magic number. See: 11431 /// <http://the.wall.riscom.net/books/proc/ppc/cwg/code2.html> 11432 SDValue DAGCombiner::BuildUDIV(SDNode *N) { 11433 ConstantSDNode *C = isConstOrConstSplat(N->getOperand(1)); 11434 if (!C) 11435 return SDValue(); 11436 11437 // Avoid division by zero. 11438 if (!C->getAPIntValue()) 11439 return SDValue(); 11440 11441 std::vector<SDNode*> Built; 11442 SDValue S = 11443 TLI.BuildUDIV(N, C->getAPIntValue(), DAG, LegalOperations, &Built); 11444 11445 for (SDNode *N : Built) 11446 AddToWorkList(N); 11447 return S; 11448 } 11449 11450 /// FindBaseOffset - Return true if base is a frame index, which is known not 11451 // to alias with anything but itself. Provides base object and offset as 11452 // results. 11453 static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset, 11454 const GlobalValue *&GV, const void *&CV) { 11455 // Assume it is a primitive operation. 11456 Base = Ptr; Offset = 0; GV = nullptr; CV = nullptr; 11457 11458 // If it's an adding a simple constant then integrate the offset. 
11459 if (Base.getOpcode() == ISD::ADD) { 11460 if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Base.getOperand(1))) { 11461 Base = Base.getOperand(0); 11462 Offset += C->getZExtValue(); 11463 } 11464 } 11465 11466 // Return the underlying GlobalValue, and update the Offset. Return false 11467 // for GlobalAddressSDNode since the same GlobalAddress may be represented 11468 // by multiple nodes with different offsets. 11469 if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Base)) { 11470 GV = G->getGlobal(); 11471 Offset += G->getOffset(); 11472 return false; 11473 } 11474 11475 // Return the underlying Constant value, and update the Offset. Return false 11476 // for ConstantSDNodes since the same constant pool entry may be represented 11477 // by multiple nodes with different offsets. 11478 if (ConstantPoolSDNode *C = dyn_cast<ConstantPoolSDNode>(Base)) { 11479 CV = C->isMachineConstantPoolEntry() ? (const void *)C->getMachineCPVal() 11480 : (const void *)C->getConstVal(); 11481 Offset += C->getOffset(); 11482 return false; 11483 } 11484 // If it's any of the following then it can't alias with anything but itself. 11485 return isa<FrameIndexSDNode>(Base); 11486 } 11487 11488 /// isAlias - Return true if there is any possibility that the two addresses 11489 /// overlap. 11490 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) const { 11491 // If they are the same then they must be aliases. 11492 if (Op0->getBasePtr() == Op1->getBasePtr()) return true; 11493 11494 // If they are both volatile then they cannot be reordered. 11495 if (Op0->isVolatile() && Op1->isVolatile()) return true; 11496 11497 // Gather base node and offset information. 
11498 SDValue Base1, Base2; 11499 int64_t Offset1, Offset2; 11500 const GlobalValue *GV1, *GV2; 11501 const void *CV1, *CV2; 11502 bool isFrameIndex1 = FindBaseOffset(Op0->getBasePtr(), 11503 Base1, Offset1, GV1, CV1); 11504 bool isFrameIndex2 = FindBaseOffset(Op1->getBasePtr(), 11505 Base2, Offset2, GV2, CV2); 11506 11507 // If they have a same base address then check to see if they overlap. 11508 if (Base1 == Base2 || (GV1 && (GV1 == GV2)) || (CV1 && (CV1 == CV2))) 11509 return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || 11510 (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); 11511 11512 // It is possible for different frame indices to alias each other, mostly 11513 // when tail call optimization reuses return address slots for arguments. 11514 // To catch this case, look up the actual index of frame indices to compute 11515 // the real alias relationship. 11516 if (isFrameIndex1 && isFrameIndex2) { 11517 MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); 11518 Offset1 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base1)->getIndex()); 11519 Offset2 += MFI->getObjectOffset(cast<FrameIndexSDNode>(Base2)->getIndex()); 11520 return !((Offset1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= Offset2 || 11521 (Offset2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= Offset1); 11522 } 11523 11524 // Otherwise, if we know what the bases are, and they aren't identical, then 11525 // we know they cannot alias. 11526 if ((isFrameIndex1 || CV1 || GV1) && (isFrameIndex2 || CV2 || GV2)) 11527 return false; 11528 11529 // If we know required SrcValue1 and SrcValue2 have relatively large alignment 11530 // compared to the size and offset of the access, we may be able to prove they 11531 // do not alias. This check is conservative for now to catch cases created by 11532 // splitting vector types. 
11533 if ((Op0->getOriginalAlignment() == Op1->getOriginalAlignment()) && 11534 (Op0->getSrcValueOffset() != Op1->getSrcValueOffset()) && 11535 (Op0->getMemoryVT().getSizeInBits() >> 3 == 11536 Op1->getMemoryVT().getSizeInBits() >> 3) && 11537 (Op0->getOriginalAlignment() > Op0->getMemoryVT().getSizeInBits()) >> 3) { 11538 int64_t OffAlign1 = Op0->getSrcValueOffset() % Op0->getOriginalAlignment(); 11539 int64_t OffAlign2 = Op1->getSrcValueOffset() % Op1->getOriginalAlignment(); 11540 11541 // There is no overlap between these relatively aligned accesses of similar 11542 // size, return no alias. 11543 if ((OffAlign1 + (Op0->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign2 || 11544 (OffAlign2 + (Op1->getMemoryVT().getSizeInBits() >> 3)) <= OffAlign1) 11545 return false; 11546 } 11547 11548 bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA : 11549 TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA(); 11550 #ifndef NDEBUG 11551 if (CombinerAAOnlyFunc.getNumOccurrences() && 11552 CombinerAAOnlyFunc != DAG.getMachineFunction().getName()) 11553 UseAA = false; 11554 #endif 11555 if (UseAA && 11556 Op0->getMemOperand()->getValue() && Op1->getMemOperand()->getValue()) { 11557 // Use alias analysis information. 11558 int64_t MinOffset = std::min(Op0->getSrcValueOffset(), 11559 Op1->getSrcValueOffset()); 11560 int64_t Overlap1 = (Op0->getMemoryVT().getSizeInBits() >> 3) + 11561 Op0->getSrcValueOffset() - MinOffset; 11562 int64_t Overlap2 = (Op1->getMemoryVT().getSizeInBits() >> 3) + 11563 Op1->getSrcValueOffset() - MinOffset; 11564 AliasAnalysis::AliasResult AAResult = 11565 AA.alias(AliasAnalysis::Location(Op0->getMemOperand()->getValue(), 11566 Overlap1, 11567 UseTBAA ? Op0->getTBAAInfo() : nullptr), 11568 AliasAnalysis::Location(Op1->getMemOperand()->getValue(), 11569 Overlap2, 11570 UseTBAA ? 
Op1->getTBAAInfo() : nullptr)); 11571 if (AAResult == AliasAnalysis::NoAlias) 11572 return false; 11573 } 11574 11575 // Otherwise we have to assume they alias. 11576 return true; 11577 } 11578 11579 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, 11580 /// looking for aliasing nodes and adding them to the Aliases vector. 11581 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain, 11582 SmallVectorImpl<SDValue> &Aliases) { 11583 SmallVector<SDValue, 8> Chains; // List of chains to visit. 11584 SmallPtrSet<SDNode *, 16> Visited; // Visited node set. 11585 11586 // Get alias information for node. 11587 bool IsLoad = isa<LoadSDNode>(N) && !cast<LSBaseSDNode>(N)->isVolatile(); 11588 11589 // Starting off. 11590 Chains.push_back(OriginalChain); 11591 unsigned Depth = 0; 11592 11593 // Look at each chain and determine if it is an alias. If so, add it to the 11594 // aliases list. If not, then continue up the chain looking for the next 11595 // candidate. 11596 while (!Chains.empty()) { 11597 SDValue Chain = Chains.back(); 11598 Chains.pop_back(); 11599 11600 // For TokenFactor nodes, look at each operand and only continue up the 11601 // chain until we find two aliases. If we've seen two aliases, assume we'll 11602 // find more and revert to original chain since the xform is unlikely to be 11603 // profitable. 11604 // 11605 // FIXME: The depth check could be made to return the last non-aliasing 11606 // chain we found before we hit a tokenfactor rather than the original 11607 // chain. 11608 if (Depth > 6 || Aliases.size() == 2) { 11609 Aliases.clear(); 11610 Aliases.push_back(OriginalChain); 11611 return; 11612 } 11613 11614 // Don't bother if we've been before. 11615 if (!Visited.insert(Chain.getNode())) 11616 continue; 11617 11618 switch (Chain.getOpcode()) { 11619 case ISD::EntryToken: 11620 // Entry token is ideal chain operand, but handled in FindBetterChain. 
      break;

    case ISD::LOAD:
    case ISD::STORE: {
      // Get alias information for Chain.
      bool IsOpLoad = isa<LoadSDNode>(Chain.getNode()) &&
        !cast<LSBaseSDNode>(Chain.getNode())->isVolatile();

      // If chain is alias then stop here.
      // Two non-volatile loads never alias for reordering purposes, so only
      // run the full isAlias check otherwise.
      if (!(IsLoad && IsOpLoad) &&
          isAlias(cast<LSBaseSDNode>(N), cast<LSBaseSDNode>(Chain.getNode()))) {
        Aliases.push_back(Chain);
      } else {
        // Look further up the chain.
        Chains.push_back(Chain.getOperand(0));
        ++Depth;
      }
      break;
    }

    case ISD::TokenFactor:
      // We have to check each of the operands of the token factor for "small"
      // token factors, so we queue them up.  Adding the operands to the queue
      // (stack) in reverse order maintains the original order and increases the
      // likelihood that getNode will find a matching token factor (CSE.)
      if (Chain.getNumOperands() > 16) {
        Aliases.push_back(Chain);
        break;
      }
      for (unsigned n = Chain.getNumOperands(); n;)
        Chains.push_back(Chain.getOperand(--n));
      ++Depth;
      break;

    default:
      // For all other instructions we will just have to take what we can get.
      Aliases.push_back(Chain);
      break;
    }
  }

  // We need to be careful here to also search for aliases through the
  // value operand of a store, etc. Consider the following situation:
  //   Token1 = ...
  //   L1 = load Token1, %52
  //   S1 = store Token1, L1, %51
  //   L2 = load Token1, %52+8
  //   S2 = store Token1, L2, %51+8
  //   Token2 = Token(S1, S2)
  //   L3 = load Token2, %53
  //   S3 = store Token2, L3, %52
  //   L4 = load Token2, %53+8
  //   S4 = store Token2, L4, %52+8
  // If we search for aliases of S3 (which loads address %52), and we look
  // only through the chain, then we'll miss the trivial dependence on L1
  // (which also loads from %52). We then might change all loads and
  // stores to use Token1 as their chain operand, which could result in
  // copying %53 into %52 before copying %52 into %51 (which should
  // happen first).
  //
  // The problem is, however, that searching for such data dependencies
  // can become expensive, and the cost is not directly related to the
  // chain depth. Instead, we'll rule out such configurations here by
  // insisting that we've visited all chain users (except for users
  // of the original chain, which is not necessary). When doing this,
  // we need to look through nodes we don't care about (otherwise, things
  // like register copies will interfere with trivial cases).

  SmallVector<const SDNode *, 16> Worklist;
  for (SmallPtrSet<SDNode *, 16>::iterator I = Visited.begin(),
       IE = Visited.end(); I != IE; ++I)
    if (*I != OriginalChain.getNode())
      Worklist.push_back(*I);

  while (!Worklist.empty()) {
    const SDNode *M = Worklist.pop_back_val();

    // We have already visited M, and want to make sure we've visited any uses
    // of M that we care about. For uses that we've not visited, and don't
    // care about, queue them to the worklist.

    // Only chain uses (MVT::Other) matter; Visited.insert returns true only
    // for uses not already seen.
    for (SDNode::use_iterator UI = M->use_begin(),
         UIE = M->use_end(); UI != UIE; ++UI)
      if (UI.getUse().getValueType() == MVT::Other && Visited.insert(*UI)) {
        if (isa<MemIntrinsicSDNode>(*UI) || isa<MemSDNode>(*UI)) {
          // We've not visited this use, and we care about it (it could have an
          // ordering dependency with the original node).  Give up and keep the
          // original chain.
          Aliases.clear();
          Aliases.push_back(OriginalChain);
          return;
        }

        // We've not visited this use, but we don't care about it. Mark it as
        // visited and enqueue it to the worklist.
        Worklist.push_back(*UI);
      }
  }
}

/// FindBetterChain - Walk up chain skipping non-aliasing memory nodes, looking
/// for a better chain (aliasing node.)
SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
  SmallVector<SDValue, 8> Aliases;  // Ops for replacing token factor.

  // Accumulate all the aliases to this node.
  GatherAllAliases(N, OldChain, Aliases);

  // If no operands then chain to entry token.
  if (Aliases.size() == 0)
    return DAG.getEntryNode();

  // If a single operand then chain to it.  We don't need to revisit it.
  if (Aliases.size() == 1)
    return Aliases[0];

  // Construct a custom tailored token factor.
  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Aliases);
}

// SelectionDAG::Combine - This is the entry point for the file.
//
void SelectionDAG::Combine(CombineLevel Level, AliasAnalysis &AA,
                           CodeGenOpt::Level OptLevel) {
  /// run - This is the main entry point to this class.
  ///
  DAGCombiner(*this, AA, OptLevel).Run(Level);
}