1 //===- InstCombineAddSub.cpp ----------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the visit functions for add, fadd, sub, and fsub. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "InstCombine.h" 15 #include "llvm/Analysis/InstructionSimplify.h" 16 #include "llvm/IR/DataLayout.h" 17 #include "llvm/Support/GetElementPtrTypeIterator.h" 18 #include "llvm/Support/PatternMatch.h" 19 using namespace llvm; 20 using namespace PatternMatch; 21 22 namespace { 23 24 /// Class representing coefficient of floating-point addend. 25 /// This class needs to be highly efficient, which is especially true for 26 /// the constructor. As of I write this comment, the cost of the default 27 /// constructor is merely 4-byte-store-zero (Assuming compiler is able to 28 /// perform write-merging). 29 /// 30 class FAddendCoef { 31 public: 32 // The constructor has to initialize a APFloat, which is uncessary for 33 // most addends which have coefficient either 1 or -1. So, the constructor 34 // is expensive. In order to avoid the cost of the constructor, we should 35 // reuse some instances whenever possible. The pre-created instances 36 // FAddCombine::Add[0-5] embodies this idea. 37 // 38 FAddendCoef() : IsFp(false), BufHasFpVal(false), IntVal(0) {} 39 ~FAddendCoef(); 40 41 void set(short C) { 42 assert(!insaneIntVal(C) && "Insane coefficient"); 43 IsFp = false; IntVal = C; 44 } 45 46 void set(const APFloat& C); 47 48 void negate(); 49 50 bool isZero() const { return isInt() ? !IntVal : getFpVal().isZero(); } 51 Value *getValue(Type *) const; 52 53 // If possible, don't define operator+/operator- etc because these 54 // operators inevitably call FAddendCoef's constructor which is not cheap. 55 void operator=(const FAddendCoef &A); 56 void operator+=(const FAddendCoef &A); 57 void operator-=(const FAddendCoef &A); 58 void operator*=(const FAddendCoef &S); 59 60 bool isOne() const { return isInt() && IntVal == 1; } 61 bool isTwo() const { return isInt() && IntVal == 2; } 62 bool isMinusOne() const { return isInt() && IntVal == -1; } 63 bool isMinusTwo() const { return isInt() && IntVal == -2; } 64 65 private: 66 bool insaneIntVal(int V) { return V > 4 || V < -4; } 67 APFloat *getFpValPtr(void) 68 { return reinterpret_cast<APFloat*>(&FpValBuf.buffer[0]); } 69 const APFloat *getFpValPtr(void) const 70 { return reinterpret_cast<const APFloat*>(&FpValBuf.buffer[0]); } 71 72 const APFloat &getFpVal(void) const { 73 assert(IsFp && BufHasFpVal && "Incorret state"); 74 return *getFpValPtr(); 75 } 76 77 APFloat &getFpVal(void) 78 { assert(IsFp && BufHasFpVal && "Incorret state"); return *getFpValPtr(); } 79 80 bool isInt() const { return !IsFp; } 81 82 private: 83 84 bool IsFp; 85 86 // True iff FpValBuf contains an instance of APFloat. 87 bool BufHasFpVal; 88 89 // The integer coefficient of an individual addend is either 1 or -1, 90 // and we try to simplify at most 4 addends from neighboring at most 91 // two instructions. So the range of <IntVal> falls in [-4, 4]. APInt 92 // is overkill of this end. 93 short IntVal; 94 95 AlignedCharArrayUnion<APFloat> FpValBuf; 96 }; 97 98 /// FAddend is used to represent floating-point addend. An addend is 99 /// represented as <C, V>, where the V is a symbolic value, and C is a 100 /// constant coefficient. A constant addend is represented as <C, 0>. 101 /// 102 class FAddend { 103 public: 104 FAddend() { Val = 0; } 105 106 Value *getSymVal (void) const { return Val; } 107 const FAddendCoef &getCoef(void) const { return Coeff; } 108 109 bool isConstant() const { return Val == 0; } 110 bool isZero() const { return Coeff.isZero(); } 111 112 void set(short Coefficient, Value *V) { Coeff.set(Coefficient), Val = V; } 113 void set(const APFloat& Coefficient, Value *V) 114 { Coeff.set(Coefficient); Val = V; } 115 void set(const ConstantFP* Coefficient, Value *V) 116 { Coeff.set(Coefficient->getValueAPF()); Val = V; } 117 118 void negate() { Coeff.negate(); } 119 120 /// Drill down the U-D chain one step to find the definition of V, and 121 /// try to break the definition into one or two addends. 122 static unsigned drillValueDownOneStep(Value* V, FAddend &A0, FAddend &A1); 123 124 /// Similar to FAddend::drillDownOneStep() except that the value being 125 /// splitted is the addend itself. 126 unsigned drillAddendDownOneStep(FAddend &Addend0, FAddend &Addend1) const; 127 128 void operator+=(const FAddend &T) { 129 assert((Val == T.Val) && "Symbolic-values disagree"); 130 Coeff += T.Coeff; 131 } 132 133 private: 134 void Scale(const FAddendCoef& ScaleAmt) { Coeff *= ScaleAmt; } 135 136 // This addend has the value of "Coeff * Val". 137 Value *Val; 138 FAddendCoef Coeff; 139 }; 140 141 /// FAddCombine is the class for optimizing an unsafe fadd/fsub along 142 /// with its neighboring at most two instructions. 143 /// 144 class FAddCombine { 145 public: 146 FAddCombine(InstCombiner::BuilderTy *B) : Builder(B), Instr(0) {} 147 Value *simplify(Instruction *FAdd); 148 149 private: 150 typedef SmallVector<const FAddend*, 4> AddendVect; 151 152 Value *simplifyFAdd(AddendVect& V, unsigned InstrQuota); 153 154 Value *performFactorization(Instruction *I); 155 156 /// Convert given addend to a Value 157 Value *createAddendVal(const FAddend &A, bool& NeedNeg); 158 159 /// Return the number of instructions needed to emit the N-ary addition. 160 unsigned calcInstrNumber(const AddendVect& Vect); 161 Value *createFSub(Value *Opnd0, Value *Opnd1); 162 Value *createFAdd(Value *Opnd0, Value *Opnd1); 163 Value *createFMul(Value *Opnd0, Value *Opnd1); 164 Value *createFDiv(Value *Opnd0, Value *Opnd1); 165 Value *createFNeg(Value *V); 166 Value *createNaryFAdd(const AddendVect& Opnds, unsigned InstrQuota); 167 void createInstPostProc(Instruction *NewInst); 168 169 InstCombiner::BuilderTy *Builder; 170 Instruction *Instr; 171 172 private: 173 // Debugging stuff are clustered here. 174 #ifndef NDEBUG 175 unsigned CreateInstrNum; 176 void initCreateInstNum() { CreateInstrNum = 0; } 177 void incCreateInstNum() { CreateInstrNum++; } 178 #else 179 void initCreateInstNum() {} 180 void incCreateInstNum() {} 181 #endif 182 }; 183 } 184 185 //===----------------------------------------------------------------------===// 186 // 187 // Implementation of 188 // {FAddendCoef, FAddend, FAddition, FAddCombine}. 189 // 190 //===----------------------------------------------------------------------===// 191 FAddendCoef::~FAddendCoef() { 192 if (BufHasFpVal) 193 getFpValPtr()->~APFloat(); 194 } 195 196 void FAddendCoef::set(const APFloat& C) { 197 APFloat *P = getFpValPtr(); 198 199 if (isInt()) { 200 // As the buffer is meanless byte stream, we cannot call 201 // APFloat::operator=(). 202 new(P) APFloat(C); 203 } else 204 *P = C; 205 206 IsFp = BufHasFpVal = true; 207 } 208 209 void FAddendCoef::operator=(const FAddendCoef& That) { 210 if (That.isInt()) 211 set(That.IntVal); 212 else 213 set(That.getFpVal()); 214 } 215 216 void FAddendCoef::operator+=(const FAddendCoef &That) { 217 enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; 218 if (isInt() == That.isInt()) { 219 if (isInt()) 220 IntVal += That.IntVal; 221 else 222 getFpVal().add(That.getFpVal(), RndMode); 223 return; 224 } 225 226 if (isInt()) { 227 const APFloat &T = That.getFpVal(); 228 set(T); 229 getFpVal().add(APFloat(T.getSemantics(), IntVal), RndMode); 230 return; 231 } 232 233 APFloat &T = getFpVal(); 234 T.add(APFloat(T.getSemantics(), That.IntVal), RndMode); 235 } 236 237 void FAddendCoef::operator-=(const FAddendCoef &That) { 238 enum APFloat::roundingMode RndMode = APFloat::rmNearestTiesToEven; 239 if (isInt() == That.isInt()) { 240 if (isInt()) 241 IntVal -= That.IntVal; 242 else 243 getFpVal().subtract(That.getFpVal(), RndMode); 244 return; 245 } 246 247 if (isInt()) { 248 const APFloat &T = That.getFpVal(); 249 set(T); 250 getFpVal().subtract(APFloat(T.getSemantics(), IntVal), RndMode); 251 return; 252 } 253 254 APFloat &T = getFpVal(); 255 T.subtract(APFloat(T.getSemantics(), IntVal), RndMode); 256 } 257 258 void FAddendCoef::operator*=(const FAddendCoef &That) { 259 if (That.isOne()) 260 return; 261 262 if (That.isMinusOne()) { 263 negate(); 264 return; 265 } 266 267 if (isInt() && That.isInt()) { 268 int Res = IntVal * (int)That.IntVal; 269 assert(!insaneIntVal(Res) && "Insane int value"); 270 IntVal = Res; 271 return; 272 } 273 274 const fltSemantics &Semantic = 275 isInt() ? That.getFpVal().getSemantics() : getFpVal().getSemantics(); 276 277 if (isInt()) 278 set(APFloat(Semantic, IntVal)); 279 APFloat &F0 = getFpVal(); 280 281 if (That.isInt()) 282 F0.multiply(APFloat(Semantic, That.IntVal), APFloat::rmNearestTiesToEven); 283 else 284 F0.multiply(That.getFpVal(), APFloat::rmNearestTiesToEven); 285 286 return; 287 } 288 289 void FAddendCoef::negate() { 290 if (isInt()) 291 IntVal = 0 - IntVal; 292 else 293 getFpVal().changeSign(); 294 } 295 296 Value *FAddendCoef::getValue(Type *Ty) const { 297 return isInt() ? 298 ConstantFP::get(Ty, float(IntVal)) : 299 ConstantFP::get(Ty->getContext(), getFpVal()); 300 } 301 302 // The definition of <Val> Addends 303 // ========================================= 304 // A + B <1, A>, <1,B> 305 // A - B <1, A>, <1,B> 306 // 0 - B <-1, B> 307 // C * A, <C, A> 308 // A + C <1, A> <C, NULL> 309 // 0 +/- 0 <0, NULL> (corner case) 310 // 311 // Legend: A and B are not constant, C is constant 312 // 313 unsigned FAddend::drillValueDownOneStep 314 (Value *Val, FAddend &Addend0, FAddend &Addend1) { 315 Instruction *I = 0; 316 if (Val == 0 || !(I = dyn_cast<Instruction>(Val))) 317 return 0; 318 319 unsigned Opcode = I->getOpcode(); 320 321 if (Opcode == Instruction::FAdd || Opcode == Instruction::FSub) { 322 ConstantFP *C0, *C1; 323 Value *Opnd0 = I->getOperand(0); 324 Value *Opnd1 = I->getOperand(1); 325 if ((C0 = dyn_cast<ConstantFP>(Opnd0)) && C0->isZero()) 326 Opnd0 = 0; 327 328 if ((C1 = dyn_cast<ConstantFP>(Opnd1)) && C1->isZero()) 329 Opnd1 = 0; 330 331 if (Opnd0) { 332 if (!C0) 333 Addend0.set(1, Opnd0); 334 else 335 Addend0.set(C0, 0); 336 } 337 338 if (Opnd1) { 339 FAddend &Addend = Opnd0 ? Addend1 : Addend0; 340 if (!C1) 341 Addend.set(1, Opnd1); 342 else 343 Addend.set(C1, 0); 344 if (Opcode == Instruction::FSub) 345 Addend.negate(); 346 } 347 348 if (Opnd0 || Opnd1) 349 return Opnd0 && Opnd1 ? 2 : 1; 350 351 // Both operands are zero. Weird! 352 Addend0.set(APFloat(C0->getValueAPF().getSemantics()), 0); 353 return 1; 354 } 355 356 if (I->getOpcode() == Instruction::FMul) { 357 Value *V0 = I->getOperand(0); 358 Value *V1 = I->getOperand(1); 359 if (ConstantFP *C = dyn_cast<ConstantFP>(V0)) { 360 Addend0.set(C, V1); 361 return 1; 362 } 363 364 if (ConstantFP *C = dyn_cast<ConstantFP>(V1)) { 365 Addend0.set(C, V0); 366 return 1; 367 } 368 } 369 370 return 0; 371 } 372 373 // Try to break *this* addend into two addends. e.g. Suppose this addend is 374 // <2.3, V>, and V = X + Y, by calling this function, we obtain two addends, 375 // i.e. <2.3, X> and <2.3, Y>. 376 // 377 unsigned FAddend::drillAddendDownOneStep 378 (FAddend &Addend0, FAddend &Addend1) const { 379 if (isConstant()) 380 return 0; 381 382 unsigned BreakNum = FAddend::drillValueDownOneStep(Val, Addend0, Addend1); 383 if (!BreakNum || Coeff.isOne()) 384 return BreakNum; 385 386 Addend0.Scale(Coeff); 387 388 if (BreakNum == 2) 389 Addend1.Scale(Coeff); 390 391 return BreakNum; 392 } 393 394 // Try to perform following optimization on the input instruction I. Return the 395 // simplified expression if was successful; otherwise, return 0. 396 // 397 // Instruction "I" is Simplified into 398 // ------------------------------------------------------- 399 // (x * y) +/- (x * z) x * (y +/- z) 400 // (y / x) +/- (z / x) (y +/- z) / x 401 // 402 Value *FAddCombine::performFactorization(Instruction *I) { 403 assert((I->getOpcode() == Instruction::FAdd || 404 I->getOpcode() == Instruction::FSub) && "Expect add/sub"); 405 406 Instruction *I0 = dyn_cast<Instruction>(I->getOperand(0)); 407 Instruction *I1 = dyn_cast<Instruction>(I->getOperand(1)); 408 409 if (!I0 || !I1 || I0->getOpcode() != I1->getOpcode()) 410 return 0; 411 412 bool isMpy = false; 413 if (I0->getOpcode() == Instruction::FMul) 414 isMpy = true; 415 else if (I0->getOpcode() != Instruction::FDiv) 416 return 0; 417 418 Value *Opnd0_0 = I0->getOperand(0); 419 Value *Opnd0_1 = I0->getOperand(1); 420 Value *Opnd1_0 = I1->getOperand(0); 421 Value *Opnd1_1 = I1->getOperand(1); 422 423 // Input Instr I Factor AddSub0 AddSub1 424 // ---------------------------------------------- 425 // (x*y) +/- (x*z) x y z 426 // (y/x) +/- (z/x) x y z 427 // 428 Value *Factor = 0; 429 Value *AddSub0 = 0, *AddSub1 = 0; 430 431 if (isMpy) { 432 if (Opnd0_0 == Opnd1_0 || Opnd0_0 == Opnd1_1) 433 Factor = Opnd0_0; 434 else if (Opnd0_1 == Opnd1_0 || Opnd0_1 == Opnd1_1) 435 Factor = Opnd0_1; 436 437 if (Factor) { 438 AddSub0 = (Factor == Opnd0_0) ? Opnd0_1 : Opnd0_0; 439 AddSub1 = (Factor == Opnd1_0) ? Opnd1_1 : Opnd1_0; 440 } 441 } else if (Opnd0_1 == Opnd1_1) { 442 Factor = Opnd0_1; 443 AddSub0 = Opnd0_0; 444 AddSub1 = Opnd1_0; 445 } 446 447 if (!Factor) 448 return 0; 449 450 // Create expression "NewAddSub = AddSub0 +/- AddsSub1" 451 Value *NewAddSub = (I->getOpcode() == Instruction::FAdd) ? 452 createFAdd(AddSub0, AddSub1) : 453 createFSub(AddSub0, AddSub1); 454 if (ConstantFP *CFP = dyn_cast<ConstantFP>(NewAddSub)) { 455 const APFloat &F = CFP->getValueAPF(); 456 if (!F.isNormal() || F.isDenormal()) 457 return 0; 458 } 459 460 if (isMpy) 461 return createFMul(Factor, NewAddSub); 462 463 return createFDiv(NewAddSub, Factor); 464 } 465 466 Value *FAddCombine::simplify(Instruction *I) { 467 assert(I->hasUnsafeAlgebra() && "Should be in unsafe mode"); 468 469 // Currently we are not able to handle vector type. 470 if (I->getType()->isVectorTy()) 471 return 0; 472 473 assert((I->getOpcode() == Instruction::FAdd || 474 I->getOpcode() == Instruction::FSub) && "Expect add/sub"); 475 476 // Save the instruction before calling other member-functions. 477 Instr = I; 478 479 FAddend Opnd0, Opnd1, Opnd0_0, Opnd0_1, Opnd1_0, Opnd1_1; 480 481 unsigned OpndNum = FAddend::drillValueDownOneStep(I, Opnd0, Opnd1); 482 483 // Step 1: Expand the 1st addend into Opnd0_0 and Opnd0_1. 484 unsigned Opnd0_ExpNum = 0; 485 unsigned Opnd1_ExpNum = 0; 486 487 if (!Opnd0.isConstant()) 488 Opnd0_ExpNum = Opnd0.drillAddendDownOneStep(Opnd0_0, Opnd0_1); 489 490 // Step 2: Expand the 2nd addend into Opnd1_0 and Opnd1_1. 491 if (OpndNum == 2 && !Opnd1.isConstant()) 492 Opnd1_ExpNum = Opnd1.drillAddendDownOneStep(Opnd1_0, Opnd1_1); 493 494 // Step 3: Try to optimize Opnd0_0 + Opnd0_1 + Opnd1_0 + Opnd1_1 495 if (Opnd0_ExpNum && Opnd1_ExpNum) { 496 AddendVect AllOpnds; 497 AllOpnds.push_back(&Opnd0_0); 498 AllOpnds.push_back(&Opnd1_0); 499 if (Opnd0_ExpNum == 2) 500 AllOpnds.push_back(&Opnd0_1); 501 if (Opnd1_ExpNum == 2) 502 AllOpnds.push_back(&Opnd1_1); 503 504 // Compute instruction quota. We should save at least one instruction. 505 unsigned InstQuota = 0; 506 507 Value *V0 = I->getOperand(0); 508 Value *V1 = I->getOperand(1); 509 InstQuota = ((!isa<Constant>(V0) && V0->hasOneUse()) && 510 (!isa<Constant>(V1) && V1->hasOneUse())) ? 2 : 1; 511 512 if (Value *R = simplifyFAdd(AllOpnds, InstQuota)) 513 return R; 514 } 515 516 if (OpndNum != 2) { 517 // The input instruction is : "I=0.0 +/- V". If the "V" were able to be 518 // splitted into two addends, say "V = X - Y", the instruction would have 519 // been optimized into "I = Y - X" in the previous steps. 520 // 521 const FAddendCoef &CE = Opnd0.getCoef(); 522 return CE.isOne() ? Opnd0.getSymVal() : 0; 523 } 524 525 // step 4: Try to optimize Opnd0 + Opnd1_0 [+ Opnd1_1] 526 if (Opnd1_ExpNum) { 527 AddendVect AllOpnds; 528 AllOpnds.push_back(&Opnd0); 529 AllOpnds.push_back(&Opnd1_0); 530 if (Opnd1_ExpNum == 2) 531 AllOpnds.push_back(&Opnd1_1); 532 533 if (Value *R = simplifyFAdd(AllOpnds, 1)) 534 return R; 535 } 536 537 // step 5: Try to optimize Opnd1 + Opnd0_0 [+ Opnd0_1] 538 if (Opnd0_ExpNum) { 539 AddendVect AllOpnds; 540 AllOpnds.push_back(&Opnd1); 541 AllOpnds.push_back(&Opnd0_0); 542 if (Opnd0_ExpNum == 2) 543 AllOpnds.push_back(&Opnd0_1); 544 545 if (Value *R = simplifyFAdd(AllOpnds, 1)) 546 return R; 547 } 548 549 // step 6: Try factorization as the last resort, 550 return performFactorization(I); 551 } 552 553 Value *FAddCombine::simplifyFAdd(AddendVect& Addends, unsigned InstrQuota) { 554 555 unsigned AddendNum = Addends.size(); 556 assert(AddendNum <= 4 && "Too many addends"); 557 558 // For saving intermediate results; 559 unsigned NextTmpIdx = 0; 560 FAddend TmpResult[3]; 561 562 // Points to the constant addend of the resulting simplified expression. 563 // If the resulting expr has constant-addend, this constant-addend is 564 // desirable to reside at the top of the resulting expression tree. Placing 565 // constant close to supper-expr(s) will potentially reveal some optimization 566 // opportunities in super-expr(s). 567 // 568 const FAddend *ConstAdd = 0; 569 570 // Simplified addends are placed <SimpVect>. 571 AddendVect SimpVect; 572 573 // The outer loop works on one symbolic-value at a time. Suppose the input 574 // addends are : <a1, x>, <b1, y>, <a2, x>, <c1, z>, <b2, y>, ... 575 // The symbolic-values will be processed in this order: x, y, z. 576 // 577 for (unsigned SymIdx = 0; SymIdx < AddendNum; SymIdx++) { 578 579 const FAddend *ThisAddend = Addends[SymIdx]; 580 if (!ThisAddend) { 581 // This addend was processed before. 582 continue; 583 } 584 585 Value *Val = ThisAddend->getSymVal(); 586 unsigned StartIdx = SimpVect.size(); 587 SimpVect.push_back(ThisAddend); 588 589 // The inner loop collects addends sharing same symbolic-value, and these 590 // addends will be later on folded into a single addend. Following above 591 // example, if the symbolic value "y" is being processed, the inner loop 592 // will collect two addends "<b1,y>" and "<b2,Y>". These two addends will 593 // be later on folded into "<b1+b2, y>". 594 // 595 for (unsigned SameSymIdx = SymIdx + 1; 596 SameSymIdx < AddendNum; SameSymIdx++) { 597 const FAddend *T = Addends[SameSymIdx]; 598 if (T && T->getSymVal() == Val) { 599 // Set null such that next iteration of the outer loop will not process 600 // this addend again. 601 Addends[SameSymIdx] = 0; 602 SimpVect.push_back(T); 603 } 604 } 605 606 // If multiple addends share same symbolic value, fold them together. 607 if (StartIdx + 1 != SimpVect.size()) { 608 FAddend &R = TmpResult[NextTmpIdx ++]; 609 R = *SimpVect[StartIdx]; 610 for (unsigned Idx = StartIdx + 1; Idx < SimpVect.size(); Idx++) 611 R += *SimpVect[Idx]; 612 613 // Pop all addends being folded and push the resulting folded addend. 614 SimpVect.resize(StartIdx); 615 if (Val != 0) { 616 if (!R.isZero()) { 617 SimpVect.push_back(&R); 618 } 619 } else { 620 // Don't push constant addend at this time. It will be the last element 621 // of <SimpVect>. 622 ConstAdd = &R; 623 } 624 } 625 } 626 627 assert((NextTmpIdx <= sizeof(TmpResult)/sizeof(TmpResult[0]) + 1) && 628 "out-of-bound access"); 629 630 if (ConstAdd) 631 SimpVect.push_back(ConstAdd); 632 633 Value *Result; 634 if (!SimpVect.empty()) 635 Result = createNaryFAdd(SimpVect, InstrQuota); 636 else { 637 // The addition is folded to 0.0. 638 Result = ConstantFP::get(Instr->getType(), 0.0); 639 } 640 641 return Result; 642 } 643 644 Value *FAddCombine::createNaryFAdd 645 (const AddendVect &Opnds, unsigned InstrQuota) { 646 assert(!Opnds.empty() && "Expect at least one addend"); 647 648 // Step 1: Check if the # of instructions needed exceeds the quota. 649 // 650 unsigned InstrNeeded = calcInstrNumber(Opnds); 651 if (InstrNeeded > InstrQuota) 652 return 0; 653 654 initCreateInstNum(); 655 656 // step 2: Emit the N-ary addition. 657 // Note that at most three instructions are involved in Fadd-InstCombine: the 658 // addition in question, and at most two neighboring instructions. 659 // The resulting optimized addition should have at least one less instruction 660 // than the original addition expression tree. This implies that the resulting 661 // N-ary addition has at most two instructions, and we don't need to worry 662 // about tree-height when constructing the N-ary addition. 663 664 Value *LastVal = 0; 665 bool LastValNeedNeg = false; 666 667 // Iterate the addends, creating fadd/fsub using adjacent two addends. 668 for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); 669 I != E; I++) { 670 bool NeedNeg; 671 Value *V = createAddendVal(**I, NeedNeg); 672 if (!LastVal) { 673 LastVal = V; 674 LastValNeedNeg = NeedNeg; 675 continue; 676 } 677 678 if (LastValNeedNeg == NeedNeg) { 679 LastVal = createFAdd(LastVal, V); 680 continue; 681 } 682 683 if (LastValNeedNeg) 684 LastVal = createFSub(V, LastVal); 685 else 686 LastVal = createFSub(LastVal, V); 687 688 LastValNeedNeg = false; 689 } 690 691 if (LastValNeedNeg) { 692 LastVal = createFNeg(LastVal); 693 } 694 695 #ifndef NDEBUG 696 assert(CreateInstrNum == InstrNeeded && 697 "Inconsistent in instruction numbers"); 698 #endif 699 700 return LastVal; 701 } 702 703 Value *FAddCombine::createFSub 704 (Value *Opnd0, Value *Opnd1) { 705 Value *V = Builder->CreateFSub(Opnd0, Opnd1); 706 if (Instruction *I = dyn_cast<Instruction>(V)) 707 createInstPostProc(I); 708 return V; 709 } 710 711 Value *FAddCombine::createFNeg(Value *V) { 712 Value *Zero = cast<Value>(ConstantFP::get(V->getType(), 0.0)); 713 return createFSub(Zero, V); 714 } 715 716 Value *FAddCombine::createFAdd 717 (Value *Opnd0, Value *Opnd1) { 718 Value *V = Builder->CreateFAdd(Opnd0, Opnd1); 719 if (Instruction *I = dyn_cast<Instruction>(V)) 720 createInstPostProc(I); 721 return V; 722 } 723 724 Value *FAddCombine::createFMul(Value *Opnd0, Value *Opnd1) { 725 Value *V = Builder->CreateFMul(Opnd0, Opnd1); 726 if (Instruction *I = dyn_cast<Instruction>(V)) 727 createInstPostProc(I); 728 return V; 729 } 730 731 Value *FAddCombine::createFDiv(Value *Opnd0, Value *Opnd1) { 732 Value *V = Builder->CreateFDiv(Opnd0, Opnd1); 733 if (Instruction *I = dyn_cast<Instruction>(V)) 734 createInstPostProc(I); 735 return V; 736 } 737 738 void FAddCombine::createInstPostProc(Instruction *NewInstr) { 739 NewInstr->setDebugLoc(Instr->getDebugLoc()); 740 741 // Keep track of the number of instruction created. 742 incCreateInstNum(); 743 744 // Propagate fast-math flags 745 NewInstr->setFastMathFlags(Instr->getFastMathFlags()); 746 } 747 748 // Return the number of instruction needed to emit the N-ary addition. 749 // NOTE: Keep this function in sync with createAddendVal(). 750 unsigned FAddCombine::calcInstrNumber(const AddendVect &Opnds) { 751 unsigned OpndNum = Opnds.size(); 752 unsigned InstrNeeded = OpndNum - 1; 753 754 // The number of addends in the form of "(-1)*x". 755 unsigned NegOpndNum = 0; 756 757 // Adjust the number of instructions needed to emit the N-ary add. 758 for (AddendVect::const_iterator I = Opnds.begin(), E = Opnds.end(); 759 I != E; I++) { 760 const FAddend *Opnd = *I; 761 if (Opnd->isConstant()) 762 continue; 763 764 const FAddendCoef &CE = Opnd->getCoef(); 765 if (CE.isMinusOne() || CE.isMinusTwo()) 766 NegOpndNum++; 767 768 // Let the addend be "c * x". If "c == +/-1", the value of the addend 769 // is immediately available; otherwise, it needs exactly one instruction 770 // to evaluate the value. 771 if (!CE.isMinusOne() && !CE.isOne()) 772 InstrNeeded++; 773 } 774 if (NegOpndNum == OpndNum) 775 InstrNeeded++; 776 return InstrNeeded; 777 } 778 779 // Input Addend Value NeedNeg(output) 780 // ================================================================ 781 // Constant C C false 782 // <+/-1, V> V coefficient is -1 783 // <2/-2, V> "fadd V, V" coefficient is -2 784 // <C, V> "fmul V, C" false 785 // 786 // NOTE: Keep this function in sync with FAddCombine::calcInstrNumber. 787 Value *FAddCombine::createAddendVal 788 (const FAddend &Opnd, bool &NeedNeg) { 789 const FAddendCoef &Coeff = Opnd.getCoef(); 790 791 if (Opnd.isConstant()) { 792 NeedNeg = false; 793 return Coeff.getValue(Instr->getType()); 794 } 795 796 Value *OpndVal = Opnd.getSymVal(); 797 798 if (Coeff.isMinusOne() || Coeff.isOne()) { 799 NeedNeg = Coeff.isMinusOne(); 800 return OpndVal; 801 } 802 803 if (Coeff.isTwo() || Coeff.isMinusTwo()) { 804 NeedNeg = Coeff.isMinusTwo(); 805 return createFAdd(OpndVal, OpndVal); 806 } 807 808 NeedNeg = false; 809 return createFMul(OpndVal, Coeff.getValue(Instr->getType())); 810 } 811 812 /// AddOne - Add one to a ConstantInt. 813 static Constant *AddOne(Constant *C) { 814 return ConstantExpr::getAdd(C, ConstantInt::get(C->getType(), 1)); 815 } 816 817 /// SubOne - Subtract one from a ConstantInt. 818 static Constant *SubOne(ConstantInt *C) { 819 return ConstantInt::get(C->getContext(), C->getValue()-1); 820 } 821 822 823 // dyn_castFoldableMul - If this value is a multiply that can be folded into 824 // other computations (because it has a constant operand), return the 825 // non-constant operand of the multiply, and set CST to point to the multiplier. 826 // Otherwise, return null. 827 // 828 static inline Value *dyn_castFoldableMul(Value *V, ConstantInt *&CST) { 829 if (!V->hasOneUse() || !V->getType()->isIntegerTy()) 830 return 0; 831 832 Instruction *I = dyn_cast<Instruction>(V); 833 if (I == 0) return 0; 834 835 if (I->getOpcode() == Instruction::Mul) 836 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) 837 return I->getOperand(0); 838 if (I->getOpcode() == Instruction::Shl) 839 if ((CST = dyn_cast<ConstantInt>(I->getOperand(1)))) { 840 // The multiplier is really 1 << CST. 841 uint32_t BitWidth = cast<IntegerType>(V->getType())->getBitWidth(); 842 uint32_t CSTVal = CST->getLimitedValue(BitWidth); 843 CST = ConstantInt::get(V->getType()->getContext(), 844 APInt(BitWidth, 1).shl(CSTVal)); 845 return I->getOperand(0); 846 } 847 return 0; 848 } 849 850 851 /// WillNotOverflowSignedAdd - Return true if we can prove that: 852 /// (sext (add LHS, RHS)) === (add (sext LHS), (sext RHS)) 853 /// This basically requires proving that the add in the original type would not 854 /// overflow to change the sign bit or have a carry out. 855 bool InstCombiner::WillNotOverflowSignedAdd(Value *LHS, Value *RHS) { 856 // There are different heuristics we can use for this. Here are some simple 857 // ones. 858 859 // Add has the property that adding any two 2's complement numbers can only 860 // have one carry bit which can change a sign. As such, if LHS and RHS each 861 // have at least two sign bits, we know that the addition of the two values 862 // will sign extend fine. 863 if (ComputeNumSignBits(LHS) > 1 && ComputeNumSignBits(RHS) > 1) 864 return true; 865 866 867 // If one of the operands only has one non-zero bit, and if the other operand 868 // has a known-zero bit in a more significant place than it (not including the 869 // sign bit) the ripple may go up to and fill the zero, but won't change the 870 // sign. For example, (X & ~4) + 1. 871 872 // TODO: Implement. 873 874 return false; 875 } 876 877 Instruction *InstCombiner::visitAdd(BinaryOperator &I) { 878 bool Changed = SimplifyAssociativeOrCommutative(I); 879 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 880 881 if (Value *V = SimplifyAddInst(LHS, RHS, I.hasNoSignedWrap(), 882 I.hasNoUnsignedWrap(), TD)) 883 return ReplaceInstUsesWith(I, V); 884 885 // (A*B)+(A*C) -> A*(B+C) etc 886 if (Value *V = SimplifyUsingDistributiveLaws(I)) 887 return ReplaceInstUsesWith(I, V); 888 889 if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) { 890 // X + (signbit) --> X ^ signbit 891 const APInt &Val = CI->getValue(); 892 if (Val.isSignBit()) 893 return BinaryOperator::CreateXor(LHS, RHS); 894 895 // See if SimplifyDemandedBits can simplify this. This handles stuff like 896 // (X & 254)+1 -> (X&254)|1 897 if (SimplifyDemandedInstructionBits(I)) 898 return &I; 899 900 // zext(bool) + C -> bool ? C + 1 : C 901 if (ZExtInst *ZI = dyn_cast<ZExtInst>(LHS)) 902 if (ZI->getSrcTy()->isIntegerTy(1)) 903 return SelectInst::Create(ZI->getOperand(0), AddOne(CI), CI); 904 905 Value *XorLHS = 0; ConstantInt *XorRHS = 0; 906 if (match(LHS, m_Xor(m_Value(XorLHS), m_ConstantInt(XorRHS)))) { 907 uint32_t TySizeBits = I.getType()->getScalarSizeInBits(); 908 const APInt &RHSVal = CI->getValue(); 909 unsigned ExtendAmt = 0; 910 // If we have ADD(XOR(AND(X, 0xFF), 0x80), 0xF..F80), it's a sext. 911 // If we have ADD(XOR(AND(X, 0xFF), 0xF..F80), 0x80), it's a sext. 912 if (XorRHS->getValue() == -RHSVal) { 913 if (RHSVal.isPowerOf2()) 914 ExtendAmt = TySizeBits - RHSVal.logBase2() - 1; 915 else if (XorRHS->getValue().isPowerOf2()) 916 ExtendAmt = TySizeBits - XorRHS->getValue().logBase2() - 1; 917 } 918 919 if (ExtendAmt) { 920 APInt Mask = APInt::getHighBitsSet(TySizeBits, ExtendAmt); 921 if (!MaskedValueIsZero(XorLHS, Mask)) 922 ExtendAmt = 0; 923 } 924 925 if (ExtendAmt) { 926 Constant *ShAmt = ConstantInt::get(I.getType(), ExtendAmt); 927 Value *NewShl = Builder->CreateShl(XorLHS, ShAmt, "sext"); 928 return BinaryOperator::CreateAShr(NewShl, ShAmt); 929 } 930 931 // If this is a xor that was canonicalized from a sub, turn it back into 932 // a sub and fuse this add with it. 933 if (LHS->hasOneUse() && (XorRHS->getValue()+1).isPowerOf2()) { 934 IntegerType *IT = cast<IntegerType>(I.getType()); 935 APInt LHSKnownOne(IT->getBitWidth(), 0); 936 APInt LHSKnownZero(IT->getBitWidth(), 0); 937 ComputeMaskedBits(XorLHS, LHSKnownZero, LHSKnownOne); 938 if ((XorRHS->getValue() | LHSKnownZero).isAllOnesValue()) 939 return BinaryOperator::CreateSub(ConstantExpr::getAdd(XorRHS, CI), 940 XorLHS); 941 } 942 } 943 } 944 945 if (isa<Constant>(RHS) && isa<PHINode>(LHS)) 946 if (Instruction *NV = FoldOpIntoPhi(I)) 947 return NV; 948 949 if (I.getType()->isIntegerTy(1)) 950 return BinaryOperator::CreateXor(LHS, RHS); 951 952 // X + X --> X << 1 953 if (LHS == RHS) { 954 BinaryOperator *New = 955 BinaryOperator::CreateShl(LHS, ConstantInt::get(I.getType(), 1)); 956 New->setHasNoSignedWrap(I.hasNoSignedWrap()); 957 New->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); 958 return New; 959 } 960 961 // -A + B --> B - A 962 // -A + -B --> -(A + B) 963 if (Value *LHSV = dyn_castNegVal(LHS)) { 964 if (!isa<Constant>(RHS)) 965 if (Value *RHSV = dyn_castNegVal(RHS)) { 966 Value *NewAdd = Builder->CreateAdd(LHSV, RHSV, "sum"); 967 return BinaryOperator::CreateNeg(NewAdd); 968 } 969 970 return BinaryOperator::CreateSub(RHS, LHSV); 971 } 972 973 // A + -B --> A - B 974 if (!isa<Constant>(RHS)) 975 if (Value *V = dyn_castNegVal(RHS)) 976 return BinaryOperator::CreateSub(LHS, V); 977 978 979 ConstantInt *C2; 980 if (Value *X = dyn_castFoldableMul(LHS, C2)) { 981 if (X == RHS) // X*C + X --> X * (C+1) 982 return BinaryOperator::CreateMul(RHS, AddOne(C2)); 983 984 // X*C1 + X*C2 --> X * (C1+C2) 985 ConstantInt *C1; 986 if (X == dyn_castFoldableMul(RHS, C1)) 987 return BinaryOperator::CreateMul(X, ConstantExpr::getAdd(C1, C2)); 988 } 989 990 // X + X*C --> X * (C+1) 991 if (dyn_castFoldableMul(RHS, C2) == LHS) 992 return BinaryOperator::CreateMul(LHS, AddOne(C2)); 993 994 // A+B --> A|B iff A and B have no bits set in common. 995 if (IntegerType *IT = dyn_cast<IntegerType>(I.getType())) { 996 APInt LHSKnownOne(IT->getBitWidth(), 0); 997 APInt LHSKnownZero(IT->getBitWidth(), 0); 998 ComputeMaskedBits(LHS, LHSKnownZero, LHSKnownOne); 999 if (LHSKnownZero != 0) { 1000 APInt RHSKnownOne(IT->getBitWidth(), 0); 1001 APInt RHSKnownZero(IT->getBitWidth(), 0); 1002 ComputeMaskedBits(RHS, RHSKnownZero, RHSKnownOne); 1003 1004 // No bits in common -> bitwise or. 1005 if ((LHSKnownZero|RHSKnownZero).isAllOnesValue()) 1006 return BinaryOperator::CreateOr(LHS, RHS); 1007 } 1008 } 1009 1010 // W*X + Y*Z --> W * (X+Z) iff W == Y 1011 { 1012 Value *W, *X, *Y, *Z; 1013 if (match(LHS, m_Mul(m_Value(W), m_Value(X))) && 1014 match(RHS, m_Mul(m_Value(Y), m_Value(Z)))) { 1015 if (W != Y) { 1016 if (W == Z) { 1017 std::swap(Y, Z); 1018 } else if (Y == X) { 1019 std::swap(W, X); 1020 } else if (X == Z) { 1021 std::swap(Y, Z); 1022 std::swap(W, X); 1023 } 1024 } 1025 1026 if (W == Y) { 1027 Value *NewAdd = Builder->CreateAdd(X, Z, LHS->getName()); 1028 return BinaryOperator::CreateMul(W, NewAdd); 1029 } 1030 } 1031 } 1032 1033 if (ConstantInt *CRHS = dyn_cast<ConstantInt>(RHS)) { 1034 Value *X = 0; 1035 if (match(LHS, m_Not(m_Value(X)))) // ~X + C --> (C-1) - X 1036 return BinaryOperator::CreateSub(SubOne(CRHS), X); 1037 1038 // (X & FF00) + xx00 -> (X+xx00) & FF00 1039 if (LHS->hasOneUse() && 1040 match(LHS, m_And(m_Value(X), m_ConstantInt(C2))) && 1041 CRHS->getValue() == (CRHS->getValue() & C2->getValue())) { 1042 // See if all bits from the first bit set in the Add RHS up are included 1043 // in the mask. First, get the rightmost bit. 1044 const APInt &AddRHSV = CRHS->getValue(); 1045 1046 // Form a mask of all bits from the lowest bit added through the top. 1047 APInt AddRHSHighBits(~((AddRHSV & -AddRHSV)-1)); 1048 1049 // See if the and mask includes all of these bits. 1050 APInt AddRHSHighBitsAnd(AddRHSHighBits & C2->getValue()); 1051 1052 if (AddRHSHighBits == AddRHSHighBitsAnd) { 1053 // Okay, the xform is safe. Insert the new add pronto. 1054 Value *NewAdd = Builder->CreateAdd(X, CRHS, LHS->getName()); 1055 return BinaryOperator::CreateAnd(NewAdd, C2); 1056 } 1057 } 1058 1059 // Try to fold constant add into select arguments. 1060 if (SelectInst *SI = dyn_cast<SelectInst>(LHS)) 1061 if (Instruction *R = FoldOpIntoSelect(I, SI)) 1062 return R; 1063 } 1064 1065 // add (select X 0 (sub n A)) A --> select X A n 1066 { 1067 SelectInst *SI = dyn_cast<SelectInst>(LHS); 1068 Value *A = RHS; 1069 if (!SI) { 1070 SI = dyn_cast<SelectInst>(RHS); 1071 A = LHS; 1072 } 1073 if (SI && SI->hasOneUse()) { 1074 Value *TV = SI->getTrueValue(); 1075 Value *FV = SI->getFalseValue(); 1076 Value *N; 1077 1078 // Can we fold the add into the argument of the select? 1079 // We check both true and false select arguments for a matching subtract. 1080 if (match(FV, m_Zero()) && match(TV, m_Sub(m_Value(N), m_Specific(A)))) 1081 // Fold the add into the true select value. 1082 return SelectInst::Create(SI->getCondition(), N, A); 1083 1084 if (match(TV, m_Zero()) && match(FV, m_Sub(m_Value(N), m_Specific(A)))) 1085 // Fold the add into the false select value. 1086 return SelectInst::Create(SI->getCondition(), A, N); 1087 } 1088 } 1089 1090 // Check for (add (sext x), y), see if we can merge this into an 1091 // integer add followed by a sext. 1092 if (SExtInst *LHSConv = dyn_cast<SExtInst>(LHS)) { 1093 // (add (sext x), cst) --> (sext (add x, cst')) 1094 if (ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS)) { 1095 Constant *CI = 1096 ConstantExpr::getTrunc(RHSC, LHSConv->getOperand(0)->getType()); 1097 if (LHSConv->hasOneUse() && 1098 ConstantExpr::getSExt(CI, I.getType()) == RHSC && 1099 WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { 1100 // Insert the new, smaller add. 1101 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1102 CI, "addconv"); 1103 return new SExtInst(NewAdd, I.getType()); 1104 } 1105 } 1106 1107 // (add (sext x), (sext y)) --> (sext (add int x, y)) 1108 if (SExtInst *RHSConv = dyn_cast<SExtInst>(RHS)) { 1109 // Only do this if x/y have the same type, if at last one of them has a 1110 // single use (so we don't increase the number of sexts), and if the 1111 // integer add will not overflow. 1112 if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& 1113 (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && 1114 WillNotOverflowSignedAdd(LHSConv->getOperand(0), 1115 RHSConv->getOperand(0))) { 1116 // Insert the new integer add. 1117 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1118 RHSConv->getOperand(0), "addconv"); 1119 return new SExtInst(NewAdd, I.getType()); 1120 } 1121 } 1122 } 1123 1124 // Check for (x & y) + (x ^ y) 1125 { 1126 Value *A = 0, *B = 0; 1127 if (match(RHS, m_Xor(m_Value(A), m_Value(B))) && 1128 (match(LHS, m_And(m_Specific(A), m_Specific(B))) || 1129 match(LHS, m_And(m_Specific(B), m_Specific(A))))) 1130 return BinaryOperator::CreateOr(A, B); 1131 1132 if (match(LHS, m_Xor(m_Value(A), m_Value(B))) && 1133 (match(RHS, m_And(m_Specific(A), m_Specific(B))) || 1134 match(RHS, m_And(m_Specific(B), m_Specific(A))))) 1135 return BinaryOperator::CreateOr(A, B); 1136 } 1137 1138 return Changed ? &I : 0; 1139 } 1140 1141 Instruction *InstCombiner::visitFAdd(BinaryOperator &I) { 1142 bool Changed = SimplifyAssociativeOrCommutative(I); 1143 Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); 1144 1145 if (Value *V = SimplifyFAddInst(LHS, RHS, I.getFastMathFlags(), TD)) 1146 return ReplaceInstUsesWith(I, V); 1147 1148 if (isa<Constant>(RHS) && isa<PHINode>(LHS)) 1149 if (Instruction *NV = FoldOpIntoPhi(I)) 1150 return NV; 1151 1152 // -A + B --> B - A 1153 // -A + -B --> -(A + B) 1154 if (Value *LHSV = dyn_castFNegVal(LHS)) 1155 return BinaryOperator::CreateFSub(RHS, LHSV); 1156 1157 // A + -B --> A - B 1158 if (!isa<Constant>(RHS)) 1159 if (Value *V = dyn_castFNegVal(RHS)) 1160 return BinaryOperator::CreateFSub(LHS, V); 1161 1162 // Check for (fadd double (sitofp x), y), see if we can merge this into an 1163 // integer add followed by a promotion. 1164 if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) { 1165 // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst)) 1166 // ... if the constant fits in the integer value. This is useful for things 1167 // like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer 1168 // requires a constant pool load, and generally allows the add to be better 1169 // instcombined. 1170 if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHS)) { 1171 Constant *CI = 1172 ConstantExpr::getFPToSI(CFP, LHSConv->getOperand(0)->getType()); 1173 if (LHSConv->hasOneUse() && 1174 ConstantExpr::getSIToFP(CI, I.getType()) == CFP && 1175 WillNotOverflowSignedAdd(LHSConv->getOperand(0), CI)) { 1176 // Insert the new integer add. 1177 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1178 CI, "addconv"); 1179 return new SIToFPInst(NewAdd, I.getType()); 1180 } 1181 } 1182 1183 // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y)) 1184 if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) { 1185 // Only do this if x/y have the same type, if at last one of them has a 1186 // single use (so we don't increase the number of int->fp conversions), 1187 // and if the integer add will not overflow. 1188 if (LHSConv->getOperand(0)->getType()==RHSConv->getOperand(0)->getType()&& 1189 (LHSConv->hasOneUse() || RHSConv->hasOneUse()) && 1190 WillNotOverflowSignedAdd(LHSConv->getOperand(0), 1191 RHSConv->getOperand(0))) { 1192 // Insert the new integer add. 1193 Value *NewAdd = Builder->CreateNSWAdd(LHSConv->getOperand(0), 1194 RHSConv->getOperand(0),"addconv"); 1195 return new SIToFPInst(NewAdd, I.getType()); 1196 } 1197 } 1198 } 1199 1200 if (I.hasUnsafeAlgebra()) { 1201 if (Value *V = FAddCombine(Builder).simplify(&I)) 1202 return ReplaceInstUsesWith(I, V); 1203 } 1204 1205 return Changed ? &I : 0; 1206 } 1207 1208 1209 /// Optimize pointer differences into the same array into a size. Consider: 1210 /// &A[10] - &A[0]: we should compile this to "10". LHS/RHS are the pointer 1211 /// operands to the ptrtoint instructions for the LHS/RHS of the subtract. 1212 /// 1213 Value *InstCombiner::OptimizePointerDifference(Value *LHS, Value *RHS, 1214 Type *Ty) { 1215 assert(TD && "Must have target data info for this"); 1216 1217 // If LHS is a gep based on RHS or RHS is a gep based on LHS, we can optimize 1218 // this. 1219 bool Swapped = false; 1220 GEPOperator *GEP1 = 0, *GEP2 = 0; 1221 1222 // For now we require one side to be the base pointer "A" or a constant 1223 // GEP derived from it. 1224 if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { 1225 // (gep X, ...) - X 1226 if (LHSGEP->getOperand(0) == RHS) { 1227 GEP1 = LHSGEP; 1228 Swapped = false; 1229 } else if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { 1230 // (gep X, ...) - (gep X, ...) 1231 if (LHSGEP->getOperand(0)->stripPointerCasts() == 1232 RHSGEP->getOperand(0)->stripPointerCasts()) { 1233 GEP2 = RHSGEP; 1234 GEP1 = LHSGEP; 1235 Swapped = false; 1236 } 1237 } 1238 } 1239 1240 if (GEPOperator *RHSGEP = dyn_cast<GEPOperator>(RHS)) { 1241 // X - (gep X, ...) 1242 if (RHSGEP->getOperand(0) == LHS) { 1243 GEP1 = RHSGEP; 1244 Swapped = true; 1245 } else if (GEPOperator *LHSGEP = dyn_cast<GEPOperator>(LHS)) { 1246 // (gep X, ...) - (gep X, ...) 1247 if (RHSGEP->getOperand(0)->stripPointerCasts() == 1248 LHSGEP->getOperand(0)->stripPointerCasts()) { 1249 GEP2 = LHSGEP; 1250 GEP1 = RHSGEP; 1251 Swapped = true; 1252 } 1253 } 1254 } 1255 1256 // Avoid duplicating the arithmetic if GEP2 has non-constant indices and 1257 // multiple users. 1258 if (GEP1 == 0 || 1259 (GEP2 != 0 && !GEP2->hasAllConstantIndices() && !GEP2->hasOneUse())) 1260 return 0; 1261 1262 // Emit the offset of the GEP and an intptr_t. 1263 Value *Result = EmitGEPOffset(GEP1); 1264 1265 // If we had a constant expression GEP on the other side offsetting the 1266 // pointer, subtract it from the offset we have. 1267 if (GEP2) { 1268 Value *Offset = EmitGEPOffset(GEP2); 1269 Result = Builder->CreateSub(Result, Offset); 1270 } 1271 1272 // If we have p - gep(p, ...) then we have to negate the result. 1273 if (Swapped) 1274 Result = Builder->CreateNeg(Result, "diff.neg"); 1275 1276 return Builder->CreateIntCast(Result, Ty, true); 1277 } 1278 1279 1280 Instruction *InstCombiner::visitSub(BinaryOperator &I) { 1281 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1282 1283 if (Value *V = SimplifySubInst(Op0, Op1, I.hasNoSignedWrap(), 1284 I.hasNoUnsignedWrap(), TD)) 1285 return ReplaceInstUsesWith(I, V); 1286 1287 // (A*B)-(A*C) -> A*(B-C) etc 1288 if (Value *V = SimplifyUsingDistributiveLaws(I)) 1289 return ReplaceInstUsesWith(I, V); 1290 1291 // If this is a 'B = x-(-A)', change to B = x+A. This preserves NSW/NUW. 1292 if (Value *V = dyn_castNegVal(Op1)) { 1293 BinaryOperator *Res = BinaryOperator::CreateAdd(Op0, V); 1294 Res->setHasNoSignedWrap(I.hasNoSignedWrap()); 1295 Res->setHasNoUnsignedWrap(I.hasNoUnsignedWrap()); 1296 return Res; 1297 } 1298 1299 if (I.getType()->isIntegerTy(1)) 1300 return BinaryOperator::CreateXor(Op0, Op1); 1301 1302 // Replace (-1 - A) with (~A). 1303 if (match(Op0, m_AllOnes())) 1304 return BinaryOperator::CreateNot(Op1); 1305 1306 if (ConstantInt *C = dyn_cast<ConstantInt>(Op0)) { 1307 // C - ~X == X + (1+C) 1308 Value *X = 0; 1309 if (match(Op1, m_Not(m_Value(X)))) 1310 return BinaryOperator::CreateAdd(X, AddOne(C)); 1311 1312 // -(X >>u 31) -> (X >>s 31) 1313 // -(X >>s 31) -> (X >>u 31) 1314 if (C->isZero()) { 1315 Value *X; ConstantInt *CI; 1316 if (match(Op1, m_LShr(m_Value(X), m_ConstantInt(CI))) && 1317 // Verify we are shifting out everything but the sign bit. 1318 CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) 1319 return BinaryOperator::CreateAShr(X, CI); 1320 1321 if (match(Op1, m_AShr(m_Value(X), m_ConstantInt(CI))) && 1322 // Verify we are shifting out everything but the sign bit. 1323 CI->getValue() == I.getType()->getPrimitiveSizeInBits()-1) 1324 return BinaryOperator::CreateLShr(X, CI); 1325 } 1326 1327 // Try to fold constant sub into select arguments. 1328 if (SelectInst *SI = dyn_cast<SelectInst>(Op1)) 1329 if (Instruction *R = FoldOpIntoSelect(I, SI)) 1330 return R; 1331 1332 // C-(X+C2) --> (C-C2)-X 1333 ConstantInt *C2; 1334 if (match(Op1, m_Add(m_Value(X), m_ConstantInt(C2)))) 1335 return BinaryOperator::CreateSub(ConstantExpr::getSub(C, C2), X); 1336 1337 if (SimplifyDemandedInstructionBits(I)) 1338 return &I; 1339 1340 // Fold (sub 0, (zext bool to B)) --> (sext bool to B) 1341 if (C->isZero() && match(Op1, m_ZExt(m_Value(X)))) 1342 if (X->getType()->isIntegerTy(1)) 1343 return CastInst::CreateSExtOrBitCast(X, Op1->getType()); 1344 1345 // Fold (sub 0, (sext bool to B)) --> (zext bool to B) 1346 if (C->isZero() && match(Op1, m_SExt(m_Value(X)))) 1347 if (X->getType()->isIntegerTy(1)) 1348 return CastInst::CreateZExtOrBitCast(X, Op1->getType()); 1349 } 1350 1351 1352 { Value *Y; 1353 // X-(X+Y) == -Y X-(Y+X) == -Y 1354 if (match(Op1, m_Add(m_Specific(Op0), m_Value(Y))) || 1355 match(Op1, m_Add(m_Value(Y), m_Specific(Op0)))) 1356 return BinaryOperator::CreateNeg(Y); 1357 1358 // (X-Y)-X == -Y 1359 if (match(Op0, m_Sub(m_Specific(Op1), m_Value(Y)))) 1360 return BinaryOperator::CreateNeg(Y); 1361 } 1362 1363 if (Op1->hasOneUse()) { 1364 Value *X = 0, *Y = 0, *Z = 0; 1365 Constant *C = 0; 1366 ConstantInt *CI = 0; 1367 1368 // (X - (Y - Z)) --> (X + (Z - Y)). 1369 if (match(Op1, m_Sub(m_Value(Y), m_Value(Z)))) 1370 return BinaryOperator::CreateAdd(Op0, 1371 Builder->CreateSub(Z, Y, Op1->getName())); 1372 1373 // (X - (X & Y)) --> (X & ~Y) 1374 // 1375 if (match(Op1, m_And(m_Value(Y), m_Specific(Op0))) || 1376 match(Op1, m_And(m_Specific(Op0), m_Value(Y)))) 1377 return BinaryOperator::CreateAnd(Op0, 1378 Builder->CreateNot(Y, Y->getName() + ".not")); 1379 1380 // 0 - (X sdiv C) -> (X sdiv -C) 1381 if (match(Op1, m_SDiv(m_Value(X), m_Constant(C))) && 1382 match(Op0, m_Zero())) 1383 return BinaryOperator::CreateSDiv(X, ConstantExpr::getNeg(C)); 1384 1385 // 0 - (X << Y) -> (-X << Y) when X is freely negatable. 1386 if (match(Op1, m_Shl(m_Value(X), m_Value(Y))) && match(Op0, m_Zero())) 1387 if (Value *XNeg = dyn_castNegVal(X)) 1388 return BinaryOperator::CreateShl(XNeg, Y); 1389 1390 // X - X*C --> X * (1-C) 1391 if (match(Op1, m_Mul(m_Specific(Op0), m_ConstantInt(CI)))) { 1392 Constant *CP1 = ConstantExpr::getSub(ConstantInt::get(I.getType(),1), CI); 1393 return BinaryOperator::CreateMul(Op0, CP1); 1394 } 1395 1396 // X - X<<C --> X * (1-(1<<C)) 1397 if (match(Op1, m_Shl(m_Specific(Op0), m_ConstantInt(CI)))) { 1398 Constant *One = ConstantInt::get(I.getType(), 1); 1399 C = ConstantExpr::getSub(One, ConstantExpr::getShl(One, CI)); 1400 return BinaryOperator::CreateMul(Op0, C); 1401 } 1402 1403 // X - A*-B -> X + A*B 1404 // X - -A*B -> X + A*B 1405 Value *A, *B; 1406 if (match(Op1, m_Mul(m_Value(A), m_Neg(m_Value(B)))) || 1407 match(Op1, m_Mul(m_Neg(m_Value(A)), m_Value(B)))) 1408 return BinaryOperator::CreateAdd(Op0, Builder->CreateMul(A, B)); 1409 1410 // X - A*CI -> X + A*-CI 1411 // X - CI*A -> X + A*-CI 1412 if (match(Op1, m_Mul(m_Value(A), m_ConstantInt(CI))) || 1413 match(Op1, m_Mul(m_ConstantInt(CI), m_Value(A)))) { 1414 Value *NewMul = Builder->CreateMul(A, ConstantExpr::getNeg(CI)); 1415 return BinaryOperator::CreateAdd(Op0, NewMul); 1416 } 1417 } 1418 1419 ConstantInt *C1; 1420 if (Value *X = dyn_castFoldableMul(Op0, C1)) { 1421 if (X == Op1) // X*C - X --> X * (C-1) 1422 return BinaryOperator::CreateMul(Op1, SubOne(C1)); 1423 1424 ConstantInt *C2; // X*C1 - X*C2 -> X * (C1-C2) 1425 if (X == dyn_castFoldableMul(Op1, C2)) 1426 return BinaryOperator::CreateMul(X, ConstantExpr::getSub(C1, C2)); 1427 } 1428 1429 // Optimize pointer differences into the same array into a size. Consider: 1430 // &A[10] - &A[0]: we should compile this to "10". 1431 if (TD) { 1432 Value *LHSOp, *RHSOp; 1433 if (match(Op0, m_PtrToInt(m_Value(LHSOp))) && 1434 match(Op1, m_PtrToInt(m_Value(RHSOp)))) 1435 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 1436 return ReplaceInstUsesWith(I, Res); 1437 1438 // trunc(p)-trunc(q) -> trunc(p-q) 1439 if (match(Op0, m_Trunc(m_PtrToInt(m_Value(LHSOp)))) && 1440 match(Op1, m_Trunc(m_PtrToInt(m_Value(RHSOp))))) 1441 if (Value *Res = OptimizePointerDifference(LHSOp, RHSOp, I.getType())) 1442 return ReplaceInstUsesWith(I, Res); 1443 } 1444 1445 return 0; 1446 } 1447 1448 Instruction *InstCombiner::visitFSub(BinaryOperator &I) { 1449 Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1); 1450 1451 if (Value *V = SimplifyFSubInst(Op0, Op1, I.getFastMathFlags(), TD)) 1452 return ReplaceInstUsesWith(I, V); 1453 1454 // If this is a 'B = x-(-A)', change to B = x+A... 1455 if (Value *V = dyn_castFNegVal(Op1)) 1456 return BinaryOperator::CreateFAdd(Op0, V); 1457 1458 if (I.hasUnsafeAlgebra()) { 1459 if (Value *V = FAddCombine(Builder).simplify(&I)) 1460 return ReplaceInstUsesWith(I, V); 1461 } 1462 1463 return 0; 1464 } 1465