1 //===- InstCombineVectorOps.cpp -------------------------------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements instcombine for ExtractElement, InsertElement and 11 // ShuffleVector. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "InstCombineInternal.h" 16 #include "llvm/ADT/DenseMap.h" 17 #include "llvm/Analysis/InstructionSimplify.h" 18 #include "llvm/Analysis/VectorUtils.h" 19 #include "llvm/IR/PatternMatch.h" 20 using namespace llvm; 21 using namespace PatternMatch; 22 23 #define DEBUG_TYPE "instcombine" 24 25 /// Return true if the value is cheaper to scalarize than it is to leave as a 26 /// vector operation. isConstant indicates whether we're extracting one known 27 /// element. If false we're extracting a variable index. 28 static bool cheapToScalarize(Value *V, bool isConstant) { 29 if (Constant *C = dyn_cast<Constant>(V)) { 30 if (isConstant) return true; 31 32 // If all elts are the same, we can extract it and use any of the values. 33 if (Constant *Op0 = C->getAggregateElement(0U)) { 34 for (unsigned i = 1, e = V->getType()->getVectorNumElements(); i != e; 35 ++i) 36 if (C->getAggregateElement(i) != Op0) 37 return false; 38 return true; 39 } 40 } 41 Instruction *I = dyn_cast<Instruction>(V); 42 if (!I) return false; 43 44 // Insert element gets simplified to the inserted element or is deleted if 45 // this is constant idx extract element and its a constant idx insertelt. 46 if (I->getOpcode() == Instruction::InsertElement && isConstant && 47 isa<ConstantInt>(I->getOperand(2))) 48 return true; 49 if (I->getOpcode() == Instruction::Load && I->hasOneUse()) 50 return true; 51 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) 52 if (BO->hasOneUse() && 53 (cheapToScalarize(BO->getOperand(0), isConstant) || 54 cheapToScalarize(BO->getOperand(1), isConstant))) 55 return true; 56 if (CmpInst *CI = dyn_cast<CmpInst>(I)) 57 if (CI->hasOneUse() && 58 (cheapToScalarize(CI->getOperand(0), isConstant) || 59 cheapToScalarize(CI->getOperand(1), isConstant))) 60 return true; 61 62 return false; 63 } 64 65 // If we have a PHI node with a vector type that has only 2 uses: feed 66 // itself and be an operand of extractelement at a constant location, 67 // try to replace the PHI of the vector type with a PHI of a scalar type. 68 Instruction *InstCombiner::scalarizePHI(ExtractElementInst &EI, PHINode *PN) { 69 // Verify that the PHI node has exactly 2 uses. Otherwise return NULL. 70 if (!PN->hasNUses(2)) 71 return nullptr; 72 73 // If so, it's known at this point that one operand is PHI and the other is 74 // an extractelement node. Find the PHI user that is not the extractelement 75 // node. 76 auto iu = PN->user_begin(); 77 Instruction *PHIUser = dyn_cast<Instruction>(*iu); 78 if (PHIUser == cast<Instruction>(&EI)) 79 PHIUser = cast<Instruction>(*(++iu)); 80 81 // Verify that this PHI user has one use, which is the PHI itself, 82 // and that it is a binary operation which is cheap to scalarize. 83 // otherwise return NULL. 84 if (!PHIUser->hasOneUse() || !(PHIUser->user_back() == PN) || 85 !(isa<BinaryOperator>(PHIUser)) || !cheapToScalarize(PHIUser, true)) 86 return nullptr; 87 88 // Create a scalar PHI node that will replace the vector PHI node 89 // just before the current PHI node. 90 PHINode *scalarPHI = cast<PHINode>(InsertNewInstWith( 91 PHINode::Create(EI.getType(), PN->getNumIncomingValues(), ""), *PN)); 92 // Scalarize each PHI operand. 93 for (unsigned i = 0; i < PN->getNumIncomingValues(); i++) { 94 Value *PHIInVal = PN->getIncomingValue(i); 95 BasicBlock *inBB = PN->getIncomingBlock(i); 96 Value *Elt = EI.getIndexOperand(); 97 // If the operand is the PHI induction variable: 98 if (PHIInVal == PHIUser) { 99 // Scalarize the binary operation. Its first operand is the 100 // scalar PHI, and the second operand is extracted from the other 101 // vector operand. 102 BinaryOperator *B0 = cast<BinaryOperator>(PHIUser); 103 unsigned opId = (B0->getOperand(0) == PN) ? 1 : 0; 104 Value *Op = InsertNewInstWith( 105 ExtractElementInst::Create(B0->getOperand(opId), Elt, 106 B0->getOperand(opId)->getName() + ".Elt"), 107 *B0); 108 Value *newPHIUser = InsertNewInstWith( 109 BinaryOperator::Create(B0->getOpcode(), scalarPHI, Op), *B0); 110 scalarPHI->addIncoming(newPHIUser, inBB); 111 } else { 112 // Scalarize PHI input: 113 Instruction *newEI = ExtractElementInst::Create(PHIInVal, Elt, ""); 114 // Insert the new instruction into the predecessor basic block. 115 Instruction *pos = dyn_cast<Instruction>(PHIInVal); 116 BasicBlock::iterator InsertPos; 117 if (pos && !isa<PHINode>(pos)) { 118 InsertPos = ++pos->getIterator(); 119 } else { 120 InsertPos = inBB->getFirstInsertionPt(); 121 } 122 123 InsertNewInstWith(newEI, *InsertPos); 124 125 scalarPHI->addIncoming(newEI, inBB); 126 } 127 } 128 return ReplaceInstUsesWith(EI, scalarPHI); 129 } 130 131 Instruction *InstCombiner::visitExtractElementInst(ExtractElementInst &EI) { 132 if (Value *V = SimplifyExtractElementInst( 133 EI.getVectorOperand(), EI.getIndexOperand(), DL, TLI, DT, AC)) 134 return ReplaceInstUsesWith(EI, V); 135 136 // If vector val is constant with all elements the same, replace EI with 137 // that element. We handle a known element # below. 138 if (Constant *C = dyn_cast<Constant>(EI.getOperand(0))) 139 if (cheapToScalarize(C, false)) 140 return ReplaceInstUsesWith(EI, C->getAggregateElement(0U)); 141 142 // If extracting a specified index from the vector, see if we can recursively 143 // find a previously computed scalar that was inserted into the vector. 144 if (ConstantInt *IdxC = dyn_cast<ConstantInt>(EI.getOperand(1))) { 145 unsigned IndexVal = IdxC->getZExtValue(); 146 unsigned VectorWidth = EI.getVectorOperandType()->getNumElements(); 147 148 // InstSimplify handles cases where the index is invalid. 149 assert(IndexVal < VectorWidth); 150 151 // This instruction only demands the single element from the input vector. 152 // If the input vector has a single use, simplify it based on this use 153 // property. 154 if (EI.getOperand(0)->hasOneUse() && VectorWidth != 1) { 155 APInt UndefElts(VectorWidth, 0); 156 APInt DemandedMask(VectorWidth, 0); 157 DemandedMask.setBit(IndexVal); 158 if (Value *V = SimplifyDemandedVectorElts(EI.getOperand(0), DemandedMask, 159 UndefElts)) { 160 EI.setOperand(0, V); 161 return &EI; 162 } 163 } 164 165 // If this extractelement is directly using a bitcast from a vector of 166 // the same number of elements, see if we can find the source element from 167 // it. In this case, we will end up needing to bitcast the scalars. 168 if (BitCastInst *BCI = dyn_cast<BitCastInst>(EI.getOperand(0))) { 169 if (VectorType *VT = dyn_cast<VectorType>(BCI->getOperand(0)->getType())) 170 if (VT->getNumElements() == VectorWidth) 171 if (Value *Elt = findScalarElement(BCI->getOperand(0), IndexVal)) 172 return new BitCastInst(Elt, EI.getType()); 173 } 174 175 // If there's a vector PHI feeding a scalar use through this extractelement 176 // instruction, try to scalarize the PHI. 177 if (PHINode *PN = dyn_cast<PHINode>(EI.getOperand(0))) { 178 Instruction *scalarPHI = scalarizePHI(EI, PN); 179 if (scalarPHI) 180 return scalarPHI; 181 } 182 } 183 184 if (Instruction *I = dyn_cast<Instruction>(EI.getOperand(0))) { 185 // Push extractelement into predecessor operation if legal and 186 // profitable to do so. 187 if (BinaryOperator *BO = dyn_cast<BinaryOperator>(I)) { 188 if (I->hasOneUse() && 189 cheapToScalarize(BO, isa<ConstantInt>(EI.getOperand(1)))) { 190 Value *newEI0 = 191 Builder->CreateExtractElement(BO->getOperand(0), EI.getOperand(1), 192 EI.getName()+".lhs"); 193 Value *newEI1 = 194 Builder->CreateExtractElement(BO->getOperand(1), EI.getOperand(1), 195 EI.getName()+".rhs"); 196 return BinaryOperator::Create(BO->getOpcode(), newEI0, newEI1); 197 } 198 } else if (InsertElementInst *IE = dyn_cast<InsertElementInst>(I)) { 199 // Extracting the inserted element? 200 if (IE->getOperand(2) == EI.getOperand(1)) 201 return ReplaceInstUsesWith(EI, IE->getOperand(1)); 202 // If the inserted and extracted elements are constants, they must not 203 // be the same value, extract from the pre-inserted value instead. 204 if (isa<Constant>(IE->getOperand(2)) && isa<Constant>(EI.getOperand(1))) { 205 Worklist.AddValue(EI.getOperand(0)); 206 EI.setOperand(0, IE->getOperand(0)); 207 return &EI; 208 } 209 } else if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(I)) { 210 // If this is extracting an element from a shufflevector, figure out where 211 // it came from and extract from the appropriate input element instead. 212 if (ConstantInt *Elt = dyn_cast<ConstantInt>(EI.getOperand(1))) { 213 int SrcIdx = SVI->getMaskValue(Elt->getZExtValue()); 214 Value *Src; 215 unsigned LHSWidth = 216 SVI->getOperand(0)->getType()->getVectorNumElements(); 217 218 if (SrcIdx < 0) 219 return ReplaceInstUsesWith(EI, UndefValue::get(EI.getType())); 220 if (SrcIdx < (int)LHSWidth) 221 Src = SVI->getOperand(0); 222 else { 223 SrcIdx -= LHSWidth; 224 Src = SVI->getOperand(1); 225 } 226 Type *Int32Ty = Type::getInt32Ty(EI.getContext()); 227 return ExtractElementInst::Create(Src, 228 ConstantInt::get(Int32Ty, 229 SrcIdx, false)); 230 } 231 } else if (CastInst *CI = dyn_cast<CastInst>(I)) { 232 // Canonicalize extractelement(cast) -> cast(extractelement). 233 // Bitcasts can change the number of vector elements, and they cost 234 // nothing. 235 if (CI->hasOneUse() && (CI->getOpcode() != Instruction::BitCast)) { 236 Value *EE = Builder->CreateExtractElement(CI->getOperand(0), 237 EI.getIndexOperand()); 238 Worklist.AddValue(EE); 239 return CastInst::Create(CI->getOpcode(), EE, EI.getType()); 240 } 241 } else if (SelectInst *SI = dyn_cast<SelectInst>(I)) { 242 if (SI->hasOneUse()) { 243 // TODO: For a select on vectors, it might be useful to do this if it 244 // has multiple extractelement uses. For vector select, that seems to 245 // fight the vectorizer. 246 247 // If we are extracting an element from a vector select or a select on 248 // vectors, create a select on the scalars extracted from the vector 249 // arguments. 250 Value *TrueVal = SI->getTrueValue(); 251 Value *FalseVal = SI->getFalseValue(); 252 253 Value *Cond = SI->getCondition(); 254 if (Cond->getType()->isVectorTy()) { 255 Cond = Builder->CreateExtractElement(Cond, 256 EI.getIndexOperand(), 257 Cond->getName() + ".elt"); 258 } 259 260 Value *V1Elem 261 = Builder->CreateExtractElement(TrueVal, 262 EI.getIndexOperand(), 263 TrueVal->getName() + ".elt"); 264 265 Value *V2Elem 266 = Builder->CreateExtractElement(FalseVal, 267 EI.getIndexOperand(), 268 FalseVal->getName() + ".elt"); 269 return SelectInst::Create(Cond, 270 V1Elem, 271 V2Elem, 272 SI->getName() + ".elt"); 273 } 274 } 275 } 276 return nullptr; 277 } 278 279 /// If V is a shuffle of values that ONLY returns elements from either LHS or 280 /// RHS, return the shuffle mask and true. Otherwise, return false. 281 static bool collectSingleShuffleElements(Value *V, Value *LHS, Value *RHS, 282 SmallVectorImpl<Constant*> &Mask) { 283 assert(LHS->getType() == RHS->getType() && 284 "Invalid CollectSingleShuffleElements"); 285 unsigned NumElts = V->getType()->getVectorNumElements(); 286 287 if (isa<UndefValue>(V)) { 288 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 289 return true; 290 } 291 292 if (V == LHS) { 293 for (unsigned i = 0; i != NumElts; ++i) 294 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 295 return true; 296 } 297 298 if (V == RHS) { 299 for (unsigned i = 0; i != NumElts; ++i) 300 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), 301 i+NumElts)); 302 return true; 303 } 304 305 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 306 // If this is an insert of an extract from some other vector, include it. 307 Value *VecOp = IEI->getOperand(0); 308 Value *ScalarOp = IEI->getOperand(1); 309 Value *IdxOp = IEI->getOperand(2); 310 311 if (!isa<ConstantInt>(IdxOp)) 312 return false; 313 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 314 315 if (isa<UndefValue>(ScalarOp)) { // inserting undef into vector. 316 // We can handle this if the vector we are inserting into is 317 // transitively ok. 318 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 319 // If so, update the mask to reflect the inserted undef. 320 Mask[InsertedIdx] = UndefValue::get(Type::getInt32Ty(V->getContext())); 321 return true; 322 } 323 } else if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)){ 324 if (isa<ConstantInt>(EI->getOperand(1))) { 325 unsigned ExtractedIdx = 326 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 327 unsigned NumLHSElts = LHS->getType()->getVectorNumElements(); 328 329 // This must be extracting from either LHS or RHS. 330 if (EI->getOperand(0) == LHS || EI->getOperand(0) == RHS) { 331 // We can handle this if the vector we are inserting into is 332 // transitively ok. 333 if (collectSingleShuffleElements(VecOp, LHS, RHS, Mask)) { 334 // If so, update the mask to reflect the inserted value. 335 if (EI->getOperand(0) == LHS) { 336 Mask[InsertedIdx % NumElts] = 337 ConstantInt::get(Type::getInt32Ty(V->getContext()), 338 ExtractedIdx); 339 } else { 340 assert(EI->getOperand(0) == RHS); 341 Mask[InsertedIdx % NumElts] = 342 ConstantInt::get(Type::getInt32Ty(V->getContext()), 343 ExtractedIdx + NumLHSElts); 344 } 345 return true; 346 } 347 } 348 } 349 } 350 } 351 352 return false; 353 } 354 355 356 /// We are building a shuffle to create V, which is a sequence of insertelement, 357 /// extractelement pairs. If PermittedRHS is set, then we must either use it or 358 /// not rely on the second vector source. Return a std::pair containing the 359 /// left and right vectors of the proposed shuffle (or 0), and set the Mask 360 /// parameter as required. 361 /// 362 /// Note: we intentionally don't try to fold earlier shuffles since they have 363 /// often been chosen carefully to be efficiently implementable on the target. 364 typedef std::pair<Value *, Value *> ShuffleOps; 365 366 static ShuffleOps collectShuffleElements(Value *V, 367 SmallVectorImpl<Constant *> &Mask, 368 Value *PermittedRHS) { 369 assert(V->getType()->isVectorTy() && "Invalid shuffle!"); 370 unsigned NumElts = cast<VectorType>(V->getType())->getNumElements(); 371 372 if (isa<UndefValue>(V)) { 373 Mask.assign(NumElts, UndefValue::get(Type::getInt32Ty(V->getContext()))); 374 return std::make_pair( 375 PermittedRHS ? UndefValue::get(PermittedRHS->getType()) : V, nullptr); 376 } 377 378 if (isa<ConstantAggregateZero>(V)) { 379 Mask.assign(NumElts, ConstantInt::get(Type::getInt32Ty(V->getContext()),0)); 380 return std::make_pair(V, nullptr); 381 } 382 383 if (InsertElementInst *IEI = dyn_cast<InsertElementInst>(V)) { 384 // If this is an insert of an extract from some other vector, include it. 385 Value *VecOp = IEI->getOperand(0); 386 Value *ScalarOp = IEI->getOperand(1); 387 Value *IdxOp = IEI->getOperand(2); 388 389 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 390 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 391 unsigned ExtractedIdx = 392 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 393 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 394 395 // Either the extracted from or inserted into vector must be RHSVec, 396 // otherwise we'd end up with a shuffle of three inputs. 397 if (EI->getOperand(0) == PermittedRHS || PermittedRHS == nullptr) { 398 Value *RHS = EI->getOperand(0); 399 ShuffleOps LR = collectShuffleElements(VecOp, Mask, RHS); 400 assert(LR.second == nullptr || LR.second == RHS); 401 402 if (LR.first->getType() != RHS->getType()) { 403 // We tried our best, but we can't find anything compatible with RHS 404 // further up the chain. Return a trivial shuffle. 405 for (unsigned i = 0; i < NumElts; ++i) 406 Mask[i] = ConstantInt::get(Type::getInt32Ty(V->getContext()), i); 407 return std::make_pair(V, nullptr); 408 } 409 410 unsigned NumLHSElts = RHS->getType()->getVectorNumElements(); 411 Mask[InsertedIdx % NumElts] = 412 ConstantInt::get(Type::getInt32Ty(V->getContext()), 413 NumLHSElts+ExtractedIdx); 414 return std::make_pair(LR.first, RHS); 415 } 416 417 if (VecOp == PermittedRHS) { 418 // We've gone as far as we can: anything on the other side of the 419 // extractelement will already have been converted into a shuffle. 420 unsigned NumLHSElts = 421 EI->getOperand(0)->getType()->getVectorNumElements(); 422 for (unsigned i = 0; i != NumElts; ++i) 423 Mask.push_back(ConstantInt::get( 424 Type::getInt32Ty(V->getContext()), 425 i == InsertedIdx ? ExtractedIdx : NumLHSElts + i)); 426 return std::make_pair(EI->getOperand(0), PermittedRHS); 427 } 428 429 // If this insertelement is a chain that comes from exactly these two 430 // vectors, return the vector and the effective shuffle. 431 if (EI->getOperand(0)->getType() == PermittedRHS->getType() && 432 collectSingleShuffleElements(IEI, EI->getOperand(0), PermittedRHS, 433 Mask)) 434 return std::make_pair(EI->getOperand(0), PermittedRHS); 435 } 436 } 437 } 438 439 // Otherwise, we can't do anything fancy. Return an identity vector. 440 for (unsigned i = 0; i != NumElts; ++i) 441 Mask.push_back(ConstantInt::get(Type::getInt32Ty(V->getContext()), i)); 442 return std::make_pair(V, nullptr); 443 } 444 445 /// Try to find redundant insertvalue instructions, like the following ones: 446 /// %0 = insertvalue { i8, i32 } undef, i8 %x, 0 447 /// %1 = insertvalue { i8, i32 } %0, i8 %y, 0 448 /// Here the second instruction inserts values at the same indices, as the 449 /// first one, making the first one redundant. 450 /// It should be transformed to: 451 /// %0 = insertvalue { i8, i32 } undef, i8 %y, 0 452 Instruction *InstCombiner::visitInsertValueInst(InsertValueInst &I) { 453 bool IsRedundant = false; 454 ArrayRef<unsigned int> FirstIndices = I.getIndices(); 455 456 // If there is a chain of insertvalue instructions (each of them except the 457 // last one has only one use and it's another insertvalue insn from this 458 // chain), check if any of the 'children' uses the same indices as the first 459 // instruction. In this case, the first one is redundant. 460 Value *V = &I; 461 unsigned Depth = 0; 462 while (V->hasOneUse() && Depth < 10) { 463 User *U = V->user_back(); 464 auto UserInsInst = dyn_cast<InsertValueInst>(U); 465 if (!UserInsInst || U->getOperand(0) != V) 466 break; 467 if (UserInsInst->getIndices() == FirstIndices) { 468 IsRedundant = true; 469 break; 470 } 471 V = UserInsInst; 472 Depth++; 473 } 474 475 if (IsRedundant) 476 return ReplaceInstUsesWith(I, I.getOperand(0)); 477 return nullptr; 478 } 479 480 Instruction *InstCombiner::visitInsertElementInst(InsertElementInst &IE) { 481 Value *VecOp = IE.getOperand(0); 482 Value *ScalarOp = IE.getOperand(1); 483 Value *IdxOp = IE.getOperand(2); 484 485 // Inserting an undef or into an undefined place, remove this. 486 if (isa<UndefValue>(ScalarOp) || isa<UndefValue>(IdxOp)) 487 ReplaceInstUsesWith(IE, VecOp); 488 489 // If the inserted element was extracted from some other vector, and if the 490 // indexes are constant, try to turn this into a shufflevector operation. 491 if (ExtractElementInst *EI = dyn_cast<ExtractElementInst>(ScalarOp)) { 492 if (isa<ConstantInt>(EI->getOperand(1)) && isa<ConstantInt>(IdxOp)) { 493 unsigned NumInsertVectorElts = IE.getType()->getNumElements(); 494 unsigned NumExtractVectorElts = 495 EI->getOperand(0)->getType()->getVectorNumElements(); 496 unsigned ExtractedIdx = 497 cast<ConstantInt>(EI->getOperand(1))->getZExtValue(); 498 unsigned InsertedIdx = cast<ConstantInt>(IdxOp)->getZExtValue(); 499 500 if (ExtractedIdx >= NumExtractVectorElts) // Out of range extract. 501 return ReplaceInstUsesWith(IE, VecOp); 502 503 if (InsertedIdx >= NumInsertVectorElts) // Out of range insert. 504 return ReplaceInstUsesWith(IE, UndefValue::get(IE.getType())); 505 506 // If we are extracting a value from a vector, then inserting it right 507 // back into the same place, just use the input vector. 508 if (EI->getOperand(0) == VecOp && ExtractedIdx == InsertedIdx) 509 return ReplaceInstUsesWith(IE, VecOp); 510 511 // If this insertelement isn't used by some other insertelement, turn it 512 // (and any insertelements it points to), into one big shuffle. 513 if (!IE.hasOneUse() || !isa<InsertElementInst>(IE.user_back())) { 514 SmallVector<Constant*, 16> Mask; 515 ShuffleOps LR = collectShuffleElements(&IE, Mask, nullptr); 516 517 // The proposed shuffle may be trivial, in which case we shouldn't 518 // perform the combine. 519 if (LR.first != &IE && LR.second != &IE) { 520 // We now have a shuffle of LHS, RHS, Mask. 521 if (LR.second == nullptr) 522 LR.second = UndefValue::get(LR.first->getType()); 523 return new ShuffleVectorInst(LR.first, LR.second, 524 ConstantVector::get(Mask)); 525 } 526 } 527 } 528 } 529 530 unsigned VWidth = cast<VectorType>(VecOp->getType())->getNumElements(); 531 APInt UndefElts(VWidth, 0); 532 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 533 if (Value *V = SimplifyDemandedVectorElts(&IE, AllOnesEltMask, UndefElts)) { 534 if (V != &IE) 535 return ReplaceInstUsesWith(IE, V); 536 return &IE; 537 } 538 539 return nullptr; 540 } 541 542 /// Return true if we can evaluate the specified expression tree if the vector 543 /// elements were shuffled in a different order. 544 static bool CanEvaluateShuffled(Value *V, ArrayRef<int> Mask, 545 unsigned Depth = 5) { 546 // We can always reorder the elements of a constant. 547 if (isa<Constant>(V)) 548 return true; 549 550 // We won't reorder vector arguments. No IPO here. 551 Instruction *I = dyn_cast<Instruction>(V); 552 if (!I) return false; 553 554 // Two users may expect different orders of the elements. Don't try it. 555 if (!I->hasOneUse()) 556 return false; 557 558 if (Depth == 0) return false; 559 560 switch (I->getOpcode()) { 561 case Instruction::Add: 562 case Instruction::FAdd: 563 case Instruction::Sub: 564 case Instruction::FSub: 565 case Instruction::Mul: 566 case Instruction::FMul: 567 case Instruction::UDiv: 568 case Instruction::SDiv: 569 case Instruction::FDiv: 570 case Instruction::URem: 571 case Instruction::SRem: 572 case Instruction::FRem: 573 case Instruction::Shl: 574 case Instruction::LShr: 575 case Instruction::AShr: 576 case Instruction::And: 577 case Instruction::Or: 578 case Instruction::Xor: 579 case Instruction::ICmp: 580 case Instruction::FCmp: 581 case Instruction::Trunc: 582 case Instruction::ZExt: 583 case Instruction::SExt: 584 case Instruction::FPToUI: 585 case Instruction::FPToSI: 586 case Instruction::UIToFP: 587 case Instruction::SIToFP: 588 case Instruction::FPTrunc: 589 case Instruction::FPExt: 590 case Instruction::GetElementPtr: { 591 for (Value *Operand : I->operands()) { 592 if (!CanEvaluateShuffled(Operand, Mask, Depth-1)) 593 return false; 594 } 595 return true; 596 } 597 case Instruction::InsertElement: { 598 ConstantInt *CI = dyn_cast<ConstantInt>(I->getOperand(2)); 599 if (!CI) return false; 600 int ElementNumber = CI->getLimitedValue(); 601 602 // Verify that 'CI' does not occur twice in Mask. A single 'insertelement' 603 // can't put an element into multiple indices. 604 bool SeenOnce = false; 605 for (int i = 0, e = Mask.size(); i != e; ++i) { 606 if (Mask[i] == ElementNumber) { 607 if (SeenOnce) 608 return false; 609 SeenOnce = true; 610 } 611 } 612 return CanEvaluateShuffled(I->getOperand(0), Mask, Depth-1); 613 } 614 } 615 return false; 616 } 617 618 /// Rebuild a new instruction just like 'I' but with the new operands given. 619 /// In the event of type mismatch, the type of the operands is correct. 620 static Value *buildNew(Instruction *I, ArrayRef<Value*> NewOps) { 621 // We don't want to use the IRBuilder here because we want the replacement 622 // instructions to appear next to 'I', not the builder's insertion point. 623 switch (I->getOpcode()) { 624 case Instruction::Add: 625 case Instruction::FAdd: 626 case Instruction::Sub: 627 case Instruction::FSub: 628 case Instruction::Mul: 629 case Instruction::FMul: 630 case Instruction::UDiv: 631 case Instruction::SDiv: 632 case Instruction::FDiv: 633 case Instruction::URem: 634 case Instruction::SRem: 635 case Instruction::FRem: 636 case Instruction::Shl: 637 case Instruction::LShr: 638 case Instruction::AShr: 639 case Instruction::And: 640 case Instruction::Or: 641 case Instruction::Xor: { 642 BinaryOperator *BO = cast<BinaryOperator>(I); 643 assert(NewOps.size() == 2 && "binary operator with #ops != 2"); 644 BinaryOperator *New = 645 BinaryOperator::Create(cast<BinaryOperator>(I)->getOpcode(), 646 NewOps[0], NewOps[1], "", BO); 647 if (isa<OverflowingBinaryOperator>(BO)) { 648 New->setHasNoUnsignedWrap(BO->hasNoUnsignedWrap()); 649 New->setHasNoSignedWrap(BO->hasNoSignedWrap()); 650 } 651 if (isa<PossiblyExactOperator>(BO)) { 652 New->setIsExact(BO->isExact()); 653 } 654 if (isa<FPMathOperator>(BO)) 655 New->copyFastMathFlags(I); 656 return New; 657 } 658 case Instruction::ICmp: 659 assert(NewOps.size() == 2 && "icmp with #ops != 2"); 660 return new ICmpInst(I, cast<ICmpInst>(I)->getPredicate(), 661 NewOps[0], NewOps[1]); 662 case Instruction::FCmp: 663 assert(NewOps.size() == 2 && "fcmp with #ops != 2"); 664 return new FCmpInst(I, cast<FCmpInst>(I)->getPredicate(), 665 NewOps[0], NewOps[1]); 666 case Instruction::Trunc: 667 case Instruction::ZExt: 668 case Instruction::SExt: 669 case Instruction::FPToUI: 670 case Instruction::FPToSI: 671 case Instruction::UIToFP: 672 case Instruction::SIToFP: 673 case Instruction::FPTrunc: 674 case Instruction::FPExt: { 675 // It's possible that the mask has a different number of elements from 676 // the original cast. We recompute the destination type to match the mask. 677 Type *DestTy = 678 VectorType::get(I->getType()->getScalarType(), 679 NewOps[0]->getType()->getVectorNumElements()); 680 assert(NewOps.size() == 1 && "cast with #ops != 1"); 681 return CastInst::Create(cast<CastInst>(I)->getOpcode(), NewOps[0], DestTy, 682 "", I); 683 } 684 case Instruction::GetElementPtr: { 685 Value *Ptr = NewOps[0]; 686 ArrayRef<Value*> Idx = NewOps.slice(1); 687 GetElementPtrInst *GEP = GetElementPtrInst::Create( 688 cast<GetElementPtrInst>(I)->getSourceElementType(), Ptr, Idx, "", I); 689 GEP->setIsInBounds(cast<GetElementPtrInst>(I)->isInBounds()); 690 return GEP; 691 } 692 } 693 llvm_unreachable("failed to rebuild vector instructions"); 694 } 695 696 Value * 697 InstCombiner::EvaluateInDifferentElementOrder(Value *V, ArrayRef<int> Mask) { 698 // Mask.size() does not need to be equal to the number of vector elements. 699 700 assert(V->getType()->isVectorTy() && "can't reorder non-vector elements"); 701 if (isa<UndefValue>(V)) { 702 return UndefValue::get(VectorType::get(V->getType()->getScalarType(), 703 Mask.size())); 704 } 705 if (isa<ConstantAggregateZero>(V)) { 706 return ConstantAggregateZero::get( 707 VectorType::get(V->getType()->getScalarType(), 708 Mask.size())); 709 } 710 if (Constant *C = dyn_cast<Constant>(V)) { 711 SmallVector<Constant *, 16> MaskValues; 712 for (int i = 0, e = Mask.size(); i != e; ++i) { 713 if (Mask[i] == -1) 714 MaskValues.push_back(UndefValue::get(Builder->getInt32Ty())); 715 else 716 MaskValues.push_back(Builder->getInt32(Mask[i])); 717 } 718 return ConstantExpr::getShuffleVector(C, UndefValue::get(C->getType()), 719 ConstantVector::get(MaskValues)); 720 } 721 722 Instruction *I = cast<Instruction>(V); 723 switch (I->getOpcode()) { 724 case Instruction::Add: 725 case Instruction::FAdd: 726 case Instruction::Sub: 727 case Instruction::FSub: 728 case Instruction::Mul: 729 case Instruction::FMul: 730 case Instruction::UDiv: 731 case Instruction::SDiv: 732 case Instruction::FDiv: 733 case Instruction::URem: 734 case Instruction::SRem: 735 case Instruction::FRem: 736 case Instruction::Shl: 737 case Instruction::LShr: 738 case Instruction::AShr: 739 case Instruction::And: 740 case Instruction::Or: 741 case Instruction::Xor: 742 case Instruction::ICmp: 743 case Instruction::FCmp: 744 case Instruction::Trunc: 745 case Instruction::ZExt: 746 case Instruction::SExt: 747 case Instruction::FPToUI: 748 case Instruction::FPToSI: 749 case Instruction::UIToFP: 750 case Instruction::SIToFP: 751 case Instruction::FPTrunc: 752 case Instruction::FPExt: 753 case Instruction::Select: 754 case Instruction::GetElementPtr: { 755 SmallVector<Value*, 8> NewOps; 756 bool NeedsRebuild = (Mask.size() != I->getType()->getVectorNumElements()); 757 for (int i = 0, e = I->getNumOperands(); i != e; ++i) { 758 Value *V = EvaluateInDifferentElementOrder(I->getOperand(i), Mask); 759 NewOps.push_back(V); 760 NeedsRebuild |= (V != I->getOperand(i)); 761 } 762 if (NeedsRebuild) { 763 return buildNew(I, NewOps); 764 } 765 return I; 766 } 767 case Instruction::InsertElement: { 768 int Element = cast<ConstantInt>(I->getOperand(2))->getLimitedValue(); 769 770 // The insertelement was inserting at Element. Figure out which element 771 // that becomes after shuffling. The answer is guaranteed to be unique 772 // by CanEvaluateShuffled. 773 bool Found = false; 774 int Index = 0; 775 for (int e = Mask.size(); Index != e; ++Index) { 776 if (Mask[Index] == Element) { 777 Found = true; 778 break; 779 } 780 } 781 782 // If element is not in Mask, no need to handle the operand 1 (element to 783 // be inserted). Just evaluate values in operand 0 according to Mask. 784 if (!Found) 785 return EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 786 787 Value *V = EvaluateInDifferentElementOrder(I->getOperand(0), Mask); 788 return InsertElementInst::Create(V, I->getOperand(1), 789 Builder->getInt32(Index), "", I); 790 } 791 } 792 llvm_unreachable("failed to reorder elements of vector instruction!"); 793 } 794 795 static void recognizeIdentityMask(const SmallVectorImpl<int> &Mask, 796 bool &isLHSID, bool &isRHSID) { 797 isLHSID = isRHSID = true; 798 799 for (unsigned i = 0, e = Mask.size(); i != e; ++i) { 800 if (Mask[i] < 0) continue; // Ignore undef values. 801 // Is this an identity shuffle of the LHS value? 802 isLHSID &= (Mask[i] == (int)i); 803 804 // Is this an identity shuffle of the RHS value? 805 isRHSID &= (Mask[i]-e == i); 806 } 807 } 808 809 // Returns true if the shuffle is extracting a contiguous range of values from 810 // LHS, for example: 811 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 812 // Input: |AA|BB|CC|DD|EE|FF|GG|HH|II|JJ|KK|LL|MM|NN|OO|PP| 813 // Shuffles to: |EE|FF|GG|HH| 814 // +--+--+--+--+ 815 static bool isShuffleExtractingFromLHS(ShuffleVectorInst &SVI, 816 SmallVector<int, 16> &Mask) { 817 unsigned LHSElems = 818 cast<VectorType>(SVI.getOperand(0)->getType())->getNumElements(); 819 unsigned MaskElems = Mask.size(); 820 unsigned BegIdx = Mask.front(); 821 unsigned EndIdx = Mask.back(); 822 if (BegIdx > EndIdx || EndIdx >= LHSElems || EndIdx - BegIdx != MaskElems - 1) 823 return false; 824 for (unsigned I = 0; I != MaskElems; ++I) 825 if (static_cast<unsigned>(Mask[I]) != BegIdx + I) 826 return false; 827 return true; 828 } 829 830 Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) { 831 Value *LHS = SVI.getOperand(0); 832 Value *RHS = SVI.getOperand(1); 833 SmallVector<int, 16> Mask = SVI.getShuffleMask(); 834 Type *Int32Ty = Type::getInt32Ty(SVI.getContext()); 835 836 bool MadeChange = false; 837 838 // Undefined shuffle mask -> undefined value. 839 if (isa<UndefValue>(SVI.getOperand(2))) 840 return ReplaceInstUsesWith(SVI, UndefValue::get(SVI.getType())); 841 842 unsigned VWidth = cast<VectorType>(SVI.getType())->getNumElements(); 843 844 APInt UndefElts(VWidth, 0); 845 APInt AllOnesEltMask(APInt::getAllOnesValue(VWidth)); 846 if (Value *V = SimplifyDemandedVectorElts(&SVI, AllOnesEltMask, UndefElts)) { 847 if (V != &SVI) 848 return ReplaceInstUsesWith(SVI, V); 849 LHS = SVI.getOperand(0); 850 RHS = SVI.getOperand(1); 851 MadeChange = true; 852 } 853 854 unsigned LHSWidth = cast<VectorType>(LHS->getType())->getNumElements(); 855 856 // Canonicalize shuffle(x ,x,mask) -> shuffle(x, undef,mask') 857 // Canonicalize shuffle(undef,x,mask) -> shuffle(x, undef,mask'). 858 if (LHS == RHS || isa<UndefValue>(LHS)) { 859 if (isa<UndefValue>(LHS) && LHS == RHS) { 860 // shuffle(undef,undef,mask) -> undef. 861 Value *Result = (VWidth == LHSWidth) 862 ? LHS : UndefValue::get(SVI.getType()); 863 return ReplaceInstUsesWith(SVI, Result); 864 } 865 866 // Remap any references to RHS to use LHS. 867 SmallVector<Constant*, 16> Elts; 868 for (unsigned i = 0, e = LHSWidth; i != VWidth; ++i) { 869 if (Mask[i] < 0) { 870 Elts.push_back(UndefValue::get(Int32Ty)); 871 continue; 872 } 873 874 if ((Mask[i] >= (int)e && isa<UndefValue>(RHS)) || 875 (Mask[i] < (int)e && isa<UndefValue>(LHS))) { 876 Mask[i] = -1; // Turn into undef. 877 Elts.push_back(UndefValue::get(Int32Ty)); 878 } else { 879 Mask[i] = Mask[i] % e; // Force to LHS. 880 Elts.push_back(ConstantInt::get(Int32Ty, Mask[i])); 881 } 882 } 883 SVI.setOperand(0, SVI.getOperand(1)); 884 SVI.setOperand(1, UndefValue::get(RHS->getType())); 885 SVI.setOperand(2, ConstantVector::get(Elts)); 886 LHS = SVI.getOperand(0); 887 RHS = SVI.getOperand(1); 888 MadeChange = true; 889 } 890 891 if (VWidth == LHSWidth) { 892 // Analyze the shuffle, are the LHS or RHS and identity shuffles? 893 bool isLHSID, isRHSID; 894 recognizeIdentityMask(Mask, isLHSID, isRHSID); 895 896 // Eliminate identity shuffles. 897 if (isLHSID) return ReplaceInstUsesWith(SVI, LHS); 898 if (isRHSID) return ReplaceInstUsesWith(SVI, RHS); 899 } 900 901 if (isa<UndefValue>(RHS) && CanEvaluateShuffled(LHS, Mask)) { 902 Value *V = EvaluateInDifferentElementOrder(LHS, Mask); 903 return ReplaceInstUsesWith(SVI, V); 904 } 905 906 // SROA generates shuffle+bitcast when the extracted sub-vector is bitcast to 907 // a non-vector type. We can instead bitcast the original vector followed by 908 // an extract of the desired element: 909 // 910 // %sroa = shufflevector <16 x i8> %in, <16 x i8> undef, 911 // <4 x i32> <i32 0, i32 1, i32 2, i32 3> 912 // %1 = bitcast <4 x i8> %sroa to i32 913 // Becomes: 914 // %bc = bitcast <16 x i8> %in to <4 x i32> 915 // %ext = extractelement <4 x i32> %bc, i32 0 916 // 917 // If the shuffle is extracting a contiguous range of values from the input 918 // vector then each use which is a bitcast of the extracted size can be 919 // replaced. This will work if the vector types are compatible, and the begin 920 // index is aligned to a value in the casted vector type. If the begin index 921 // isn't aligned then we can shuffle the original vector (keeping the same 922 // vector type) before extracting. 923 // 924 // This code will bail out if the target type is fundamentally incompatible 925 // with vectors of the source type. 926 // 927 // Example of <16 x i8>, target type i32: 928 // Index range [4,8): v-----------v Will work. 929 // +--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+--+ 930 // <16 x i8>: | | | | | | | | | | | | | | | | | 931 // <4 x i32>: | | | | | 932 // +-----------+-----------+-----------+-----------+ 933 // Index range [6,10): ^-----------^ Needs an extra shuffle. 934 // Target type i40: ^--------------^ Won't work, bail. 935 if (isShuffleExtractingFromLHS(SVI, Mask)) { 936 Value *V = LHS; 937 unsigned MaskElems = Mask.size(); 938 unsigned BegIdx = Mask.front(); 939 VectorType *SrcTy = cast<VectorType>(V->getType()); 940 unsigned VecBitWidth = SrcTy->getBitWidth(); 941 unsigned SrcElemBitWidth = DL.getTypeSizeInBits(SrcTy->getElementType()); 942 assert(SrcElemBitWidth && "vector elements must have a bitwidth"); 943 unsigned SrcNumElems = SrcTy->getNumElements(); 944 SmallVector<BitCastInst *, 8> BCs; 945 DenseMap<Type *, Value *> NewBCs; 946 for (User *U : SVI.users()) 947 if (BitCastInst *BC = dyn_cast<BitCastInst>(U)) 948 if (!BC->use_empty()) 949 // Only visit bitcasts that weren't previously handled. 950 BCs.push_back(BC); 951 for (BitCastInst *BC : BCs) { 952 Type *TgtTy = BC->getDestTy(); 953 unsigned TgtElemBitWidth = DL.getTypeSizeInBits(TgtTy); 954 if (!TgtElemBitWidth) 955 continue; 956 unsigned TgtNumElems = VecBitWidth / TgtElemBitWidth; 957 bool VecBitWidthsEqual = VecBitWidth == TgtNumElems * TgtElemBitWidth; 958 bool BegIsAligned = 0 == ((SrcElemBitWidth * BegIdx) % TgtElemBitWidth); 959 if (!VecBitWidthsEqual) 960 continue; 961 if (!VectorType::isValidElementType(TgtTy)) 962 continue; 963 VectorType *CastSrcTy = VectorType::get(TgtTy, TgtNumElems); 964 if (!BegIsAligned) { 965 // Shuffle the input so [0,NumElements) contains the output, and 966 // [NumElems,SrcNumElems) is undef. 967 SmallVector<Constant *, 16> ShuffleMask(SrcNumElems, 968 UndefValue::get(Int32Ty)); 969 for (unsigned I = 0, E = MaskElems, Idx = BegIdx; I != E; ++Idx, ++I) 970 ShuffleMask[I] = ConstantInt::get(Int32Ty, Idx); 971 V = Builder->CreateShuffleVector(V, UndefValue::get(V->getType()), 972 ConstantVector::get(ShuffleMask), 973 SVI.getName() + ".extract"); 974 BegIdx = 0; 975 } 976 unsigned SrcElemsPerTgtElem = TgtElemBitWidth / SrcElemBitWidth; 977 assert(SrcElemsPerTgtElem); 978 BegIdx /= SrcElemsPerTgtElem; 979 bool BCAlreadyExists = NewBCs.find(CastSrcTy) != NewBCs.end(); 980 auto *NewBC = 981 BCAlreadyExists 982 ? NewBCs[CastSrcTy] 983 : Builder->CreateBitCast(V, CastSrcTy, SVI.getName() + ".bc"); 984 if (!BCAlreadyExists) 985 NewBCs[CastSrcTy] = NewBC; 986 auto *Ext = Builder->CreateExtractElement( 987 NewBC, ConstantInt::get(Int32Ty, BegIdx), SVI.getName() + ".extract"); 988 // The shufflevector isn't being replaced: the bitcast that used it 989 // is. InstCombine will visit the newly-created instructions. 990 ReplaceInstUsesWith(*BC, Ext); 991 MadeChange = true; 992 } 993 } 994 995 // If the LHS is a shufflevector itself, see if we can combine it with this 996 // one without producing an unusual shuffle. 997 // Cases that might be simplified: 998 // 1. 999 // x1=shuffle(v1,v2,mask1) 1000 // x=shuffle(x1,undef,mask) 1001 // ==> 1002 // x=shuffle(v1,undef,newMask) 1003 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : -1 1004 // 2. 1005 // x1=shuffle(v1,undef,mask1) 1006 // x=shuffle(x1,x2,mask) 1007 // where v1.size() == mask1.size() 1008 // ==> 1009 // x=shuffle(v1,x2,newMask) 1010 // newMask[i] = (mask[i] < x1.size()) ? mask1[mask[i]] : mask[i] 1011 // 3. 1012 // x2=shuffle(v2,undef,mask2) 1013 // x=shuffle(x1,x2,mask) 1014 // where v2.size() == mask2.size() 1015 // ==> 1016 // x=shuffle(x1,v2,newMask) 1017 // newMask[i] = (mask[i] < x1.size()) 1018 // ? mask[i] : mask2[mask[i]-x1.size()]+x1.size() 1019 // 4. 1020 // x1=shuffle(v1,undef,mask1) 1021 // x2=shuffle(v2,undef,mask2) 1022 // x=shuffle(x1,x2,mask) 1023 // where v1.size() == v2.size() 1024 // ==> 1025 // x=shuffle(v1,v2,newMask) 1026 // newMask[i] = (mask[i] < x1.size()) 1027 // ? mask1[mask[i]] : mask2[mask[i]-x1.size()]+v1.size() 1028 // 1029 // Here we are really conservative: 1030 // we are absolutely afraid of producing a shuffle mask not in the input 1031 // program, because the code gen may not be smart enough to turn a merged 1032 // shuffle into two specific shuffles: it may produce worse code. As such, 1033 // we only merge two shuffles if the result is either a splat or one of the 1034 // input shuffle masks. In this case, merging the shuffles just removes 1035 // one instruction, which we know is safe. This is good for things like 1036 // turning: (splat(splat)) -> splat, or 1037 // merge(V[0..n], V[n+1..2n]) -> V[0..2n] 1038 ShuffleVectorInst* LHSShuffle = dyn_cast<ShuffleVectorInst>(LHS); 1039 ShuffleVectorInst* RHSShuffle = dyn_cast<ShuffleVectorInst>(RHS); 1040 if (LHSShuffle) 1041 if (!isa<UndefValue>(LHSShuffle->getOperand(1)) && !isa<UndefValue>(RHS)) 1042 LHSShuffle = nullptr; 1043 if (RHSShuffle) 1044 if (!isa<UndefValue>(RHSShuffle->getOperand(1))) 1045 RHSShuffle = nullptr; 1046 if (!LHSShuffle && !RHSShuffle) 1047 return MadeChange ? &SVI : nullptr; 1048 1049 Value* LHSOp0 = nullptr; 1050 Value* LHSOp1 = nullptr; 1051 Value* RHSOp0 = nullptr; 1052 unsigned LHSOp0Width = 0; 1053 unsigned RHSOp0Width = 0; 1054 if (LHSShuffle) { 1055 LHSOp0 = LHSShuffle->getOperand(0); 1056 LHSOp1 = LHSShuffle->getOperand(1); 1057 LHSOp0Width = cast<VectorType>(LHSOp0->getType())->getNumElements(); 1058 } 1059 if (RHSShuffle) { 1060 RHSOp0 = RHSShuffle->getOperand(0); 1061 RHSOp0Width = cast<VectorType>(RHSOp0->getType())->getNumElements(); 1062 } 1063 Value* newLHS = LHS; 1064 Value* newRHS = RHS; 1065 if (LHSShuffle) { 1066 // case 1 1067 if (isa<UndefValue>(RHS)) { 1068 newLHS = LHSOp0; 1069 newRHS = LHSOp1; 1070 } 1071 // case 2 or 4 1072 else if (LHSOp0Width == LHSWidth) { 1073 newLHS = LHSOp0; 1074 } 1075 } 1076 // case 3 or 4 1077 if (RHSShuffle && RHSOp0Width == LHSWidth) { 1078 newRHS = RHSOp0; 1079 } 1080 // case 4 1081 if (LHSOp0 == RHSOp0) { 1082 newLHS = LHSOp0; 1083 newRHS = nullptr; 1084 } 1085 1086 if (newLHS == LHS && newRHS == RHS) 1087 return MadeChange ? &SVI : nullptr; 1088 1089 SmallVector<int, 16> LHSMask; 1090 SmallVector<int, 16> RHSMask; 1091 if (newLHS != LHS) 1092 LHSMask = LHSShuffle->getShuffleMask(); 1093 if (RHSShuffle && newRHS != RHS) 1094 RHSMask = RHSShuffle->getShuffleMask(); 1095 1096 unsigned newLHSWidth = (newLHS != LHS) ? LHSOp0Width : LHSWidth; 1097 SmallVector<int, 16> newMask; 1098 bool isSplat = true; 1099 int SplatElt = -1; 1100 // Create a new mask for the new ShuffleVectorInst so that the new 1101 // ShuffleVectorInst is equivalent to the original one. 1102 for (unsigned i = 0; i < VWidth; ++i) { 1103 int eltMask; 1104 if (Mask[i] < 0) { 1105 // This element is an undef value. 1106 eltMask = -1; 1107 } else if (Mask[i] < (int)LHSWidth) { 1108 // This element is from left hand side vector operand. 1109 // 1110 // If LHS is going to be replaced (case 1, 2, or 4), calculate the 1111 // new mask value for the element. 1112 if (newLHS != LHS) { 1113 eltMask = LHSMask[Mask[i]]; 1114 // If the value selected is an undef value, explicitly specify it 1115 // with a -1 mask value. 1116 if (eltMask >= (int)LHSOp0Width && isa<UndefValue>(LHSOp1)) 1117 eltMask = -1; 1118 } else 1119 eltMask = Mask[i]; 1120 } else { 1121 // This element is from right hand side vector operand 1122 // 1123 // If the value selected is an undef value, explicitly specify it 1124 // with a -1 mask value. (case 1) 1125 if (isa<UndefValue>(RHS)) 1126 eltMask = -1; 1127 // If RHS is going to be replaced (case 3 or 4), calculate the 1128 // new mask value for the element. 1129 else if (newRHS != RHS) { 1130 eltMask = RHSMask[Mask[i]-LHSWidth]; 1131 // If the value selected is an undef value, explicitly specify it 1132 // with a -1 mask value. 1133 if (eltMask >= (int)RHSOp0Width) { 1134 assert(isa<UndefValue>(RHSShuffle->getOperand(1)) 1135 && "should have been check above"); 1136 eltMask = -1; 1137 } 1138 } else 1139 eltMask = Mask[i]-LHSWidth; 1140 1141 // If LHS's width is changed, shift the mask value accordingly. 1142 // If newRHS == NULL, i.e. LHSOp0 == RHSOp0, we want to remap any 1143 // references from RHSOp0 to LHSOp0, so we don't need to shift the mask. 1144 // If newRHS == newLHS, we want to remap any references from newRHS to 1145 // newLHS so that we can properly identify splats that may occur due to 1146 // obfuscation across the two vectors. 1147 if (eltMask >= 0 && newRHS != nullptr && newLHS != newRHS) 1148 eltMask += newLHSWidth; 1149 } 1150 1151 // Check if this could still be a splat. 1152 if (eltMask >= 0) { 1153 if (SplatElt >= 0 && SplatElt != eltMask) 1154 isSplat = false; 1155 SplatElt = eltMask; 1156 } 1157 1158 newMask.push_back(eltMask); 1159 } 1160 1161 // If the result mask is equal to one of the original shuffle masks, 1162 // or is a splat, do the replacement. 1163 if (isSplat || newMask == LHSMask || newMask == RHSMask || newMask == Mask) { 1164 SmallVector<Constant*, 16> Elts; 1165 for (unsigned i = 0, e = newMask.size(); i != e; ++i) { 1166 if (newMask[i] < 0) { 1167 Elts.push_back(UndefValue::get(Int32Ty)); 1168 } else { 1169 Elts.push_back(ConstantInt::get(Int32Ty, newMask[i])); 1170 } 1171 } 1172 if (!newRHS) 1173 newRHS = UndefValue::get(newLHS->getType()); 1174 return new ShuffleVectorInst(newLHS, newRHS, ConstantVector::get(Elts)); 1175 } 1176 1177 // If the result mask is an identity, replace uses of this instruction with 1178 // corresponding argument. 1179 bool isLHSID, isRHSID; 1180 recognizeIdentityMask(newMask, isLHSID, isRHSID); 1181 if (isLHSID && VWidth == LHSOp0Width) return ReplaceInstUsesWith(SVI, newLHS); 1182 if (isRHSID && VWidth == RHSOp0Width) return ReplaceInstUsesWith(SVI, newRHS); 1183 1184 return MadeChange ? &SVI : nullptr; 1185 } 1186