Home | History | Annotate | Download | only in Scalar
      1 //===--- Scalarizer.cpp - Scalarize vector operations ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass converts vector operations into scalar operations, in order
     11 // to expose optimization opportunities on the individual scalar operations.
     12 // It is mainly intended for targets that do not have vector units, but it
     13 // may also be useful for revectorizing code to different vector widths.
     14 //
     15 //===----------------------------------------------------------------------===//
     16 
     17 #include "llvm/Transforms/Scalar.h"
     18 #include "llvm/ADT/STLExtras.h"
     19 #include "llvm/IR/IRBuilder.h"
     20 #include "llvm/IR/InstVisitor.h"
     21 #include "llvm/Pass.h"
     22 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
     23 
     24 using namespace llvm;
     25 
     26 #define DEBUG_TYPE "scalarizer"
     27 
     28 namespace {
     29 // Used to store the scattered form of a vector.
     30 typedef SmallVector<Value *, 8> ValueVector;
     31 
     32 // Used to map a vector Value to its scattered form.  We use std::map
     33 // because we want iterators to persist across insertion and because the
     34 // values are relatively large.
     35 typedef std::map<Value *, ValueVector> ScatterMap;
     36 
     37 // Lists Instructions that have been replaced with scalar implementations,
     38 // along with a pointer to their scattered forms.
     39 typedef SmallVector<std::pair<Instruction *, ValueVector *>, 16> GatherList;
     40 
     41 // Provides a very limited vector-like interface for lazily accessing one
     42 // component of a scattered vector or vector pointer.
     43 class Scatterer {
     44 public:
     45   Scatterer() {}
     46 
     47   // Scatter V into Size components.  If new instructions are needed,
     48   // insert them before BBI in BB.  If Cache is nonnull, use it to cache
     49   // the results.
     50   Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
     51             ValueVector *cachePtr = nullptr);
     52 
     53   // Return component I, creating a new Value for it if necessary.
     54   Value *operator[](unsigned I);
     55 
     56   // Return the number of components.
     57   unsigned size() const { return Size; }
     58 
     59 private:
     60   BasicBlock *BB;
     61   BasicBlock::iterator BBI;
     62   Value *V;
     63   ValueVector *CachePtr;
     64   PointerType *PtrTy;
     65   ValueVector Tmp;
     66   unsigned Size;
     67 };
     68 
     69 // FCmpSpliiter(FCI)(Builder, X, Y, Name) uses Builder to create an FCmp
     70 // called Name that compares X and Y in the same way as FCI.
     71 struct FCmpSplitter {
     72   FCmpSplitter(FCmpInst &fci) : FCI(fci) {}
     73   Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
     74                     const Twine &Name) const {
     75     return Builder.CreateFCmp(FCI.getPredicate(), Op0, Op1, Name);
     76   }
     77   FCmpInst &FCI;
     78 };
     79 
     80 // ICmpSpliiter(ICI)(Builder, X, Y, Name) uses Builder to create an ICmp
     81 // called Name that compares X and Y in the same way as ICI.
     82 struct ICmpSplitter {
     83   ICmpSplitter(ICmpInst &ici) : ICI(ici) {}
     84   Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
     85                     const Twine &Name) const {
     86     return Builder.CreateICmp(ICI.getPredicate(), Op0, Op1, Name);
     87   }
     88   ICmpInst &ICI;
     89 };
     90 
     91 // BinarySpliiter(BO)(Builder, X, Y, Name) uses Builder to create
     92 // a binary operator like BO called Name with operands X and Y.
     93 struct BinarySplitter {
     94   BinarySplitter(BinaryOperator &bo) : BO(bo) {}
     95   Value *operator()(IRBuilder<> &Builder, Value *Op0, Value *Op1,
     96                     const Twine &Name) const {
     97     return Builder.CreateBinOp(BO.getOpcode(), Op0, Op1, Name);
     98   }
     99   BinaryOperator &BO;
    100 };
    101 
    102 // Information about a load or store that we're scalarizing.
    103 struct VectorLayout {
    104   VectorLayout() : VecTy(nullptr), ElemTy(nullptr), VecAlign(0), ElemSize(0) {}
    105 
    106   // Return the alignment of element I.
    107   uint64_t getElemAlign(unsigned I) {
    108     return MinAlign(VecAlign, I * ElemSize);
    109   }
    110 
    111   // The type of the vector.
    112   VectorType *VecTy;
    113 
    114   // The type of each element.
    115   Type *ElemTy;
    116 
    117   // The alignment of the vector.
    118   uint64_t VecAlign;
    119 
    120   // The size of each element.
    121   uint64_t ElemSize;
    122 };
    123 
    124 class Scalarizer : public FunctionPass,
    125                    public InstVisitor<Scalarizer, bool> {
    126 public:
    127   static char ID;
    128 
    129   Scalarizer() :
    130     FunctionPass(ID) {
    131     initializeScalarizerPass(*PassRegistry::getPassRegistry());
    132   }
    133 
    134   bool doInitialization(Module &M) override;
    135   bool runOnFunction(Function &F) override;
    136 
    137   // InstVisitor methods.  They return true if the instruction was scalarized,
    138   // false if nothing changed.
    139   bool visitInstruction(Instruction &) { return false; }
    140   bool visitSelectInst(SelectInst &SI);
    141   bool visitICmpInst(ICmpInst &);
    142   bool visitFCmpInst(FCmpInst &);
    143   bool visitBinaryOperator(BinaryOperator &);
    144   bool visitGetElementPtrInst(GetElementPtrInst &);
    145   bool visitCastInst(CastInst &);
    146   bool visitBitCastInst(BitCastInst &);
    147   bool visitShuffleVectorInst(ShuffleVectorInst &);
    148   bool visitPHINode(PHINode &);
    149   bool visitLoadInst(LoadInst &);
    150   bool visitStoreInst(StoreInst &);
    151 
    152   static void registerOptions() {
    153     // This is disabled by default because having separate loads and stores
    154     // makes it more likely that the -combiner-alias-analysis limits will be
    155     // reached.
    156     OptionRegistry::registerOption<bool, Scalarizer,
    157                                  &Scalarizer::ScalarizeLoadStore>(
    158         "scalarize-load-store",
    159         "Allow the scalarizer pass to scalarize loads and store", false);
    160   }
    161 
    162 private:
    163   Scatterer scatter(Instruction *, Value *);
    164   void gather(Instruction *, const ValueVector &);
    165   bool canTransferMetadata(unsigned Kind);
    166   void transferMetadata(Instruction *, const ValueVector &);
    167   bool getVectorLayout(Type *, unsigned, VectorLayout &, const DataLayout &);
    168   bool finish();
    169 
    170   template<typename T> bool splitBinary(Instruction &, const T &);
    171 
    172   ScatterMap Scattered;
    173   GatherList Gathered;
    174   unsigned ParallelLoopAccessMDKind;
    175   bool ScalarizeLoadStore;
    176 };
    177 
    178 char Scalarizer::ID = 0;
    179 } // end anonymous namespace
    180 
    181 INITIALIZE_PASS_WITH_OPTIONS(Scalarizer, "scalarizer",
    182                              "Scalarize vector operations", false, false)
    183 
    184 Scatterer::Scatterer(BasicBlock *bb, BasicBlock::iterator bbi, Value *v,
    185                      ValueVector *cachePtr)
    186   : BB(bb), BBI(bbi), V(v), CachePtr(cachePtr) {
    187   Type *Ty = V->getType();
    188   PtrTy = dyn_cast<PointerType>(Ty);
    189   if (PtrTy)
    190     Ty = PtrTy->getElementType();
    191   Size = Ty->getVectorNumElements();
    192   if (!CachePtr)
    193     Tmp.resize(Size, nullptr);
    194   else if (CachePtr->empty())
    195     CachePtr->resize(Size, nullptr);
    196   else
    197     assert(Size == CachePtr->size() && "Inconsistent vector sizes");
    198 }
    199 
    200 // Return component I, creating a new Value for it if necessary.
    201 Value *Scatterer::operator[](unsigned I) {
    202   ValueVector &CV = (CachePtr ? *CachePtr : Tmp);
    203   // Try to reuse a previous value.
    204   if (CV[I])
    205     return CV[I];
    206   IRBuilder<> Builder(BB, BBI);
    207   if (PtrTy) {
    208     if (!CV[0]) {
    209       Type *Ty =
    210         PointerType::get(PtrTy->getElementType()->getVectorElementType(),
    211                          PtrTy->getAddressSpace());
    212       CV[0] = Builder.CreateBitCast(V, Ty, V->getName() + ".i0");
    213     }
    214     if (I != 0)
    215       CV[I] = Builder.CreateConstGEP1_32(nullptr, CV[0], I,
    216                                          V->getName() + ".i" + Twine(I));
    217   } else {
    218     // Search through a chain of InsertElementInsts looking for element I.
    219     // Record other elements in the cache.  The new V is still suitable
    220     // for all uncached indices.
    221     for (;;) {
    222       InsertElementInst *Insert = dyn_cast<InsertElementInst>(V);
    223       if (!Insert)
    224         break;
    225       ConstantInt *Idx = dyn_cast<ConstantInt>(Insert->getOperand(2));
    226       if (!Idx)
    227         break;
    228       unsigned J = Idx->getZExtValue();
    229       V = Insert->getOperand(0);
    230       if (I == J) {
    231         CV[J] = Insert->getOperand(1);
    232         return CV[J];
    233       } else if (!CV[J]) {
    234         // Only cache the first entry we find for each index we're not actively
    235         // searching for. This prevents us from going too far up the chain and
    236         // caching incorrect entries.
    237         CV[J] = Insert->getOperand(1);
    238       }
    239     }
    240     CV[I] = Builder.CreateExtractElement(V, Builder.getInt32(I),
    241                                          V->getName() + ".i" + Twine(I));
    242   }
    243   return CV[I];
    244 }
    245 
    246 bool Scalarizer::doInitialization(Module &M) {
    247   ParallelLoopAccessMDKind =
    248       M.getContext().getMDKindID("llvm.mem.parallel_loop_access");
    249   ScalarizeLoadStore =
    250       M.getContext().getOption<bool, Scalarizer, &Scalarizer::ScalarizeLoadStore>();
    251   return false;
    252 }
    253 
    254 bool Scalarizer::runOnFunction(Function &F) {
    255   if (skipFunction(F))
    256     return false;
    257   assert(Gathered.empty() && Scattered.empty());
    258   for (BasicBlock &BB : F) {
    259     for (BasicBlock::iterator II = BB.begin(), IE = BB.end(); II != IE;) {
    260       Instruction *I = &*II;
    261       bool Done = visit(I);
    262       ++II;
    263       if (Done && I->getType()->isVoidTy())
    264         I->eraseFromParent();
    265     }
    266   }
    267   return finish();
    268 }
    269 
    270 // Return a scattered form of V that can be accessed by Point.  V must be a
    271 // vector or a pointer to a vector.
    272 Scatterer Scalarizer::scatter(Instruction *Point, Value *V) {
    273   if (Argument *VArg = dyn_cast<Argument>(V)) {
    274     // Put the scattered form of arguments in the entry block,
    275     // so that it can be used everywhere.
    276     Function *F = VArg->getParent();
    277     BasicBlock *BB = &F->getEntryBlock();
    278     return Scatterer(BB, BB->begin(), V, &Scattered[V]);
    279   }
    280   if (Instruction *VOp = dyn_cast<Instruction>(V)) {
    281     // Put the scattered form of an instruction directly after the
    282     // instruction.
    283     BasicBlock *BB = VOp->getParent();
    284     return Scatterer(BB, std::next(BasicBlock::iterator(VOp)),
    285                      V, &Scattered[V]);
    286   }
    287   // In the fallback case, just put the scattered before Point and
    288   // keep the result local to Point.
    289   return Scatterer(Point->getParent(), Point->getIterator(), V);
    290 }
    291 
    292 // Replace Op with the gathered form of the components in CV.  Defer the
    293 // deletion of Op and creation of the gathered form to the end of the pass,
    294 // so that we can avoid creating the gathered form if all uses of Op are
    295 // replaced with uses of CV.
    296 void Scalarizer::gather(Instruction *Op, const ValueVector &CV) {
    297   // Since we're not deleting Op yet, stub out its operands, so that it
    298   // doesn't make anything live unnecessarily.
    299   for (unsigned I = 0, E = Op->getNumOperands(); I != E; ++I)
    300     Op->setOperand(I, UndefValue::get(Op->getOperand(I)->getType()));
    301 
    302   transferMetadata(Op, CV);
    303 
    304   // If we already have a scattered form of Op (created from ExtractElements
    305   // of Op itself), replace them with the new form.
    306   ValueVector &SV = Scattered[Op];
    307   if (!SV.empty()) {
    308     for (unsigned I = 0, E = SV.size(); I != E; ++I) {
    309       Value *V = SV[I];
    310       if (V == nullptr)
    311         continue;
    312 
    313       Instruction *Old = cast<Instruction>(V);
    314       CV[I]->takeName(Old);
    315       Old->replaceAllUsesWith(CV[I]);
    316       Old->eraseFromParent();
    317     }
    318   }
    319   SV = CV;
    320   Gathered.push_back(GatherList::value_type(Op, &SV));
    321 }
    322 
    323 // Return true if it is safe to transfer the given metadata tag from
    324 // vector to scalar instructions.
    325 bool Scalarizer::canTransferMetadata(unsigned Tag) {
    326   return (Tag == LLVMContext::MD_tbaa
    327           || Tag == LLVMContext::MD_fpmath
    328           || Tag == LLVMContext::MD_tbaa_struct
    329           || Tag == LLVMContext::MD_invariant_load
    330           || Tag == LLVMContext::MD_alias_scope
    331           || Tag == LLVMContext::MD_noalias
    332           || Tag == ParallelLoopAccessMDKind);
    333 }
    334 
    335 // Transfer metadata from Op to the instructions in CV if it is known
    336 // to be safe to do so.
    337 void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
    338   SmallVector<std::pair<unsigned, MDNode *>, 4> MDs;
    339   Op->getAllMetadataOtherThanDebugLoc(MDs);
    340   for (unsigned I = 0, E = CV.size(); I != E; ++I) {
    341     if (Instruction *New = dyn_cast<Instruction>(CV[I])) {
    342       for (const auto &MD : MDs)
    343         if (canTransferMetadata(MD.first))
    344           New->setMetadata(MD.first, MD.second);
    345       if (Op->getDebugLoc() && !New->getDebugLoc())
    346         New->setDebugLoc(Op->getDebugLoc());
    347     }
    348   }
    349 }
    350 
    351 // Try to fill in Layout from Ty, returning true on success.  Alignment is
    352 // the alignment of the vector, or 0 if the ABI default should be used.
    353 bool Scalarizer::getVectorLayout(Type *Ty, unsigned Alignment,
    354                                  VectorLayout &Layout, const DataLayout &DL) {
    355   // Make sure we're dealing with a vector.
    356   Layout.VecTy = dyn_cast<VectorType>(Ty);
    357   if (!Layout.VecTy)
    358     return false;
    359 
    360   // Check that we're dealing with full-byte elements.
    361   Layout.ElemTy = Layout.VecTy->getElementType();
    362   if (DL.getTypeSizeInBits(Layout.ElemTy) !=
    363       DL.getTypeStoreSizeInBits(Layout.ElemTy))
    364     return false;
    365 
    366   if (Alignment)
    367     Layout.VecAlign = Alignment;
    368   else
    369     Layout.VecAlign = DL.getABITypeAlignment(Layout.VecTy);
    370   Layout.ElemSize = DL.getTypeStoreSize(Layout.ElemTy);
    371   return true;
    372 }
    373 
    374 // Scalarize two-operand instruction I, using Split(Builder, X, Y, Name)
    375 // to create an instruction like I with operands X and Y and name Name.
    376 template<typename Splitter>
    377 bool Scalarizer::splitBinary(Instruction &I, const Splitter &Split) {
    378   VectorType *VT = dyn_cast<VectorType>(I.getType());
    379   if (!VT)
    380     return false;
    381 
    382   unsigned NumElems = VT->getNumElements();
    383   IRBuilder<> Builder(&I);
    384   Scatterer Op0 = scatter(&I, I.getOperand(0));
    385   Scatterer Op1 = scatter(&I, I.getOperand(1));
    386   assert(Op0.size() == NumElems && "Mismatched binary operation");
    387   assert(Op1.size() == NumElems && "Mismatched binary operation");
    388   ValueVector Res;
    389   Res.resize(NumElems);
    390   for (unsigned Elem = 0; Elem < NumElems; ++Elem)
    391     Res[Elem] = Split(Builder, Op0[Elem], Op1[Elem],
    392                       I.getName() + ".i" + Twine(Elem));
    393   gather(&I, Res);
    394   return true;
    395 }
    396 
    397 bool Scalarizer::visitSelectInst(SelectInst &SI) {
    398   VectorType *VT = dyn_cast<VectorType>(SI.getType());
    399   if (!VT)
    400     return false;
    401 
    402   unsigned NumElems = VT->getNumElements();
    403   IRBuilder<> Builder(&SI);
    404   Scatterer Op1 = scatter(&SI, SI.getOperand(1));
    405   Scatterer Op2 = scatter(&SI, SI.getOperand(2));
    406   assert(Op1.size() == NumElems && "Mismatched select");
    407   assert(Op2.size() == NumElems && "Mismatched select");
    408   ValueVector Res;
    409   Res.resize(NumElems);
    410 
    411   if (SI.getOperand(0)->getType()->isVectorTy()) {
    412     Scatterer Op0 = scatter(&SI, SI.getOperand(0));
    413     assert(Op0.size() == NumElems && "Mismatched select");
    414     for (unsigned I = 0; I < NumElems; ++I)
    415       Res[I] = Builder.CreateSelect(Op0[I], Op1[I], Op2[I],
    416                                     SI.getName() + ".i" + Twine(I));
    417   } else {
    418     Value *Op0 = SI.getOperand(0);
    419     for (unsigned I = 0; I < NumElems; ++I)
    420       Res[I] = Builder.CreateSelect(Op0, Op1[I], Op2[I],
    421                                     SI.getName() + ".i" + Twine(I));
    422   }
    423   gather(&SI, Res);
    424   return true;
    425 }
    426 
    427 bool Scalarizer::visitICmpInst(ICmpInst &ICI) {
    428   return splitBinary(ICI, ICmpSplitter(ICI));
    429 }
    430 
    431 bool Scalarizer::visitFCmpInst(FCmpInst &FCI) {
    432   return splitBinary(FCI, FCmpSplitter(FCI));
    433 }
    434 
    435 bool Scalarizer::visitBinaryOperator(BinaryOperator &BO) {
    436   return splitBinary(BO, BinarySplitter(BO));
    437 }
    438 
    439 bool Scalarizer::visitGetElementPtrInst(GetElementPtrInst &GEPI) {
    440   VectorType *VT = dyn_cast<VectorType>(GEPI.getType());
    441   if (!VT)
    442     return false;
    443 
    444   IRBuilder<> Builder(&GEPI);
    445   unsigned NumElems = VT->getNumElements();
    446   unsigned NumIndices = GEPI.getNumIndices();
    447 
    448   Scatterer Base = scatter(&GEPI, GEPI.getOperand(0));
    449 
    450   SmallVector<Scatterer, 8> Ops;
    451   Ops.resize(NumIndices);
    452   for (unsigned I = 0; I < NumIndices; ++I)
    453     Ops[I] = scatter(&GEPI, GEPI.getOperand(I + 1));
    454 
    455   ValueVector Res;
    456   Res.resize(NumElems);
    457   for (unsigned I = 0; I < NumElems; ++I) {
    458     SmallVector<Value *, 8> Indices;
    459     Indices.resize(NumIndices);
    460     for (unsigned J = 0; J < NumIndices; ++J)
    461       Indices[J] = Ops[J][I];
    462     Res[I] = Builder.CreateGEP(GEPI.getSourceElementType(), Base[I], Indices,
    463                                GEPI.getName() + ".i" + Twine(I));
    464     if (GEPI.isInBounds())
    465       if (GetElementPtrInst *NewGEPI = dyn_cast<GetElementPtrInst>(Res[I]))
    466         NewGEPI->setIsInBounds();
    467   }
    468   gather(&GEPI, Res);
    469   return true;
    470 }
    471 
    472 bool Scalarizer::visitCastInst(CastInst &CI) {
    473   VectorType *VT = dyn_cast<VectorType>(CI.getDestTy());
    474   if (!VT)
    475     return false;
    476 
    477   unsigned NumElems = VT->getNumElements();
    478   IRBuilder<> Builder(&CI);
    479   Scatterer Op0 = scatter(&CI, CI.getOperand(0));
    480   assert(Op0.size() == NumElems && "Mismatched cast");
    481   ValueVector Res;
    482   Res.resize(NumElems);
    483   for (unsigned I = 0; I < NumElems; ++I)
    484     Res[I] = Builder.CreateCast(CI.getOpcode(), Op0[I], VT->getElementType(),
    485                                 CI.getName() + ".i" + Twine(I));
    486   gather(&CI, Res);
    487   return true;
    488 }
    489 
    490 bool Scalarizer::visitBitCastInst(BitCastInst &BCI) {
    491   VectorType *DstVT = dyn_cast<VectorType>(BCI.getDestTy());
    492   VectorType *SrcVT = dyn_cast<VectorType>(BCI.getSrcTy());
    493   if (!DstVT || !SrcVT)
    494     return false;
    495 
    496   unsigned DstNumElems = DstVT->getNumElements();
    497   unsigned SrcNumElems = SrcVT->getNumElements();
    498   IRBuilder<> Builder(&BCI);
    499   Scatterer Op0 = scatter(&BCI, BCI.getOperand(0));
    500   ValueVector Res;
    501   Res.resize(DstNumElems);
    502 
    503   if (DstNumElems == SrcNumElems) {
    504     for (unsigned I = 0; I < DstNumElems; ++I)
    505       Res[I] = Builder.CreateBitCast(Op0[I], DstVT->getElementType(),
    506                                      BCI.getName() + ".i" + Twine(I));
    507   } else if (DstNumElems > SrcNumElems) {
    508     // <M x t1> -> <N*M x t2>.  Convert each t1 to <N x t2> and copy the
    509     // individual elements to the destination.
    510     unsigned FanOut = DstNumElems / SrcNumElems;
    511     Type *MidTy = VectorType::get(DstVT->getElementType(), FanOut);
    512     unsigned ResI = 0;
    513     for (unsigned Op0I = 0; Op0I < SrcNumElems; ++Op0I) {
    514       Value *V = Op0[Op0I];
    515       Instruction *VI;
    516       // Look through any existing bitcasts before converting to <N x t2>.
    517       // In the best case, the resulting conversion might be a no-op.
    518       while ((VI = dyn_cast<Instruction>(V)) &&
    519              VI->getOpcode() == Instruction::BitCast)
    520         V = VI->getOperand(0);
    521       V = Builder.CreateBitCast(V, MidTy, V->getName() + ".cast");
    522       Scatterer Mid = scatter(&BCI, V);
    523       for (unsigned MidI = 0; MidI < FanOut; ++MidI)
    524         Res[ResI++] = Mid[MidI];
    525     }
    526   } else {
    527     // <N*M x t1> -> <M x t2>.  Convert each group of <N x t1> into a t2.
    528     unsigned FanIn = SrcNumElems / DstNumElems;
    529     Type *MidTy = VectorType::get(SrcVT->getElementType(), FanIn);
    530     unsigned Op0I = 0;
    531     for (unsigned ResI = 0; ResI < DstNumElems; ++ResI) {
    532       Value *V = UndefValue::get(MidTy);
    533       for (unsigned MidI = 0; MidI < FanIn; ++MidI)
    534         V = Builder.CreateInsertElement(V, Op0[Op0I++], Builder.getInt32(MidI),
    535                                         BCI.getName() + ".i" + Twine(ResI)
    536                                         + ".upto" + Twine(MidI));
    537       Res[ResI] = Builder.CreateBitCast(V, DstVT->getElementType(),
    538                                         BCI.getName() + ".i" + Twine(ResI));
    539     }
    540   }
    541   gather(&BCI, Res);
    542   return true;
    543 }
    544 
    545 bool Scalarizer::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
    546   VectorType *VT = dyn_cast<VectorType>(SVI.getType());
    547   if (!VT)
    548     return false;
    549 
    550   unsigned NumElems = VT->getNumElements();
    551   Scatterer Op0 = scatter(&SVI, SVI.getOperand(0));
    552   Scatterer Op1 = scatter(&SVI, SVI.getOperand(1));
    553   ValueVector Res;
    554   Res.resize(NumElems);
    555 
    556   for (unsigned I = 0; I < NumElems; ++I) {
    557     int Selector = SVI.getMaskValue(I);
    558     if (Selector < 0)
    559       Res[I] = UndefValue::get(VT->getElementType());
    560     else if (unsigned(Selector) < Op0.size())
    561       Res[I] = Op0[Selector];
    562     else
    563       Res[I] = Op1[Selector - Op0.size()];
    564   }
    565   gather(&SVI, Res);
    566   return true;
    567 }
    568 
    569 bool Scalarizer::visitPHINode(PHINode &PHI) {
    570   VectorType *VT = dyn_cast<VectorType>(PHI.getType());
    571   if (!VT)
    572     return false;
    573 
    574   unsigned NumElems = VT->getNumElements();
    575   IRBuilder<> Builder(&PHI);
    576   ValueVector Res;
    577   Res.resize(NumElems);
    578 
    579   unsigned NumOps = PHI.getNumOperands();
    580   for (unsigned I = 0; I < NumElems; ++I)
    581     Res[I] = Builder.CreatePHI(VT->getElementType(), NumOps,
    582                                PHI.getName() + ".i" + Twine(I));
    583 
    584   for (unsigned I = 0; I < NumOps; ++I) {
    585     Scatterer Op = scatter(&PHI, PHI.getIncomingValue(I));
    586     BasicBlock *IncomingBlock = PHI.getIncomingBlock(I);
    587     for (unsigned J = 0; J < NumElems; ++J)
    588       cast<PHINode>(Res[J])->addIncoming(Op[J], IncomingBlock);
    589   }
    590   gather(&PHI, Res);
    591   return true;
    592 }
    593 
    594 bool Scalarizer::visitLoadInst(LoadInst &LI) {
    595   if (!ScalarizeLoadStore)
    596     return false;
    597   if (!LI.isSimple())
    598     return false;
    599 
    600   VectorLayout Layout;
    601   if (!getVectorLayout(LI.getType(), LI.getAlignment(), Layout,
    602                        LI.getModule()->getDataLayout()))
    603     return false;
    604 
    605   unsigned NumElems = Layout.VecTy->getNumElements();
    606   IRBuilder<> Builder(&LI);
    607   Scatterer Ptr = scatter(&LI, LI.getPointerOperand());
    608   ValueVector Res;
    609   Res.resize(NumElems);
    610 
    611   for (unsigned I = 0; I < NumElems; ++I)
    612     Res[I] = Builder.CreateAlignedLoad(Ptr[I], Layout.getElemAlign(I),
    613                                        LI.getName() + ".i" + Twine(I));
    614   gather(&LI, Res);
    615   return true;
    616 }
    617 
    618 bool Scalarizer::visitStoreInst(StoreInst &SI) {
    619   if (!ScalarizeLoadStore)
    620     return false;
    621   if (!SI.isSimple())
    622     return false;
    623 
    624   VectorLayout Layout;
    625   Value *FullValue = SI.getValueOperand();
    626   if (!getVectorLayout(FullValue->getType(), SI.getAlignment(), Layout,
    627                        SI.getModule()->getDataLayout()))
    628     return false;
    629 
    630   unsigned NumElems = Layout.VecTy->getNumElements();
    631   IRBuilder<> Builder(&SI);
    632   Scatterer Ptr = scatter(&SI, SI.getPointerOperand());
    633   Scatterer Val = scatter(&SI, FullValue);
    634 
    635   ValueVector Stores;
    636   Stores.resize(NumElems);
    637   for (unsigned I = 0; I < NumElems; ++I) {
    638     unsigned Align = Layout.getElemAlign(I);
    639     Stores[I] = Builder.CreateAlignedStore(Val[I], Ptr[I], Align);
    640   }
    641   transferMetadata(&SI, Stores);
    642   return true;
    643 }
    644 
    645 // Delete the instructions that we scalarized.  If a full vector result
    646 // is still needed, recreate it using InsertElements.
    647 bool Scalarizer::finish() {
    648   // The presence of data in Gathered or Scattered indicates changes
    649   // made to the Function.
    650   if (Gathered.empty() && Scattered.empty())
    651     return false;
    652   for (const auto &GMI : Gathered) {
    653     Instruction *Op = GMI.first;
    654     ValueVector &CV = *GMI.second;
    655     if (!Op->use_empty()) {
    656       // The value is still needed, so recreate it using a series of
    657       // InsertElements.
    658       Type *Ty = Op->getType();
    659       Value *Res = UndefValue::get(Ty);
    660       BasicBlock *BB = Op->getParent();
    661       unsigned Count = Ty->getVectorNumElements();
    662       IRBuilder<> Builder(Op);
    663       if (isa<PHINode>(Op))
    664         Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
    665       for (unsigned I = 0; I < Count; ++I)
    666         Res = Builder.CreateInsertElement(Res, CV[I], Builder.getInt32(I),
    667                                           Op->getName() + ".upto" + Twine(I));
    668       Res->takeName(Op);
    669       Op->replaceAllUsesWith(Res);
    670     }
    671     Op->eraseFromParent();
    672   }
    673   Gathered.clear();
    674   Scattered.clear();
    675   return true;
    676 }
    677 
    678 FunctionPass *llvm::createScalarizerPass() {
    679   return new Scalarizer();
    680 }
    681