Home | History | Annotate | Download | only in CodeGen
      1 //=----------------------- InterleavedAccessPass.cpp -----------------------==//
      2 //
      3 // The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This file implements the Interleaved Access pass, which identifies
     11 // interleaved memory accesses and transforms into target specific intrinsics.
     12 //
     13 // An interleaved load reads data from memory into several vectors, with
     14 // DE-interleaving the data on a factor. An interleaved store writes several
     15 // vectors to memory with RE-interleaving the data on a factor.
     16 //
      17 // As interleaved accesses are hard to identify in CodeGen (mainly because
     18 // the VECTOR_SHUFFLE DAG node is quite different from the shufflevector IR),
     19 // we identify and transform them to intrinsics in this pass. So the intrinsics
     20 // can be easily matched into target specific instructions later in CodeGen.
     21 //
     22 // E.g. An interleaved load (Factor = 2):
     23 //        %wide.vec = load <8 x i32>, <8 x i32>* %ptr
     24 //        %v0 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <0, 2, 4, 6>
     25 //        %v1 = shuffle <8 x i32> %wide.vec, <8 x i32> undef, <1, 3, 5, 7>
     26 //
     27 // It could be transformed into a ld2 intrinsic in AArch64 backend or a vld2
     28 // intrinsic in ARM backend.
     29 //
     30 // E.g. An interleaved store (Factor = 3):
     31 //        %i.vec = shuffle <8 x i32> %v0, <8 x i32> %v1,
     32 //                                    <0, 4, 8, 1, 5, 9, 2, 6, 10, 3, 7, 11>
     33 //        store <12 x i32> %i.vec, <12 x i32>* %ptr
     34 //
     35 // It could be transformed into a st3 intrinsic in AArch64 backend or a vst3
     36 // intrinsic in ARM backend.
     37 //
     38 //===----------------------------------------------------------------------===//
     39 
     40 #include "llvm/CodeGen/Passes.h"
     41 #include "llvm/IR/InstIterator.h"
     42 #include "llvm/Support/Debug.h"
     43 #include "llvm/Support/MathExtras.h"
     44 #include "llvm/Support/raw_ostream.h"
     45 #include "llvm/Target/TargetLowering.h"
     46 #include "llvm/Target/TargetSubtargetInfo.h"
     47 
     48 using namespace llvm;
     49 
#define DEBUG_TYPE "interleaved-access"

// Command-line switch to disable the whole transformation (enabled by
// default; hidden from -help).
static cl::opt<bool> LowerInterleavedAccesses(
    "lower-interleaved-accesses",
    cl::desc("Enable lowering interleaved accesses to intrinsics"),
    cl::init(true), cl::Hidden);

// The maximum supported interleave factor. Queried from the target lowering
// in runOnFunction and read by the mask-matching helpers below.
static unsigned MaxFactor;

namespace llvm {
static void initializeInterleavedAccessPass(PassRegistry &);
}
     62 
namespace {

/// \brief Function pass that recognizes interleaved memory accesses — a wide
/// load followed by de-interleaving shufflevectors, or a re-interleaving
/// shufflevector feeding a wide store — and asks the target lowering to
/// replace them with target specific intrinsics.
class InterleavedAccess : public FunctionPass {

public:
  static char ID;
  InterleavedAccess(const TargetMachine *TM = nullptr)
      : FunctionPass(ID), TM(TM), TLI(nullptr) {
    initializeInterleavedAccessPass(*PassRegistry::getPassRegistry());
  }

  const char *getPassName() const override { return "Interleaved Access Pass"; }

  bool runOnFunction(Function &F) override;

private:
  const TargetMachine *TM;   // May be null; the pass is a no-op without it.
  const TargetLowering *TLI; // Refreshed per function in runOnFunction.

  /// \brief Transform an interleaved load into target specific intrinsics.
  bool lowerInterleavedLoad(LoadInst *LI,
                            SmallVector<Instruction *, 32> &DeadInsts);

  /// \brief Transform an interleaved store into target specific intrinsics.
  bool lowerInterleavedStore(StoreInst *SI,
                             SmallVector<Instruction *, 32> &DeadInsts);
};
} // end anonymous namespace.
     91 
char InterleavedAccess::ID = 0;
INITIALIZE_TM_PASS(InterleavedAccess, "interleaved-access",
    "Lower interleaved memory accesses to target specific intrinsics",
    false, false)

/// Create an instance of the pass. \p TM may be null, in which case the pass
/// does nothing when run.
FunctionPass *llvm::createInterleavedAccessPass(const TargetMachine *TM) {
  return new InterleavedAccess(TM);
}
    100 
    101 /// \brief Check if the mask is a DE-interleave mask of the given factor
    102 /// \p Factor like:
    103 ///     <Index, Index+Factor, ..., Index+(NumElts-1)*Factor>
    104 static bool isDeInterleaveMaskOfFactor(ArrayRef<int> Mask, unsigned Factor,
    105                                        unsigned &Index) {
    106   // Check all potential start indices from 0 to (Factor - 1).
    107   for (Index = 0; Index < Factor; Index++) {
    108     unsigned i = 0;
    109 
    110     // Check that elements are in ascending order by Factor. Ignore undef
    111     // elements.
    112     for (; i < Mask.size(); i++)
    113       if (Mask[i] >= 0 && static_cast<unsigned>(Mask[i]) != Index + i * Factor)
    114         break;
    115 
    116     if (i == Mask.size())
    117       return true;
    118   }
    119 
    120   return false;
    121 }
    122 
    123 /// \brief Check if the mask is a DE-interleave mask for an interleaved load.
    124 ///
    125 /// E.g. DE-interleave masks (Factor = 2) could be:
    126 ///     <0, 2, 4, 6>    (mask of index 0 to extract even elements)
    127 ///     <1, 3, 5, 7>    (mask of index 1 to extract odd elements)
    128 static bool isDeInterleaveMask(ArrayRef<int> Mask, unsigned &Factor,
    129                                unsigned &Index) {
    130   if (Mask.size() < 2)
    131     return false;
    132 
    133   // Check potential Factors.
    134   for (Factor = 2; Factor <= MaxFactor; Factor++)
    135     if (isDeInterleaveMaskOfFactor(Mask, Factor, Index))
    136       return true;
    137 
    138   return false;
    139 }
    140 
    141 /// \brief Check if the mask is RE-interleave mask for an interleaved store.
    142 ///
    143 /// I.e. <0, NumSubElts, ... , NumSubElts*(Factor - 1), 1, NumSubElts + 1, ...>
    144 ///
    145 /// E.g. The RE-interleave mask (Factor = 2) could be:
    146 ///     <0, 4, 1, 5, 2, 6, 3, 7>
    147 static bool isReInterleaveMask(ArrayRef<int> Mask, unsigned &Factor) {
    148   unsigned NumElts = Mask.size();
    149   if (NumElts < 4)
    150     return false;
    151 
    152   // Check potential Factors.
    153   for (Factor = 2; Factor <= MaxFactor; Factor++) {
    154     if (NumElts % Factor)
    155       continue;
    156 
    157     unsigned NumSubElts = NumElts / Factor;
    158     if (!isPowerOf2_32(NumSubElts))
    159       continue;
    160 
    161     // Check whether each element matchs the RE-interleaved rule. Ignore undef
    162     // elements.
    163     unsigned i = 0;
    164     for (; i < NumElts; i++)
    165       if (Mask[i] >= 0 &&
    166           static_cast<unsigned>(Mask[i]) !=
    167               (i % Factor) * NumSubElts + i / Factor)
    168         break;
    169 
    170     // Find a RE-interleaved mask of current factor.
    171     if (i == NumElts)
    172       return true;
    173   }
    174 
    175   return false;
    176 }
    177 
    178 bool InterleavedAccess::lowerInterleavedLoad(
    179     LoadInst *LI, SmallVector<Instruction *, 32> &DeadInsts) {
    180   if (!LI->isSimple())
    181     return false;
    182 
    183   SmallVector<ShuffleVectorInst *, 4> Shuffles;
    184 
    185   // Check if all users of this load are shufflevectors.
    186   for (auto UI = LI->user_begin(), E = LI->user_end(); UI != E; UI++) {
    187     ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(*UI);
    188     if (!SVI || !isa<UndefValue>(SVI->getOperand(1)))
    189       return false;
    190 
    191     Shuffles.push_back(SVI);
    192   }
    193 
    194   if (Shuffles.empty())
    195     return false;
    196 
    197   unsigned Factor, Index;
    198 
    199   // Check if the first shufflevector is DE-interleave shuffle.
    200   if (!isDeInterleaveMask(Shuffles[0]->getShuffleMask(), Factor, Index))
    201     return false;
    202 
    203   // Holds the corresponding index for each DE-interleave shuffle.
    204   SmallVector<unsigned, 4> Indices;
    205   Indices.push_back(Index);
    206 
    207   Type *VecTy = Shuffles[0]->getType();
    208 
    209   // Check if other shufflevectors are also DE-interleaved of the same type
    210   // and factor as the first shufflevector.
    211   for (unsigned i = 1; i < Shuffles.size(); i++) {
    212     if (Shuffles[i]->getType() != VecTy)
    213       return false;
    214 
    215     if (!isDeInterleaveMaskOfFactor(Shuffles[i]->getShuffleMask(), Factor,
    216                                     Index))
    217       return false;
    218 
    219     Indices.push_back(Index);
    220   }
    221 
    222   DEBUG(dbgs() << "IA: Found an interleaved load: " << *LI << "\n");
    223 
    224   // Try to create target specific intrinsics to replace the load and shuffles.
    225   if (!TLI->lowerInterleavedLoad(LI, Shuffles, Indices, Factor))
    226     return false;
    227 
    228   for (auto SVI : Shuffles)
    229     DeadInsts.push_back(SVI);
    230 
    231   DeadInsts.push_back(LI);
    232   return true;
    233 }
    234 
    235 bool InterleavedAccess::lowerInterleavedStore(
    236     StoreInst *SI, SmallVector<Instruction *, 32> &DeadInsts) {
    237   if (!SI->isSimple())
    238     return false;
    239 
    240   ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(SI->getValueOperand());
    241   if (!SVI || !SVI->hasOneUse())
    242     return false;
    243 
    244   // Check if the shufflevector is RE-interleave shuffle.
    245   unsigned Factor;
    246   if (!isReInterleaveMask(SVI->getShuffleMask(), Factor))
    247     return false;
    248 
    249   DEBUG(dbgs() << "IA: Found an interleaved store: " << *SI << "\n");
    250 
    251   // Try to create target specific intrinsics to replace the store and shuffle.
    252   if (!TLI->lowerInterleavedStore(SI, SVI, Factor))
    253     return false;
    254 
    255   // Already have a new target specific interleaved store. Erase the old store.
    256   DeadInsts.push_back(SI);
    257   DeadInsts.push_back(SVI);
    258   return true;
    259 }
    260 
    261 bool InterleavedAccess::runOnFunction(Function &F) {
    262   if (!TM || !LowerInterleavedAccesses)
    263     return false;
    264 
    265   DEBUG(dbgs() << "*** " << getPassName() << ": " << F.getName() << "\n");
    266 
    267   TLI = TM->getSubtargetImpl(F)->getTargetLowering();
    268   MaxFactor = TLI->getMaxSupportedInterleaveFactor();
    269 
    270   // Holds dead instructions that will be erased later.
    271   SmallVector<Instruction *, 32> DeadInsts;
    272   bool Changed = false;
    273 
    274   for (auto &I : instructions(F)) {
    275     if (LoadInst *LI = dyn_cast<LoadInst>(&I))
    276       Changed |= lowerInterleavedLoad(LI, DeadInsts);
    277 
    278     if (StoreInst *SI = dyn_cast<StoreInst>(&I))
    279       Changed |= lowerInterleavedStore(SI, DeadInsts);
    280   }
    281 
    282   for (auto I : DeadInsts)
    283     I->eraseFromParent();
    284 
    285   return Changed;
    286 }
    287