Home | History | Annotate | Download | only in X86
      1 //===-- X86ShuffleDecodeConstantPool.cpp - X86 shuffle decode -------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Define several functions to decode x86 specific shuffle semantics using
     11 // constants from the constant pool.
     12 //
     13 //===----------------------------------------------------------------------===//
     14 
     15 #include "X86ShuffleDecodeConstantPool.h"
     16 #include "Utils/X86ShuffleDecode.h"
     17 #include "llvm/CodeGen/MachineValueType.h"
     18 #include "llvm/IR/Constants.h"
     19 
     20 //===----------------------------------------------------------------------===//
     21 //  Vector Mask Decoding
     22 //===----------------------------------------------------------------------===//
     23 
     24 namespace llvm {
     25 
     26 void DecodePSHUFBMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
     27   Type *MaskTy = C->getType();
     28   // It is not an error for the PSHUFB mask to not be a vector of i8 because the
     29   // constant pool uniques constants by their bit representation.
     30   // e.g. the following take up the same space in the constant pool:
     31   //   i128 -170141183420855150465331762880109871104
     32   //
     33   //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
     34   //
     35   //   <4 x i32> <i32 -2147483648, i32 -2147483648,
     36   //              i32 -2147483648, i32 -2147483648>
     37 
     38 #ifndef NDEBUG
     39   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
     40   assert(MaskTySize == 128 || MaskTySize == 256 || MaskTySize == 512);
     41 #endif
     42 
     43   if (!MaskTy->isVectorTy())
     44     return;
     45   int NumElts = MaskTy->getVectorNumElements();
     46 
     47   Type *EltTy = MaskTy->getVectorElementType();
     48   if (!EltTy->isIntegerTy())
     49     return;
     50 
     51   // The shuffle mask requires a byte vector - decode cases with
     52   // wider elements as well.
     53   unsigned BitWidth = cast<IntegerType>(EltTy)->getBitWidth();
     54   if ((BitWidth % 8) != 0)
     55     return;
     56 
     57   int Scale = BitWidth / 8;
     58   int NumBytes = NumElts * Scale;
     59   ShuffleMask.reserve(NumBytes);
     60 
     61   for (int i = 0; i != NumElts; ++i) {
     62     Constant *COp = C->getAggregateElement(i);
     63     if (!COp) {
     64       ShuffleMask.clear();
     65       return;
     66     } else if (isa<UndefValue>(COp)) {
     67       ShuffleMask.append(Scale, SM_SentinelUndef);
     68       continue;
     69     }
     70 
     71     APInt APElt = cast<ConstantInt>(COp)->getValue();
     72     for (int j = 0; j != Scale; ++j) {
     73       // For AVX vectors with 32 bytes the base of the shuffle is the 16-byte
     74       // lane of the vector we're inside.
     75       int Base = ((i * Scale) + j) & ~0xf;
     76 
     77       uint64_t Element = APElt.getLoBits(8).getZExtValue();
     78       APElt = APElt.lshr(8);
     79 
     80       // If the high bit (7) of the byte is set, the element is zeroed.
     81       if (Element & (1 << 7))
     82         ShuffleMask.push_back(SM_SentinelZero);
     83       else {
     84         // Only the least significant 4 bits of the byte are used.
     85         int Index = Base + (Element & 0xf);
     86         ShuffleMask.push_back(Index);
     87       }
     88     }
     89   }
     90 
     91   assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
     92 }
     93 
     94 void DecodeVPERMILPMask(const Constant *C, unsigned ElSize,
     95                         SmallVectorImpl<int> &ShuffleMask) {
     96   Type *MaskTy = C->getType();
     97   // It is not an error for the PSHUFB mask to not be a vector of i8 because the
     98   // constant pool uniques constants by their bit representation.
     99   // e.g. the following take up the same space in the constant pool:
    100   //   i128 -170141183420855150465331762880109871104
    101   //
    102   //   <2 x i64> <i64 -9223372034707292160, i64 -9223372034707292160>
    103   //
    104   //   <4 x i32> <i32 -2147483648, i32 -2147483648,
    105   //              i32 -2147483648, i32 -2147483648>
    106 
    107   if (ElSize != 32 && ElSize != 64)
    108     return;
    109 
    110   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
    111   if (MaskTySize != 128 && MaskTySize != 256 && MaskTySize != 512)
    112     return;
    113 
    114   // Only support vector types.
    115   if (!MaskTy->isVectorTy())
    116     return;
    117 
    118   // Make sure its an integer type.
    119   Type *VecEltTy = MaskTy->getVectorElementType();
    120   if (!VecEltTy->isIntegerTy())
    121     return;
    122 
    123   // Support any element type from byte up to element size.
    124   // This is necessary primarily because 64-bit elements get split to 32-bit
    125   // in the constant pool on 32-bit target.
    126   unsigned EltTySize = VecEltTy->getIntegerBitWidth();
    127   if (EltTySize < 8 || EltTySize > ElSize)
    128     return;
    129 
    130   unsigned NumElements = MaskTySize / ElSize;
    131   assert((NumElements == 2 || NumElements == 4 || NumElements == 8 ||
    132           NumElements == 16) &&
    133          "Unexpected number of vector elements.");
    134   ShuffleMask.reserve(NumElements);
    135   unsigned NumElementsPerLane = 128 / ElSize;
    136   unsigned Factor = ElSize / EltTySize;
    137 
    138   for (unsigned i = 0; i < NumElements; ++i) {
    139     Constant *COp = C->getAggregateElement(i * Factor);
    140     if (!COp) {
    141       ShuffleMask.clear();
    142       return;
    143     } else if (isa<UndefValue>(COp)) {
    144       ShuffleMask.push_back(SM_SentinelUndef);
    145       continue;
    146     }
    147     int Index = i & ~(NumElementsPerLane - 1);
    148     uint64_t Element = cast<ConstantInt>(COp)->getZExtValue();
    149     if (ElSize == 64)
    150       Index += (Element >> 1) & 0x1;
    151     else
    152       Index += Element & 0x3;
    153     ShuffleMask.push_back(Index);
    154   }
    155 
    156   // TODO: Handle funny-looking vectors too.
    157 }
    158 
    159 void DecodeVPERMIL2PMask(const Constant *C, unsigned M2Z, unsigned ElSize,
    160                          SmallVectorImpl<int> &ShuffleMask) {
    161   Type *MaskTy = C->getType();
    162 
    163   unsigned MaskTySize = MaskTy->getPrimitiveSizeInBits();
    164   if (MaskTySize != 128 && MaskTySize != 256)
    165     return;
    166 
    167   // Only support vector types.
    168   if (!MaskTy->isVectorTy())
    169     return;
    170 
    171   // Make sure its an integer type.
    172   Type *VecEltTy = MaskTy->getVectorElementType();
    173   if (!VecEltTy->isIntegerTy())
    174     return;
    175 
    176   // Support any element type from byte up to element size.
    177   // This is necessary primarily because 64-bit elements get split to 32-bit
    178   // in the constant pool on 32-bit target.
    179   unsigned EltTySize = VecEltTy->getIntegerBitWidth();
    180   if (EltTySize < 8 || EltTySize > ElSize)
    181     return;
    182 
    183   unsigned NumElements = MaskTySize / ElSize;
    184   assert((NumElements == 2 || NumElements == 4 || NumElements == 8) &&
    185          "Unexpected number of vector elements.");
    186   ShuffleMask.reserve(NumElements);
    187   unsigned NumElementsPerLane = 128 / ElSize;
    188   unsigned Factor = ElSize / EltTySize;
    189 
    190   for (unsigned i = 0; i < NumElements; ++i) {
    191     Constant *COp = C->getAggregateElement(i * Factor);
    192     if (!COp) {
    193       ShuffleMask.clear();
    194       return;
    195     } else if (isa<UndefValue>(COp)) {
    196       ShuffleMask.push_back(SM_SentinelUndef);
    197       continue;
    198     }
    199 
    200     // VPERMIL2 Operation.
    201     // Bits[3] - Match Bit.
    202     // Bits[2:1] - (Per Lane) PD Shuffle Mask.
    203     // Bits[2:0] - (Per Lane) PS Shuffle Mask.
    204     uint64_t Selector = cast<ConstantInt>(COp)->getZExtValue();
    205     unsigned MatchBit = (Selector >> 3) & 0x1;
    206 
    207     // M2Z[0:1]     MatchBit
    208     //   0Xb           X        Source selected by Selector index.
    209     //   10b           0        Source selected by Selector index.
    210     //   10b           1        Zero.
    211     //   11b           0        Zero.
    212     //   11b           1        Source selected by Selector index.
    213     if ((M2Z & 0x2) != 0u && MatchBit != (M2Z & 0x1)) {
    214       ShuffleMask.push_back(SM_SentinelZero);
    215       continue;
    216     }
    217 
    218     int Index = i & ~(NumElementsPerLane - 1);
    219     if (ElSize == 64)
    220       Index += (Selector >> 1) & 0x1;
    221     else
    222       Index += Selector & 0x3;
    223 
    224     int Src = (Selector >> 2) & 0x1;
    225     Index += Src * NumElements;
    226     ShuffleMask.push_back(Index);
    227   }
    228 
    229   // TODO: Handle funny-looking vectors too.
    230 }
    231 
    232 void DecodeVPPERMMask(const Constant *C, SmallVectorImpl<int> &ShuffleMask) {
    233   Type *MaskTy = C->getType();
    234   assert(MaskTy->getPrimitiveSizeInBits() == 128);
    235 
    236   // Only support vector types.
    237   if (!MaskTy->isVectorTy())
    238     return;
    239 
    240   // Make sure its an integer type.
    241   Type *VecEltTy = MaskTy->getVectorElementType();
    242   if (!VecEltTy->isIntegerTy())
    243     return;
    244 
    245   // The shuffle mask requires a byte vector - decode cases with
    246   // wider elements as well.
    247   unsigned BitWidth = cast<IntegerType>(VecEltTy)->getBitWidth();
    248   if ((BitWidth % 8) != 0)
    249     return;
    250 
    251   int NumElts = MaskTy->getVectorNumElements();
    252   int Scale = BitWidth / 8;
    253   int NumBytes = NumElts * Scale;
    254   ShuffleMask.reserve(NumBytes);
    255 
    256   for (int i = 0; i != NumElts; ++i) {
    257     Constant *COp = C->getAggregateElement(i);
    258     if (!COp) {
    259       ShuffleMask.clear();
    260       return;
    261     } else if (isa<UndefValue>(COp)) {
    262       ShuffleMask.append(Scale, SM_SentinelUndef);
    263       continue;
    264     }
    265 
    266     // VPPERM Operation
    267     // Bits[4:0] - Byte Index (0 - 31)
    268     // Bits[7:5] - Permute Operation
    269     //
    270     // Permute Operation:
    271     // 0 - Source byte (no logical operation).
    272     // 1 - Invert source byte.
    273     // 2 - Bit reverse of source byte.
    274     // 3 - Bit reverse of inverted source byte.
    275     // 4 - 00h (zero - fill).
    276     // 5 - FFh (ones - fill).
    277     // 6 - Most significant bit of source byte replicated in all bit positions.
    278     // 7 - Invert most significant bit of source byte and replicate in all bit positions.
    279     APInt MaskElt = cast<ConstantInt>(COp)->getValue();
    280     for (int j = 0; j != Scale; ++j) {
    281       APInt Index = MaskElt.getLoBits(5);
    282       APInt PermuteOp = MaskElt.lshr(5).getLoBits(3);
    283       MaskElt = MaskElt.lshr(8);
    284 
    285       if (PermuteOp == 4) {
    286         ShuffleMask.push_back(SM_SentinelZero);
    287         continue;
    288       }
    289       if (PermuteOp != 0) {
    290         ShuffleMask.clear();
    291         return;
    292       }
    293       ShuffleMask.push_back((int)Index.getZExtValue());
    294     }
    295   }
    296 
    297   assert(NumBytes == (int)ShuffleMask.size() && "Unexpected shuffle mask size");
    298 }
    299 
    300 void DecodeVPERMVMask(const Constant *C, MVT VT,
    301                       SmallVectorImpl<int> &ShuffleMask) {
    302   Type *MaskTy = C->getType();
    303   if (MaskTy->isVectorTy()) {
    304     unsigned NumElements = MaskTy->getVectorNumElements();
    305     if (NumElements == VT.getVectorNumElements()) {
    306       unsigned EltMaskSize = Log2_64(NumElements);
    307       for (unsigned i = 0; i < NumElements; ++i) {
    308         Constant *COp = C->getAggregateElement(i);
    309         if (!COp || (!isa<UndefValue>(COp) && !isa<ConstantInt>(COp))) {
    310           ShuffleMask.clear();
    311           return;
    312         }
    313         if (isa<UndefValue>(COp))
    314           ShuffleMask.push_back(SM_SentinelUndef);
    315         else {
    316           APInt Element = cast<ConstantInt>(COp)->getValue();
    317           Element = Element.getLoBits(EltMaskSize);
    318           ShuffleMask.push_back(Element.getZExtValue());
    319         }
    320       }
    321     }
    322     return;
    323   }
    324   // Scalar value; just broadcast it
    325   if (!isa<ConstantInt>(C))
    326     return;
    327   uint64_t Element = cast<ConstantInt>(C)->getZExtValue();
    328   int NumElements = VT.getVectorNumElements();
    329   Element &= (1 << NumElements) - 1;
    330   for (int i = 0; i < NumElements; ++i)
    331     ShuffleMask.push_back(Element);
    332 }
    333 
    334 void DecodeVPERMV3Mask(const Constant *C, MVT VT,
    335                        SmallVectorImpl<int> &ShuffleMask) {
    336   Type *MaskTy = C->getType();
    337   unsigned NumElements = MaskTy->getVectorNumElements();
    338   if (NumElements == VT.getVectorNumElements()) {
    339     unsigned EltMaskSize = Log2_64(NumElements * 2);
    340     for (unsigned i = 0; i < NumElements; ++i) {
    341       Constant *COp = C->getAggregateElement(i);
    342       if (!COp) {
    343         ShuffleMask.clear();
    344         return;
    345       }
    346       if (isa<UndefValue>(COp))
    347         ShuffleMask.push_back(SM_SentinelUndef);
    348       else {
    349         APInt Element = cast<ConstantInt>(COp)->getValue();
    350         Element = Element.getLoBits(EltMaskSize);
    351         ShuffleMask.push_back(Element.getZExtValue());
    352       }
    353     }
    354   }
    355 }
    356 } // llvm namespace
    357