1 //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Define several functions to decode x86 specific shuffle semantics into a 11 // generic vector mask. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "X86ShuffleDecode.h" 16 17 //===----------------------------------------------------------------------===// 18 // Vector Mask Decoding 19 //===----------------------------------------------------------------------===// 20 21 namespace llvm { 22 23 void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 24 // Defaults the copying the dest value. 25 ShuffleMask.push_back(0); 26 ShuffleMask.push_back(1); 27 ShuffleMask.push_back(2); 28 ShuffleMask.push_back(3); 29 30 // Decode the immediate. 31 unsigned ZMask = Imm & 15; 32 unsigned CountD = (Imm >> 4) & 3; 33 unsigned CountS = (Imm >> 6) & 3; 34 35 // CountS selects which input element to use. 36 unsigned InVal = 4+CountS; 37 // CountD specifies which element of destination to update. 38 ShuffleMask[CountD] = InVal; 39 // ZMask zaps values, potentially overriding the CountD elt. 40 if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 41 if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 42 if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 43 if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 44 } 45 46 // <3,1> or <6,7,2,3> 47 void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 48 for (unsigned i = NElts/2; i != NElts; ++i) 49 ShuffleMask.push_back(NElts+i); 50 51 for (unsigned i = NElts/2; i != NElts; ++i) 52 ShuffleMask.push_back(i); 53 } 54 55 // <0,2> or <0,1,4,5> 56 void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 57 for (unsigned i = 0; i != NElts/2; ++i) 58 ShuffleMask.push_back(i); 59 60 for (unsigned i = 0; i != NElts/2; ++i) 61 ShuffleMask.push_back(NElts+i); 62 } 63 64 void DecodePALIGNRMask(MVT VT, unsigned Imm, 65 SmallVectorImpl<int> &ShuffleMask) { 66 unsigned NumElts = VT.getVectorNumElements(); 67 unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); 68 69 unsigned NumLanes = VT.getSizeInBits() / 128; 70 unsigned NumLaneElts = NumElts / NumLanes; 71 72 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 73 for (unsigned i = 0; i != NumLaneElts; ++i) { 74 unsigned Base = i + Offset; 75 // if i+offset is out of this lane then we actually need the other source 76 if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 77 ShuffleMask.push_back(Base + l); 78 } 79 } 80 } 81 82 /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. 83 /// VT indicates the type of the vector allowing it to handle different 84 /// datatypes and vector widths. 85 void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 86 unsigned NumElts = VT.getVectorNumElements(); 87 88 unsigned NumLanes = VT.getSizeInBits() / 128; 89 unsigned NumLaneElts = NumElts / NumLanes; 90 91 unsigned NewImm = Imm; 92 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 93 for (unsigned i = 0; i != NumLaneElts; ++i) { 94 ShuffleMask.push_back(NewImm % NumLaneElts + l); 95 NewImm /= NumLaneElts; 96 } 97 if (NumLaneElts == 4) NewImm = Imm; // reload imm 98 } 99 } 100 101 void DecodePSHUFHWMask(MVT VT, unsigned Imm, 102 SmallVectorImpl<int> &ShuffleMask) { 103 unsigned NumElts = VT.getVectorNumElements(); 104 105 for (unsigned l = 0; l != NumElts; l += 8) { 106 unsigned NewImm = Imm; 107 for (unsigned i = 0, e = 4; i != e; ++i) { 108 ShuffleMask.push_back(l + i); 109 } 110 for (unsigned i = 4, e = 8; i != e; ++i) { 111 ShuffleMask.push_back(l + 4 + (NewImm & 3)); 112 NewImm >>= 2; 113 } 114 } 115 } 116 117 void DecodePSHUFLWMask(MVT VT, unsigned Imm, 118 SmallVectorImpl<int> &ShuffleMask) { 119 unsigned NumElts = VT.getVectorNumElements(); 120 121 for (unsigned l = 0; l != NumElts; l += 8) { 122 unsigned NewImm = Imm; 123 for (unsigned i = 0, e = 4; i != e; ++i) { 124 ShuffleMask.push_back(l + (NewImm & 3)); 125 NewImm >>= 2; 126 } 127 for (unsigned i = 4, e = 8; i != e; ++i) { 128 ShuffleMask.push_back(l + i); 129 } 130 } 131 } 132 133 /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates 134 /// the type of the vector allowing it to handle different datatypes and vector 135 /// widths. 136 void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 137 unsigned NumElts = VT.getVectorNumElements(); 138 139 unsigned NumLanes = VT.getSizeInBits() / 128; 140 unsigned NumLaneElts = NumElts / NumLanes; 141 142 unsigned NewImm = Imm; 143 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 144 // each half of a lane comes from different source 145 for (unsigned s = 0; s != NumElts*2; s += NumElts) { 146 for (unsigned i = 0; i != NumLaneElts/2; ++i) { 147 ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 148 NewImm /= NumLaneElts; 149 } 150 } 151 if (NumLaneElts == 4) NewImm = Imm; // reload imm 152 } 153 } 154 155 /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd 156 /// and punpckh*. VT indicates the type of the vector allowing it to handle 157 /// different datatypes and vector widths. 158 void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 159 unsigned NumElts = VT.getVectorNumElements(); 160 161 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 162 // independently on 128-bit lanes. 163 unsigned NumLanes = VT.getSizeInBits() / 128; 164 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 165 unsigned NumLaneElts = NumElts / NumLanes; 166 167 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 168 for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { 169 ShuffleMask.push_back(i); // Reads from dest/src1 170 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 171 } 172 } 173 } 174 175 /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd 176 /// and punpckl*. VT indicates the type of the vector allowing it to handle 177 /// different datatypes and vector widths. 178 void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 179 unsigned NumElts = VT.getVectorNumElements(); 180 181 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 182 // independently on 128-bit lanes. 183 unsigned NumLanes = VT.getSizeInBits() / 128; 184 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 185 unsigned NumLaneElts = NumElts / NumLanes; 186 187 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 188 for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { 189 ShuffleMask.push_back(i); // Reads from dest/src1 190 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 191 } 192 } 193 } 194 195 void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, 196 SmallVectorImpl<int> &ShuffleMask) { 197 if (Imm & 0x88) 198 return; // Not a shuffle 199 200 unsigned HalfSize = VT.getVectorNumElements()/2; 201 202 for (unsigned l = 0; l != 2; ++l) { 203 unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; 204 for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) 205 ShuffleMask.push_back(i); 206 } 207 } 208 209 /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 210 /// No VT provided since it only works on 256-bit, 4 element vectors. 211 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 212 for (unsigned i = 0; i != 4; ++i) { 213 ShuffleMask.push_back((Imm >> (2*i)) & 3); 214 } 215 } 216 217 } // llvm namespace 218