1 //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Define several functions to decode x86 specific shuffle semantics into a 11 // generic vector mask. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "X86ShuffleDecode.h" 16 #include "llvm/CodeGen/MachineValueType.h" 17 18 //===----------------------------------------------------------------------===// 19 // Vector Mask Decoding 20 //===----------------------------------------------------------------------===// 21 22 namespace llvm { 23 24 void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 25 // Defaults the copying the dest value. 26 ShuffleMask.push_back(0); 27 ShuffleMask.push_back(1); 28 ShuffleMask.push_back(2); 29 ShuffleMask.push_back(3); 30 31 // Decode the immediate. 32 unsigned ZMask = Imm & 15; 33 unsigned CountD = (Imm >> 4) & 3; 34 unsigned CountS = (Imm >> 6) & 3; 35 36 // CountS selects which input element to use. 37 unsigned InVal = 4+CountS; 38 // CountD specifies which element of destination to update. 39 ShuffleMask[CountD] = InVal; 40 // ZMask zaps values, potentially overriding the CountD elt. 41 if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 42 if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 43 if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 44 if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 45 } 46 47 // <3,1> or <6,7,2,3> 48 void DecodeMOVHLPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 49 for (unsigned i = NElts/2; i != NElts; ++i) 50 ShuffleMask.push_back(NElts+i); 51 52 for (unsigned i = NElts/2; i != NElts; ++i) 53 ShuffleMask.push_back(i); 54 } 55 56 // <0,2> or <0,1,4,5> 57 void DecodeMOVLHPSMask(unsigned NElts, SmallVectorImpl<int> &ShuffleMask) { 58 for (unsigned i = 0; i != NElts/2; ++i) 59 ShuffleMask.push_back(i); 60 61 for (unsigned i = 0; i != NElts/2; ++i) 62 ShuffleMask.push_back(NElts+i); 63 } 64 65 void DecodePALIGNRMask(MVT VT, unsigned Imm, 66 SmallVectorImpl<int> &ShuffleMask) { 67 unsigned NumElts = VT.getVectorNumElements(); 68 unsigned Offset = Imm * (VT.getVectorElementType().getSizeInBits() / 8); 69 70 unsigned NumLanes = VT.getSizeInBits() / 128; 71 unsigned NumLaneElts = NumElts / NumLanes; 72 73 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 74 for (unsigned i = 0; i != NumLaneElts; ++i) { 75 unsigned Base = i + Offset; 76 // if i+offset is out of this lane then we actually need the other source 77 if (Base >= NumLaneElts) Base += NumElts - NumLaneElts; 78 ShuffleMask.push_back(Base + l); 79 } 80 } 81 } 82 83 /// DecodePSHUFMask - This decodes the shuffle masks for pshufd, and vpermilp*. 84 /// VT indicates the type of the vector allowing it to handle different 85 /// datatypes and vector widths. 86 void DecodePSHUFMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 87 unsigned NumElts = VT.getVectorNumElements(); 88 89 unsigned NumLanes = VT.getSizeInBits() / 128; 90 unsigned NumLaneElts = NumElts / NumLanes; 91 92 unsigned NewImm = Imm; 93 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 94 for (unsigned i = 0; i != NumLaneElts; ++i) { 95 ShuffleMask.push_back(NewImm % NumLaneElts + l); 96 NewImm /= NumLaneElts; 97 } 98 if (NumLaneElts == 4) NewImm = Imm; // reload imm 99 } 100 } 101 102 void DecodePSHUFHWMask(MVT VT, unsigned Imm, 103 SmallVectorImpl<int> &ShuffleMask) { 104 unsigned NumElts = VT.getVectorNumElements(); 105 106 for (unsigned l = 0; l != NumElts; l += 8) { 107 unsigned NewImm = Imm; 108 for (unsigned i = 0, e = 4; i != e; ++i) { 109 ShuffleMask.push_back(l + i); 110 } 111 for (unsigned i = 4, e = 8; i != e; ++i) { 112 ShuffleMask.push_back(l + 4 + (NewImm & 3)); 113 NewImm >>= 2; 114 } 115 } 116 } 117 118 void DecodePSHUFLWMask(MVT VT, unsigned Imm, 119 SmallVectorImpl<int> &ShuffleMask) { 120 unsigned NumElts = VT.getVectorNumElements(); 121 122 for (unsigned l = 0; l != NumElts; l += 8) { 123 unsigned NewImm = Imm; 124 for (unsigned i = 0, e = 4; i != e; ++i) { 125 ShuffleMask.push_back(l + (NewImm & 3)); 126 NewImm >>= 2; 127 } 128 for (unsigned i = 4, e = 8; i != e; ++i) { 129 ShuffleMask.push_back(l + i); 130 } 131 } 132 } 133 134 /// DecodeSHUFPMask - This decodes the shuffle masks for shufp*. VT indicates 135 /// the type of the vector allowing it to handle different datatypes and vector 136 /// widths. 137 void DecodeSHUFPMask(MVT VT, unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 138 unsigned NumElts = VT.getVectorNumElements(); 139 140 unsigned NumLanes = VT.getSizeInBits() / 128; 141 unsigned NumLaneElts = NumElts / NumLanes; 142 143 unsigned NewImm = Imm; 144 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 145 // each half of a lane comes from different source 146 for (unsigned s = 0; s != NumElts*2; s += NumElts) { 147 for (unsigned i = 0; i != NumLaneElts/2; ++i) { 148 ShuffleMask.push_back(NewImm % NumLaneElts + s + l); 149 NewImm /= NumLaneElts; 150 } 151 } 152 if (NumLaneElts == 4) NewImm = Imm; // reload imm 153 } 154 } 155 156 /// DecodeUNPCKHMask - This decodes the shuffle masks for unpckhps/unpckhpd 157 /// and punpckh*. VT indicates the type of the vector allowing it to handle 158 /// different datatypes and vector widths. 159 void DecodeUNPCKHMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 160 unsigned NumElts = VT.getVectorNumElements(); 161 162 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 163 // independently on 128-bit lanes. 164 unsigned NumLanes = VT.getSizeInBits() / 128; 165 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 166 unsigned NumLaneElts = NumElts / NumLanes; 167 168 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 169 for (unsigned i = l + NumLaneElts/2, e = l + NumLaneElts; i != e; ++i) { 170 ShuffleMask.push_back(i); // Reads from dest/src1 171 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 172 } 173 } 174 } 175 176 /// DecodeUNPCKLMask - This decodes the shuffle masks for unpcklps/unpcklpd 177 /// and punpckl*. VT indicates the type of the vector allowing it to handle 178 /// different datatypes and vector widths. 179 void DecodeUNPCKLMask(MVT VT, SmallVectorImpl<int> &ShuffleMask) { 180 unsigned NumElts = VT.getVectorNumElements(); 181 182 // Handle 128 and 256-bit vector lengths. AVX defines UNPCK* to operate 183 // independently on 128-bit lanes. 184 unsigned NumLanes = VT.getSizeInBits() / 128; 185 if (NumLanes == 0 ) NumLanes = 1; // Handle MMX 186 unsigned NumLaneElts = NumElts / NumLanes; 187 188 for (unsigned l = 0; l != NumElts; l += NumLaneElts) { 189 for (unsigned i = l, e = l + NumLaneElts/2; i != e; ++i) { 190 ShuffleMask.push_back(i); // Reads from dest/src1 191 ShuffleMask.push_back(i+NumElts); // Reads from src/src2 192 } 193 } 194 } 195 196 void DecodeVPERM2X128Mask(MVT VT, unsigned Imm, 197 SmallVectorImpl<int> &ShuffleMask) { 198 if (Imm & 0x88) 199 return; // Not a shuffle 200 201 unsigned HalfSize = VT.getVectorNumElements()/2; 202 203 for (unsigned l = 0; l != 2; ++l) { 204 unsigned HalfBegin = ((Imm >> (l*4)) & 0x3) * HalfSize; 205 for (unsigned i = HalfBegin, e = HalfBegin+HalfSize; i != e; ++i) 206 ShuffleMask.push_back(i); 207 } 208 } 209 210 /// DecodeVPERMMask - this decodes the shuffle masks for VPERMQ/VPERMPD. 211 /// No VT provided since it only works on 256-bit, 4 element vectors. 212 void DecodeVPERMMask(unsigned Imm, SmallVectorImpl<int> &ShuffleMask) { 213 for (unsigned i = 0; i != 4; ++i) { 214 ShuffleMask.push_back((Imm >> (2*i)) & 3); 215 } 216 } 217 218 } // llvm namespace 219