1 //===-- X86ShuffleDecode.cpp - X86 shuffle decode logic -------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Define several functions to decode x86 specific shuffle semantics into a 11 // generic vector mask. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "X86ShuffleDecode.h" 16 17 //===----------------------------------------------------------------------===// 18 // Vector Mask Decoding 19 //===----------------------------------------------------------------------===// 20 21 namespace llvm { 22 23 void DecodeINSERTPSMask(unsigned Imm, SmallVectorImpl<unsigned> &ShuffleMask) { 24 // Defaults the copying the dest value. 25 ShuffleMask.push_back(0); 26 ShuffleMask.push_back(1); 27 ShuffleMask.push_back(2); 28 ShuffleMask.push_back(3); 29 30 // Decode the immediate. 31 unsigned ZMask = Imm & 15; 32 unsigned CountD = (Imm >> 4) & 3; 33 unsigned CountS = (Imm >> 6) & 3; 34 35 // CountS selects which input element to use. 36 unsigned InVal = 4+CountS; 37 // CountD specifies which element of destination to update. 38 ShuffleMask[CountD] = InVal; 39 // ZMask zaps values, potentially overriding the CountD elt. 40 if (ZMask & 1) ShuffleMask[0] = SM_SentinelZero; 41 if (ZMask & 2) ShuffleMask[1] = SM_SentinelZero; 42 if (ZMask & 4) ShuffleMask[2] = SM_SentinelZero; 43 if (ZMask & 8) ShuffleMask[3] = SM_SentinelZero; 44 } 45 46 // <3,1> or <6,7,2,3> 47 void DecodeMOVHLPSMask(unsigned NElts, 48 SmallVectorImpl<unsigned> &ShuffleMask) { 49 for (unsigned i = NElts/2; i != NElts; ++i) 50 ShuffleMask.push_back(NElts+i); 51 52 for (unsigned i = NElts/2; i != NElts; ++i) 53 ShuffleMask.push_back(i); 54 } 55 56 // <0,2> or <0,1,4,5> 57 void DecodeMOVLHPSMask(unsigned NElts, 58 SmallVectorImpl<unsigned> &ShuffleMask) { 59 for (unsigned i = 0; i != NElts/2; ++i) 60 ShuffleMask.push_back(i); 61 62 for (unsigned i = 0; i != NElts/2; ++i) 63 ShuffleMask.push_back(NElts+i); 64 } 65 66 void DecodePSHUFMask(unsigned NElts, unsigned Imm, 67 SmallVectorImpl<unsigned> &ShuffleMask) { 68 for (unsigned i = 0; i != NElts; ++i) { 69 ShuffleMask.push_back(Imm % NElts); 70 Imm /= NElts; 71 } 72 } 73 74 void DecodePSHUFHWMask(unsigned Imm, 75 SmallVectorImpl<unsigned> &ShuffleMask) { 76 ShuffleMask.push_back(0); 77 ShuffleMask.push_back(1); 78 ShuffleMask.push_back(2); 79 ShuffleMask.push_back(3); 80 for (unsigned i = 0; i != 4; ++i) { 81 ShuffleMask.push_back(4+(Imm & 3)); 82 Imm >>= 2; 83 } 84 } 85 86 void DecodePSHUFLWMask(unsigned Imm, 87 SmallVectorImpl<unsigned> &ShuffleMask) { 88 for (unsigned i = 0; i != 4; ++i) { 89 ShuffleMask.push_back((Imm & 3)); 90 Imm >>= 2; 91 } 92 ShuffleMask.push_back(4); 93 ShuffleMask.push_back(5); 94 ShuffleMask.push_back(6); 95 ShuffleMask.push_back(7); 96 } 97 98 void DecodePUNPCKLBWMask(unsigned NElts, 99 SmallVectorImpl<unsigned> &ShuffleMask) { 100 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i8, NElts), ShuffleMask); 101 } 102 103 void DecodePUNPCKLWDMask(unsigned NElts, 104 SmallVectorImpl<unsigned> &ShuffleMask) { 105 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i16, NElts), ShuffleMask); 106 } 107 108 void DecodePUNPCKLDQMask(unsigned NElts, 109 SmallVectorImpl<unsigned> &ShuffleMask) { 110 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); 111 } 112 113 void DecodePUNPCKLQDQMask(unsigned NElts, 114 SmallVectorImpl<unsigned> &ShuffleMask) { 115 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); 116 } 117 118 void DecodePUNPCKLMask(EVT VT, 119 SmallVectorImpl<unsigned> &ShuffleMask) { 120 DecodeUNPCKLPMask(VT, ShuffleMask); 121 } 122 123 void DecodePUNPCKHMask(unsigned NElts, 124 SmallVectorImpl<unsigned> &ShuffleMask) { 125 for (unsigned i = 0; i != NElts/2; ++i) { 126 ShuffleMask.push_back(i+NElts/2); 127 ShuffleMask.push_back(i+NElts+NElts/2); 128 } 129 } 130 131 void DecodeSHUFPSMask(unsigned NElts, unsigned Imm, 132 SmallVectorImpl<unsigned> &ShuffleMask) { 133 // Part that reads from dest. 134 for (unsigned i = 0; i != NElts/2; ++i) { 135 ShuffleMask.push_back(Imm % NElts); 136 Imm /= NElts; 137 } 138 // Part that reads from src. 139 for (unsigned i = 0; i != NElts/2; ++i) { 140 ShuffleMask.push_back(Imm % NElts + NElts); 141 Imm /= NElts; 142 } 143 } 144 145 void DecodeUNPCKHPMask(unsigned NElts, 146 SmallVectorImpl<unsigned> &ShuffleMask) { 147 for (unsigned i = 0; i != NElts/2; ++i) { 148 ShuffleMask.push_back(i+NElts/2); // Reads from dest 149 ShuffleMask.push_back(i+NElts+NElts/2); // Reads from src 150 } 151 } 152 153 void DecodeUNPCKLPSMask(unsigned NElts, 154 SmallVectorImpl<unsigned> &ShuffleMask) { 155 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i32, NElts), ShuffleMask); 156 } 157 158 void DecodeUNPCKLPDMask(unsigned NElts, 159 SmallVectorImpl<unsigned> &ShuffleMask) { 160 DecodeUNPCKLPMask(MVT::getVectorVT(MVT::i64, NElts), ShuffleMask); 161 } 162 163 /// DecodeUNPCKLPMask - This decodes the shuffle masks for unpcklps/unpcklpd 164 /// etc. VT indicates the type of the vector allowing it to handle different 165 /// datatypes and vector widths. 166 void DecodeUNPCKLPMask(EVT VT, 167 SmallVectorImpl<unsigned> &ShuffleMask) { 168 unsigned NumElts = VT.getVectorNumElements(); 169 170 // Handle vector lengths > 128 bits. Define a "section" as a set of 171 // 128 bits. AVX defines UNPCK* to operate independently on 128-bit 172 // sections. 173 unsigned NumSections = VT.getSizeInBits() / 128; 174 if (NumSections == 0 ) NumSections = 1; // Handle MMX 175 unsigned NumSectionElts = NumElts / NumSections; 176 177 unsigned Start = 0; 178 unsigned End = NumSectionElts / 2; 179 for (unsigned s = 0; s < NumSections; ++s) { 180 for (unsigned i = Start; i != End; ++i) { 181 ShuffleMask.push_back(i); // Reads from dest/src1 182 ShuffleMask.push_back(i+NumSectionElts); // Reads from src/src2 183 } 184 // Process the next 128 bits. 185 Start += NumSectionElts; 186 End += NumSectionElts; 187 } 188 } 189 190 } // llvm namespace 191