; RUN: opt < %s -instcombine -S | FileCheck %s

; A non-constant immediate byte is not expected in practice. This test only
; guards against a crash: the intrinsic call must be left in place.

; CHECK-LABEL: @perm2pd_non_const_imm
; CHECK-NEXT: call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
; CHECK-NEXT: ret <4 x double>
define <4 x double> @perm2pd_non_const_imm(<4 x double> %a0, <4 x double> %a1, i8 %b) {
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 %b)
  ret <4 x double> %perm
}


; The next 4 tests use immediate 0x88 (136), which sets both zero mask bits,
; so every variant of the intrinsic must fold to an all-zero vector.

; CHECK-LABEL: @perm2pd_0x88
; CHECK-NEXT: ret <4 x double> zeroinitializer
define <4 x double> @perm2pd_0x88(<4 x double> %a0, <4 x double> %a1) {
  %perm = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 136)
  ret <4 x double> %perm
}

; CHECK-LABEL: @perm2ps_0x88
; CHECK-NEXT: ret <8 x float> zeroinitializer
define <8 x float> @perm2ps_0x88(<8 x float> %a0, <8 x float> %a1) {
  %perm = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 136)
  ret <8 x float> %perm
}

; CHECK-LABEL: @perm2si_0x88
; CHECK-NEXT: ret <8 x i32> zeroinitializer
define <8 x i32> @perm2si_0x88(<8 x i32> %a0, <8 x i32> %a1) {
  %perm = call <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32> %a0, <8 x i32> %a1, i8 136)
  ret <8 x i32> %perm
}

; CHECK-LABEL: @perm2i_0x88
; CHECK-NEXT: ret <4 x i64> zeroinitializer
define <4 x i64> @perm2i_0x88(<4 x i64> %a0, <4 x i64> %a1) {
  %perm = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 136)
  ret <4 x i64> %perm
}


; The other control bits are ignored when zero mask bits of the immediate are set.
; Immediate 0xff (255) sets both zero mask bits along with every other bit;
; the expected result is still an all-zero vector.
define <4 x double> @perm2pd_0xff(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 255)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0xff
; CHECK-NEXT: ret <4 x double> zeroinitializer
}


; The following 16 tests are simple shuffles, except for 2 cases where we can just return one of the
; source vectors. Verify that we generate the right shuffle masks and undef source operand where possible.
;
; As the expected masks show, the low nibble of the immediate picks the 128-bit
; half placed in the low half of the result and the high nibble picks the half
; placed in the high half: 0 = %a0 low, 1 = %a0 high, 2 = %a1 low, 3 = %a1 high.
;
; The shuffle result is captured in a FileCheck variable rather than matched
; as a literal "%1", so the checks do not depend on how opt numbers values.

define <4 x double> @perm2pd_0x00(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 0)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x00
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x01(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 1)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x01
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x02(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 2)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x02
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x03(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 3)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x03
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

; 0x10 selects %a0 low for the low half and %a0 high for the high half, so the
; whole call folds to %a0 itself.
define <4 x double> @perm2pd_0x10(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 16)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x10
; CHECK-NEXT: ret <4 x double> %a0
}

define <4 x double> @perm2pd_0x11(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 17)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x11
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x12(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 18)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x12
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x13(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 19)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x13
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> %a0, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x20(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 32)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x20
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}
; Simple-shuffle cases with immediates 0x21 through 0x33. As the expected masks
; show, the low nibble of the immediate picks the 128-bit half placed in the
; low half of the result and the high nibble picks the half placed in the high
; half: 0 = %a0 low, 1 = %a0 high, 2 = %a1 low, 3 = %a1 high.
;
; The shuffle result is captured in a FileCheck variable rather than matched
; as a literal "%1", so the checks do not depend on how opt numbers values.

define <4 x double> @perm2pd_0x21(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 33)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x21
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x22(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 34)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x22
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x23(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 35)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x23
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x30(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 48)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x30
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

define <4 x double> @perm2pd_0x31(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 49)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x31
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> %a1, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

; 0x32 selects %a1 low for the low half and %a1 high for the high half, so the
; whole call folds to %a1 itself.
define <4 x double> @perm2pd_0x32(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 50)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x32
; CHECK-NEXT: ret <4 x double> %a1
}

define <4 x double> @perm2pd_0x33(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 51)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x33
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

; Confirm that a mask for 32-bit elements is also correct.

define <8 x float> @perm2ps_0x31(<8 x float> %a0, <8 x float> %a1) {
  %res = call <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float> %a0, <8 x float> %a1, i8 49)
  ret <8 x float> %res

; CHECK-LABEL: @perm2ps_0x31
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <8 x float> %a0, <8 x float> %a1, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; CHECK-NEXT: ret <8 x float> [[SHUF]]
}


; Confirm that the AVX2 version works the same.

define <4 x i64> @perm2i_0x33(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 51)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x33
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i64> %a1, <4 x i64> undef, <4 x i32> <i32 2, i32 3, i32 2, i32 3>
; CHECK-NEXT: ret <4 x i64> [[SHUF]]
}


; Confirm that when a single zero mask bit is set, we replace a source vector with zeros.
; Single-zero-mask-bit cases: exactly one half of the result is zeroed, so one
; shuffle operand is expected to be a constant vector starting with 0.0. The
; {{.*}} skips the remaining constant elements, which these tests do not pin.
;
; Each shuffle is captured in a FileCheck variable so the following 'ret' can
; be checked to return that shuffle rather than any <4 x double> value.

; 0x81: the high half is zeroed; the low half is the high half of %a0.
define <4 x double> @perm2pd_0x81(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 129)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x81
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a0, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

; 0x83: the high half is zeroed; the low half is the high half of %a1.
define <4 x double> @perm2pd_0x83(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 131)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x83
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> %a1, <4 x double> <double 0.0{{.*}}, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

; 0x28: the low half is zeroed; the high half is the low half of %a1.
define <4 x double> @perm2pd_0x28(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 40)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x28
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

; 0x08: the low half is zeroed; the high half is the low half of %a0.
define <4 x double> @perm2pd_0x08(<4 x double> %a0, <4 x double> %a1) {
  %res = call <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double> %a0, <4 x double> %a1, i8 8)
  ret <4 x double> %res

; CHECK-LABEL: @perm2pd_0x08
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x double> <double 0.0{{.*}}, <4 x double> %a0, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x double> [[SHUF]]
}

; Check one more with the AVX2 version.
; AVX2 integer variant with immediate 0x28 (40): the low half of the result is
; zeroed and the high half is the low half of %a1. The shuffle is captured in a
; FileCheck variable so the 'ret' check verifies that it is the value returned.
define <4 x i64> @perm2i_0x28(<4 x i64> %a0, <4 x i64> %a1) {
  %res = call <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64> %a0, <4 x i64> %a1, i8 40)
  ret <4 x i64> %res

; CHECK-LABEL: @perm2i_0x28
; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <4 x i64> <i64 0{{.*}}, <4 x i64> %a1, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
; CHECK-NEXT: ret <4 x i64> [[SHUF]]
}

; Declarations of the x86 intrinsics exercised by the tests in this file.
declare <4 x double> @llvm.x86.avx.vperm2f128.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone
declare <8 x float> @llvm.x86.avx.vperm2f128.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone
declare <8 x i32> @llvm.x86.avx.vperm2f128.si.256(<8 x i32>, <8 x i32>, i8) nounwind readnone
declare <4 x i64> @llvm.x86.avx2.vperm2i128(<4 x i64>, <4 x i64>, i8) nounwind readnone