; Check AVX2 instructions that are disabled when AVX512VL/AVX512BW are present.

; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=core-avx2 -mattr=+avx2 -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=knl -mattr=+avx512vl -mattr=+avx512bw -o /dev/null
; RUN: llc < %s -mtriple=x86_64-apple-darwin -show-mc-encoding -mcpu=skx -o /dev/null

define <4 x i64> @vpand_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = and <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpand_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = and <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <4 x i64> @vpandn_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %y = xor <4 x i64> %a2, <i64 -1, i64 -1, i64 -1, i64 -1>
  %x = and <4 x i64> %a, %y
  ret <4 x i64> %x
}

define <2 x i64> @vpandn_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %y = xor <2 x i64> %a2, <i64 -1, i64 -1>
  %x = and <2 x i64> %a, %y
  ret <2 x i64> %x
}

define <4 x i64> @vpor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = or <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <4 x i64> @vpxor_256(<4 x i64> %a, <4 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <4 x i64> %a, <i64 1, i64 1, i64 1, i64 1>
  %x = xor <4 x i64> %a2, %b
  ret <4 x i64> %x
}

define <2 x i64> @vpor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = or <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <2 x i64> @vpxor_128(<2 x i64> %a, <2 x i64> %b) nounwind uwtable readnone ssp {
  ; Force the execution domain with an add.
  %a2 = add <2 x i64> %a, <i64 1, i64 1>
  %x = xor <2 x i64> %a2, %b
  ret <2 x i64> %x
}

define <4 x i64> @test_vpaddq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = add <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpaddd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = add <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpaddw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = add <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpaddb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = add <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <4 x i64> @test_vpsubq_256(<4 x i64> %i, <4 x i64> %j) nounwind readnone {
  %x = sub <4 x i64> %i, %j
  ret <4 x i64> %x
}

define <8 x i32> @test_vpsubd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = sub <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <16 x i16> @test_vpsubw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = sub <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <32 x i8> @test_vpsubb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %x = sub <32 x i8> %i, %j
  ret <32 x i8> %x
}

define <16 x i16> @test_vpmullw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %x = mul <16 x i16> %i, %j
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpgtd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <32 x i8> @test_vpcmpeqb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpeqw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <32 x i8> @test_vpcmpgtb_256(<32 x i8> %i, <32 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <32 x i8> %i, %j
  %x = sext <32 x i1> %bincmp to <32 x i8>
  ret <32 x i8> %x
}

define <16 x i16> @test_vpcmpgtw_256(<16 x i16> %i, <16 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i16> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i16>
  ret <16 x i16> %x
}

define <8 x i32> @test_vpcmpeqd_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i32> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i32>
  ret <8 x i32> %x
}

define <2 x i64> @test_vpaddq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = add <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpaddd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = add <4 x i32> %i, %j
  ret <4 x i32> %x
}

define <8 x i16> @test_vpaddw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = add <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpaddb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = add <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <2 x i64> @test_vpsubq_128(<2 x i64> %i, <2 x i64> %j) nounwind readnone {
  %x = sub <2 x i64> %i, %j
  ret <2 x i64> %x
}

define <4 x i32> @test_vpsubd_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = sub <4 x i32> %i, %j
  ret <4 x i32> %x
}
define <8 x i16> @test_vpsubw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = sub <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <16 x i8> @test_vpsubb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %x = sub <16 x i8> %i, %j
  ret <16 x i8> %x
}

define <8 x i16> @test_vpmullw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %x = mul <8 x i16> %i, %j
  ret <8 x i16> %x
}

define <8 x i16> @test_vpcmpgtw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp slt <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpgtb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp slt <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @test_vpcmpeqw_128(<8 x i16> %i, <8 x i16> %j) nounwind readnone {
  %bincmp = icmp eq <8 x i16> %i, %j
  %x = sext <8 x i1> %bincmp to <8 x i16>
  ret <8 x i16> %x
}

define <16 x i8> @test_vpcmpeqb_128(<16 x i8> %i, <16 x i8> %j) nounwind readnone {
  %bincmp = icmp eq <16 x i8> %i, %j
  %x = sext <16 x i1> %bincmp to <16 x i8>
  ret <16 x i8> %x
}

define <8 x i16> @shuffle_v8i16_vpalignr(<8 x i16> %a, <8 x i16> %b) {
  %shuffle = shufflevector <8 x i16> %a, <8 x i16> %b, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
  ret <8 x i16> %shuffle
}

define <16 x i16> @shuffle_v16i16_vpalignr(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 23, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 31, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i16> %shuffle
}

define <16 x i8> @shuffle_v16i8_vpalignr(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 31, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14>
  ret <16 x i8> %shuffle
}

define <32 x i8> @shuffle_v32i8_vpalignr(<32 x i8> %a, <32 x i8> %b) {
  %shuffle = shufflevector <32 x i8> %a, <32 x i8> %b, <32 x i32> <i32 undef, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 63, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  ret <32 x i8> %shuffle
}

define <2 x i64> @shuffle_v2i64_vpalignr(<2 x i64> %a, <2 x i64> %b) {
  %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
  ret <2 x i64> %shuffle
}

define <4 x i32> @shuffle_v4i32_vpalignr(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 7, i32 0, i32 1, i32 2>
  ret <4 x i32> %shuffle
}

define <8 x i32> @shuffle_v8i32_vpalignr(<8 x i32> %a, <8 x i32> %b) {
  %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
  ret <8 x i32> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}
define <2 x double> @shuffle_v2f64_bitcast_1z(<2 x double> %a) {
  %shuffle64 = shufflevector <2 x double> %a, <2 x double> zeroinitializer, <2 x i32> <i32 2, i32 1>
  %bitcast32 = bitcast <2 x double> %shuffle64 to <4 x float>
  %shuffle32 = shufflevector <4 x float> %bitcast32, <4 x float> undef, <4 x i32> <i32 2, i32 3, i32 0, i32 1>
  %bitcast64 = bitcast <4 x float> %shuffle32 to <2 x double>
  ret <2 x double> %bitcast64
}

define <16 x i16> @shuffle_v16i16_zz_zz_zz_zz_zz_zz_zz_16_zz_zz_zz_zz_zz_zz_zz_24(<16 x i16> %a) {
  %shuffle = shufflevector <16 x i16> zeroinitializer, <16 x i16> %a, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 16, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 24>
  ret <16 x i16> %shuffle
}

define i64 @extract_v2i64(<2 x i64> %x, i64* %dst) {
  %r1 = extractelement <2 x i64> %x, i32 0
  %r2 = extractelement <2 x i64> %x, i32 1
  store i64 %r2, i64* %dst, align 1
  ret i64 %r1
}

define i32 @extract_v4i32(<4 x i32> %x, i32* %dst) {
  %r1 = extractelement <4 x i32> %x, i32 1
  %r2 = extractelement <4 x i32> %x, i32 3
  store i32 %r2, i32* %dst, align 1
  ret i32 %r1
}

define i16 @extract_v8i16(<8 x i16> %x, i16* %dst) {
  %r1 = extractelement <8 x i16> %x, i32 1
  %r2 = extractelement <8 x i16> %x, i32 3
  store i16 %r2, i16* %dst, align 1
  ret i16 %r1
}

define i8 @extract_v16i8(<16 x i8> %x, i8* %dst) {
  %r1 = extractelement <16 x i8> %x, i32 1
  %r2 = extractelement <16 x i8> %x, i32 3
  store i8 %r2, i8* %dst, align 1
  ret i8 %r1
}

define <2 x i64> @insert_v2i64(<2 x i64> %x, i64 %y, i64* %ptr) {
  %val = load i64, i64* %ptr
  %r1 = insertelement <2 x i64> %x, i64 %val, i32 1
  %r2 = insertelement <2 x i64> %r1, i64 %y, i32 0
  ret <2 x i64> %r2
}

define <4 x i32> @insert_v4i32(<4 x i32> %x, i32 %y, i32* %ptr) {
  %val = load i32, i32* %ptr
  %r1 = insertelement <4 x i32> %x, i32 %val, i32 1
  %r2 = insertelement <4 x i32> %r1, i32 %y, i32 3
  ret <4 x i32> %r2
}

define <8 x i16> @insert_v8i16(<8 x i16> %x, i16 %y, i16* %ptr) {
  %val = load i16, i16* %ptr
  %r1 = insertelement <8 x i16> %x, i16 %val, i32 1
  %r2 = insertelement <8 x i16> %r1, i16 %y, i32 5
  ret <8 x i16> %r2
}

define <16 x i8> @insert_v16i8(<16 x i8> %x, i8 %y, i8* %ptr) {
  %val = load i8, i8* %ptr
  %r1 = insertelement <16 x i8> %x, i8 %val, i32 3
  %r2 = insertelement <16 x i8> %r1, i8 %y, i32 10
  ret <16 x i8> %r2
}

define <4 x i32> @shuffle_v4i32_0451(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i32> %shuffle
}

define <4 x i32> @shuffle_v4i32_0142(<4 x i32> %a, <4 x i32> %b) {
  %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i32> %shuffle
}

define <16 x i8> @shuffle_v16i8_0101010101010101(<16 x i8> %a, <16 x i8> %b) {
  %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
  ret <16 x i8> %shuffle
}

define <16 x i16> @shuffle_v16i16_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00_00(<16 x i16> %a, <16 x i16> %b) {
  %shuffle = shufflevector <16 x i16> %a, <16 x i16> %b, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <16 x i16> %shuffle
}
define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
  ; vmovshdup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_1133(<4 x float> %a, <4 x float> %b) {
  ; vmovshdup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x float> %shuffle
}

define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
  ; vmovsldup 256 test
  %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
  ret <8 x float> %shuffle
}

define <4 x float> @shuffle_v4f32_0022(<4 x float> %a, <4 x float> %b) {
  ; vmovsldup 128 test
  %shuffle = shufflevector <4 x float> %a, <4 x float> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x float> %shuffle
}

define <2 x double> @insert_mem_lo_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 0, i32 3>
  ret <2 x double> %shuffle
}

define <2 x double> @insert_mem_hi_v2f64(double* %ptr, <2 x double> %b) {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> %b, <2 x i32> <i32 2, i32 0>
  ret <2 x double> %shuffle
}

define void @store_floats(<4 x float> %x, i64* %p) {
  %a = fadd <4 x float> %x, %x
  %b = shufflevector <4 x float> %a, <4 x float> undef, <2 x i32> <i32 0, i32 1>
  %c = bitcast <2 x float> %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 0
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define void @store_h_double(<2 x double> %x, i64* %p) {
  %a = fadd <2 x double> %x, %x
  %b = extractelement <2 x double> %a, i32 1
  %c = bitcast double %b to i64
  store i64 %c, i64* %p
  ret void
}

define <2 x double> @test39(double* %ptr) nounwind {
  %a = load double, double* %ptr
  %v = insertelement <2 x double> undef, double %a, i32 0
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @test40(<2 x double>* %ptr) nounwind {
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <2 x double> @shuffle_v2f64_00(<2 x double> %a, <2 x double> %b) {
  %shuffle = shufflevector <2 x double> %a, <2 x double> %b, <2 x i32> <i32 0, i32 0>
  ret <2 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}
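
; Illustrative extra cases, not part of the original test (function names are
; hypothetical): following the vpmullw pattern above, 32-bit vector multiplies
; select vpmulld, another AVX2 instruction that gains an EVEX encoding when
; AVX512VL is available.
define <8 x i32> @test_vpmulld_256(<8 x i32> %i, <8 x i32> %j) nounwind readnone {
  %x = mul <8 x i32> %i, %j
  ret <8 x i32> %x
}

define <4 x i32> @test_vpmulld_128(<4 x i32> %i, <4 x i32> %j) nounwind readnone {
  %x = mul <4 x i32> %i, %j
  ret <4 x i32> %x
}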