; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c

define zeroext i16 @test_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_test_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmb %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp ne <16 x i8> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}

define zeroext i16 @test_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmb %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmb %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp ne <16 x i8> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define i32 @test_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_test_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmb %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp ne <32 x i8> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

define i32 @test_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_test_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmb %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_test_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmb %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp ne <32 x i8> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i8 @test_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_test_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp ne <8 x i16> %0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

define zeroext i8 @test_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_test_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vptestmw %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_test_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmw %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp ne <8 x i16> %0, zeroinitializer
  %2 = bitcast i8 %__U to <8 x i1>
  %3 = and <8 x i1> %1, %2
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

define zeroext i16 @test_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_test_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestmw %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp ne <16 x i16> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}

define zeroext i16 @test_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_test_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestmw %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_test_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestmw %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp ne <16 x i16> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define zeroext i16 @test_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_testn_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmb %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp eq <16 x i8> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}

define zeroext i16 @test_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmb %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmb %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
  %1 = icmp eq <16 x i8> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define i32 @test_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_testn_epi8_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmb %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp eq <32 x i8> %0, zeroinitializer
  %2 = bitcast <32 x i1> %1 to i32
  ret i32 %2
}

define i32 @test_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_testn_epi8_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmb %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_testn_epi8_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmb %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
  %1 = icmp eq <32 x i8> %0, zeroinitializer
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = and <32 x i1> %1, %2
  %4 = bitcast <32 x i1> %3 to i32
  ret i32 %4
}

define zeroext i8 @test_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_testn_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmw %xmm0, %xmm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzbl %al, %eax
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp eq <8 x i16> %0, zeroinitializer
  %2 = bitcast <8 x i1> %1 to i8
  ret i8 %2
}

define zeroext i8 @test_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_testn_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vptestnmw %xmm0, %xmm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzbl %al, %eax
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_testn_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmw %xmm0, %xmm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzbl %al, %eax
; X64-NEXT:    retq
entry:
  %and.i.i = and <2 x i64> %__B, %__A
  %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
  %1 = icmp eq <8 x i16> %0, zeroinitializer
  %2 = bitcast i8 %__U to <8 x i1>
  %3 = and <8 x i1> %1, %2
  %4 = bitcast <8 x i1> %3 to i8
  ret i8 %4
}

define zeroext i16 @test_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_testn_epi16_mask:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vptestnmw %ymm0, %ymm1, %k0
; CHECK-NEXT:    kmovd %k0, %eax
; CHECK-NEXT:    movzwl %ax, %eax
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp eq <16 x i16> %0, zeroinitializer
  %2 = bitcast <16 x i1> %1 to i16
  ret i16 %2
}

define zeroext i16 @test_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_testn_epi16_mask:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vptestnmw %ymm0, %ymm1, %k0 {%k1}
; X86-NEXT:    kmovd %k0, %eax
; X86-NEXT:    movzwl %ax, %eax
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_testn_epi16_mask:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vptestnmw %ymm0, %ymm1, %k0 {%k1}
; X64-NEXT:    kmovd %k0, %eax
; X64-NEXT:    movzwl %ax, %eax
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %and.i.i = and <4 x i64> %__B, %__A
  %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
  %1 = icmp eq <16 x i16> %0, zeroinitializer
  %2 = bitcast i16 %__U to <16 x i1>
  %3 = and <16 x i1> %1, %2
  %4 = bitcast <16 x i1> %3 to i16
  ret i16 %4
}

define <2 x i64> @test_mm_mask_set1_epi8(<2 x i64> %__O, i16 zeroext %__M, i8 signext %__A) local_unnamed_addr #0 {
; X86-LABEL: test_mm_mask_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i8> %vecinit.i.i, <16 x i8> undef, <16 x i32> zeroinitializer
  %0 = bitcast <2 x i64> %__O to <16 x i8>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i8> %vecinit15.i.i, <16 x i8> %0
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_set1_epi8(i16 zeroext %__M, i8 signext %__A) {
; X86-LABEL: test_mm_maskz_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i8> %vecinit.i.i, <16 x i8> undef, <16 x i32> zeroinitializer
  %0 = bitcast i16 %__M to <16 x i1>
  %1 = select <16 x i1> %0, <16 x i8> %vecinit15.i.i, <16 x i8> zeroinitializer
  %2 = bitcast <16 x i8> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <4 x i64> @test_mm256_mask_set1_epi8(<4 x i64> %__O, i32 %__M, i8 signext %__A) {
; X86-LABEL: test_mm256_mask_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i8> %vecinit.i.i, <32 x i8> undef, <32 x i32> zeroinitializer
  %0 = bitcast <4 x i64> %__O to <32 x i8>
  %1 = bitcast i32 %__M to <32 x i1>
  %2 = select <32 x i1> %1, <32 x i8> %vecinit31.i.i, <32 x i8> %0
  %3 = bitcast <32 x i8> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_set1_epi8(i32 %__M, i8 signext %__A) {
; X86-LABEL: test_mm256_maskz_set1_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_set1_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %esi, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0
  %vecinit31.i.i = shufflevector <32 x i8> %vecinit.i.i, <32 x i8> undef, <32 x i32> zeroinitializer
  %0 = bitcast i32 %__M to <32 x i1>
  %1 = select <32 x i1> %0, <32 x i8> %vecinit31.i.i, <32 x i8> zeroinitializer
  %2 = bitcast <32 x i8> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <4 x i64> @test_mm256_mask_set1_epi16(<4 x i64> %__O, i16 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm256_mask_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %eax, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i16> %vecinit.i.i, <16 x i16> undef, <16 x i32> zeroinitializer
  %0 = bitcast <4 x i64> %__O to <16 x i16>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i16> %vecinit15.i.i, <16 x i16> %0
  %3 = bitcast <16 x i16> %2 to <4 x i64>
  ret <4 x i64> %3
}

define <4 x i64> @test_mm256_maskz_set1_epi16(i16 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm256_maskz_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %eax, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0
  %vecinit15.i.i = shufflevector <16 x i16> %vecinit.i.i, <16 x i16> undef, <16 x i32> zeroinitializer
  %0 = bitcast i16 %__M to <16 x i1>
  %1 = select <16 x i1> %0, <16 x i16> %vecinit15.i.i, <16 x i16> zeroinitializer
  %2 = bitcast <16 x i16> %1 to <4 x i64>
  ret <4 x i64> %2
}

define <2 x i64> @test_mm_mask_set1_epi16(<2 x i64> %__O, i8 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm_mask_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovd %ecx, %k1
; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0
  %vecinit7.i.i = shufflevector <8 x i16> %vecinit.i.i, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = bitcast <2 x i64> %__O to <8 x i16>
  %1 = bitcast i8 %__M to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i16> %vecinit7.i.i, <8 x i16> %0
  %3 = bitcast <8 x i16> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_maskz_set1_epi16(i8 zeroext %__M, i16 signext %__A) {
; X86-LABEL: test_mm_maskz_set1_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
; X86-NEXT:    kmovd %ecx, %k1
; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_set1_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %esi, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %vecinit.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0
  %vecinit7.i.i = shufflevector <8 x i16> %vecinit.i.i, <8 x i16> undef, <8 x i32> zeroinitializer
  %0 = bitcast i8 %__M to <8 x i1>
  %1 = select <8 x i1> %0, <8 x i16> %vecinit7.i.i, <8 x i16> zeroinitializer
  %2 = bitcast <8 x i16> %1 to <2 x i64>
  ret <2 x i64> %2
}

define <2 x i64> @test_mm_broadcastb_epi8(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastb_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i8> %res0 to <2 x i64>
  ret <2 x i64> %res1
}

define <2 x i64> @test_mm_mask_broadcastb_epi8(<2 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm_mask_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i8> %res0, <16 x i8> %arg0
  %res2 = bitcast <16 x i8> %res1 to <2 x i64>
  ret <2 x i64> %res2
}

define <2 x i64> @test_mm_maskz_broadcastb_epi8(i16 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_maskz_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i8> %res0, <16 x i8> zeroinitializer
  %res2 = bitcast <16 x i8> %res1 to <2 x i64>
  ret <2 x i64> %res2
}

define <4 x i64> @test_mm256_broadcastb_epi8(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastb_epi8:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = bitcast <32 x i8> %res0 to <4 x i64>
  ret <4 x i64> %res1
}

define <4 x i64> @test_mm256_mask_broadcastb_epi8(<4 x i64> %a0, i32 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm256_mask_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
  %arg1 = bitcast i32 %a1 to <32 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg1, <32 x i8> %res0, <32 x i8> %arg0
  %res2 = bitcast <32 x i8> %res1 to <4 x i64>
  ret <4 x i64> %res2
}

define <4 x i64> @test_mm256_maskz_broadcastb_epi8(i32 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm256_maskz_broadcastb_epi8:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastb_epi8:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i32 %a0 to <32 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
  %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <32 x i32> zeroinitializer
  %res1 = select <32 x i1> %arg0, <32 x i8> %res0, <32 x i8> zeroinitializer
  %res2 = bitcast <32 x i8> %res1 to <4 x i64>
  ret <4 x i64> %res2
}

define <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm_broadcastw_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = bitcast <8 x i16> %res0 to <2 x i64>
  ret <2 x i64> %res1
}

define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm_mask_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpbroadcastw %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %arg1 = bitcast i8 %a1 to <8 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg1, <8 x i16> %res0, <8 x i16> %arg0
  %res2 = bitcast <8 x i16> %res1 to <2 x i64>
  ret <2 x i64> %res2
}

define <2 x i64> @test_mm_maskz_broadcastw_epi16(i8 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm_maskz_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i8 %a0 to <8 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <8 x i32> zeroinitializer
  %res1 = select <8 x i1> %arg0, <8 x i16> %res0, <8 x i16> zeroinitializer
  %res2 = bitcast <8 x i16> %res1 to <2 x i64>
  ret <2 x i64> %res2
}

define <4 x i64> @test_mm256_broadcastw_epi16(<2 x i64> %a0) {
; CHECK-LABEL: test_mm256_broadcastw_epi16:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = bitcast <16 x i16> %res0 to <4 x i64>
  ret <4 x i64> %res1
}

define <4 x i64> @test_mm256_mask_broadcastw_epi16(<4 x i64> %a0, i16 %a1, <2 x i64> %a2) {
; X86-LABEL: test_mm256_mask_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %xmm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm1, %ymm0 {%k1}
; X64-NEXT:    retq
  %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
  %arg1 = bitcast i16 %a1 to <16 x i1>
  %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg1, <16 x i16> %res0, <16 x i16> %arg0
  %res2 = bitcast <16 x i16> %res1 to <4 x i64>
  ret <4 x i64> %res2
}

define <4 x i64> @test_mm256_maskz_broadcastw_epi16(i16 %a0, <2 x i64> %a1) {
; X86-LABEL: test_mm256_maskz_broadcastw_epi16:
; X86:       # %bb.0:
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_broadcastw_epi16:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z}
; X64-NEXT:    retq
  %arg0 = bitcast i16 %a0 to <16 x i1>
  %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
  %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <16 x i32> zeroinitializer
  %res1 = select <16 x i1> %arg0, <16 x i16> %res0, <16 x i16> zeroinitializer
  %res2 = bitcast <16 x i16> %res1 to <4 x i64>
  ret <4 x i64> %res2
}

define <2 x i64> @test_mm_cvtepi16_epi8(<2 x i64> %__A) {
; CHECK-LABEL: test_mm_cvtepi16_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %conv.i = trunc <8 x i16> %0 to <8 x i8>
  %shuf.i = shufflevector <8 x i8> %conv.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
  %1 = bitcast <16 x i8> %shuf.i to <2 x i64>
  ret <2 x i64> %1
}

define <2 x i64> @test_mm256_cvtepi16_epi8(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi16_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmovwb %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast <16 x i8> %conv.i to <2 x i64>
  ret <2 x i64> %1
}

define <2 x i64> @test_mm256_mask_cvtepi16_epi8(<2 x i64> %__O, i16 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %ymm1, %xmm0 {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %ymm1, %xmm0 {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast <2 x i64> %__O to <16 x i8>
  %2 = bitcast i16 %__M to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %conv.i.i, <16 x i8> %1
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm256_maskz_cvtepi16_epi8(i16 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i8> %conv.i.i, <16 x i8> zeroinitializer
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}

define <2 x i64> @test_mm_mask2_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %1
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <4 x i64> @test_mm256_mask2_permutex2var_epi16(<4 x i64> %__A, <4 x i64> %__I, i16 zeroext %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %1
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <2 x i64> @test_mm_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}

define <2 x i64> @test_mm_mask_permutex2var_epi16(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <2 x i64> @test_mm_maskz_permutex2var_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}

define <4 x i64> @test_mm256_permutex2var_epi16(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}

define <4 x i64> @test_mm256_mask_permutex2var_epi16(<4 x i64> %__A, i16 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

define <4 x i64> @test_mm256_maskz_permutex2var_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}

declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)
declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>)

!0 = !{i32 1}