; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi2-builtins.c

define <8 x i64> @test_mm512_mask_compress_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_compress_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define void @test_mm512_mask_compressstoreu_epi16(i8* %__P, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, i16* %1, <32 x i1> %2)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi8(i8* %__P, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rsi, %k1
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, i8* %__P, <64 x i1> %1)
  ret void
}

define <8 x i64> @test_mm512_mask_expand_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expand_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %0)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %0, <32 x i1> %1, <32 x i16> zeroinitializer)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 127)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64>, <8 x i64>, i32)

define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 63)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 31)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %0, <16 x i32> %1, i32 127)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32>, <16 x i32>, i32)

define <8 x i64> @test_mm512_maskz_shldi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %0, <16 x i32> %1, i32 63)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %0, <16 x i32> %1, i32 31)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 127)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16>, <32 x i16>, i32)

define <8 x i64> @test_mm512_maskz_shldi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 63)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 31)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 127)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64>, <8 x i64>, i32)

define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 63)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 31)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 127)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32>, <16 x i32>, i32)

define <8 x i64> @test_mm512_maskz_shrdi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 63)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 31)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 127)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16>, <32 x i16>, i32)

define <8 x i64> @test_mm512_maskz_shrdi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 63)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 31)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 %__U)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 %__U)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_shldv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 -1)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 %__U)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_shldv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 %__U)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_shldv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 -1)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shldv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 %__U)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_shldv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 %__U)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_shldv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 -1)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 %__U)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 %__U)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_shrdv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 -1)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 %__U)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_shrdv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 %__U)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_shrdv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 -1)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 %__U)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_shrdv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 %__U)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_shrdv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 -1)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
declare <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
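
; For reference, a minimal C sketch (an illustrative assumption, not copied from the
; clang builtins test named in the NOTE above) of the kind of source that yields IR
; like test_mm512_mask_compress_epi16; the wrapper name below is hypothetical:
;
;   #include <immintrin.h>
;
;   __m512i compress_epi16_example(__m512i __S, __mmask32 __U, __m512i __D) {
;     /* Expected to reach llc as a masked compress call that selects
;        vpcompressw with a {%k1} write-mask, as checked above. */
;     return _mm512_mask_compress_epi16(__S, __U, __D);
;   }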