; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64

; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vbmi2-builtins.c

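; For context (illustrative only, not part of the checked output): each test
; below mirrors one 512-bit VBMI2 intrinsic. For example, the IR in
; test_mm512_mask_compress_epi16 is what clang emits for a call such as
;   __m512i r = _mm512_mask_compress_epi16(__S, __U, __D);
; assuming <immintrin.h> is included and the avx512vbmi2 feature is enabled.
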
define <8 x i64> @test_mm512_mask_compress_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_compress_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_compress_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_compress_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_compress_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define void @test_mm512_mask_compressstoreu_epi16(i8* %__P, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %esi, %k1
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  tail call void @llvm.masked.compressstore.v32i16(<32 x i16> %0, i16* %1, <32 x i1> %2)
  ret void
}

define void @test_mm512_mask_compressstoreu_epi8(i8* %__P, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_compressstoreu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_compressstoreu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rsi, %k1
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  tail call void @llvm.masked.compressstore.v64i8(<64 x i8> %0, i8* %__P, <64 x i1> %1)
  ret void
}

define <8 x i64> @test_mm512_mask_expand_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = bitcast <8 x i64> %__S to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> %1, i32 %__U)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi16(i32 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <32 x i16>
  %1 = tail call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %0, <32 x i16> zeroinitializer, i32 %__U)
  %2 = bitcast <32 x i16> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expand_epi8(<8 x i64> %__S, i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_mask_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = bitcast <8 x i64> %__S to <64 x i8>
  %2 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> %1, i64 %__U)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expand_epi8(i64 %__U, <8 x i64> %__D) {
; X86-LABEL: test_mm512_maskz_expand_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expand_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__D to <64 x i8>
  %1 = tail call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %0, <64 x i8> zeroinitializer, i64 %__U)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_expandloadu_epi16(<8 x i64> %__S, i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast i8* %__P to i16*
  %2 = bitcast i32 %__U to <32 x i1>
  %3 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %1, <32 x i1> %2, <32 x i16> %0)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi16(i32 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpexpandw (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i8* %__P to i16*
  %1 = bitcast i32 %__U to <32 x i1>
  %2 = tail call <32 x i16> @llvm.masked.expandload.v32i16(i16* %0, <32 x i1> %1, <32 x i16> zeroinitializer)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_expandloadu_epi8(<8 x i64> %__S, i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_mask_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <64 x i8>
  %1 = bitcast i64 %__U to <64 x i1>
  %2 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %1, <64 x i8> %0)
  %3 = bitcast <64 x i8> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_maskz_expandloadu_epi8(i64 %__U, i8* readonly %__P) {
; X86-LABEL: test_mm512_maskz_expandloadu_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax
; X86-NEXT:    kunpckdq %k1, %k0, %k1
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_expandloadu_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovq %rdi, %k1
; X64-NEXT:    vpexpandb (%rsi), %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast i64 %__U to <64 x i1>
  %1 = tail call <64 x i8> @llvm.masked.expandload.v64i8(i8* %__P, <64 x i1> %0, <64 x i8> zeroinitializer)
  %2 = bitcast <64 x i8> %1 to <8 x i64>
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_mask_shldi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 127)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64>, <8 x i64>, i32)

define <8 x i64> @test_mm512_maskz_shldi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 63)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shldi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 31)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %0, <16 x i32> %1, i32 127)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32>, <16 x i32>, i32)

define <8 x i64> @test_mm512_maskz_shldi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %0, <16 x i32> %1, i32 63)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %0, <16 x i32> %1, i32 31)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 127)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16>, <32 x i16>, i32)

define <8 x i64> @test_mm512_maskz_shldi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 63)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shldi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldw $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %0, <32 x i16> %1, i32 31)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 127)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> %__S
  ret <8 x i64> %2
}

declare <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64>, <8 x i64>, i32)

define <8 x i64> @test_mm512_maskz_shrdi_epi64(i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdq $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 63)
  %1 = bitcast i8 %__U to <8 x i1>
  %2 = select <8 x i1> %1, <8 x i64> %0, <8 x i64> zeroinitializer
  ret <8 x i64> %2
}

define <8 x i64> @test_mm512_shrdi_epi64(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdq $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %__A, <8 x i64> %__B, i32 31)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdi_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 127)
  %3 = bitcast <8 x i64> %__S to <16 x i32>
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i32> %2, <16 x i32> %3
  %6 = bitcast <16 x i32> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32>, <16 x i32>, i32)

define <8 x i64> @test_mm512_maskz_shrdi_epi32(i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdd $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 63)
  %3 = bitcast i16 %__U to <16 x i1>
  %4 = select <16 x i1> %3, <16 x i32> %2, <16 x i32> zeroinitializer
  %5 = bitcast <16 x i32> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi32(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdd $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <16 x i32>
  %1 = bitcast <8 x i64> %__B to <16 x i32>
  %2 = tail call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %0, <16 x i32> %1, i32 31)
  %3 = bitcast <16 x i32> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shrdi_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $127, %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $127, %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 127)
  %3 = bitcast <8 x i64> %__S to <32 x i16>
  %4 = bitcast i32 %__U to <32 x i1>
  %5 = select <32 x i1> %4, <32 x i16> %2, <32 x i16> %3
  %6 = bitcast <32 x i16> %5 to <8 x i64>
  ret <8 x i64> %6
}

declare <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16>, <32 x i16>, i32)

define <8 x i64> @test_mm512_maskz_shrdi_epi16(i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdi_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdi_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdw $63, %zmm1, %zmm0, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 63)
  %3 = bitcast i32 %__U to <32 x i1>
  %4 = select <32 x i1> %3, <32 x i16> %2, <32 x i16> zeroinitializer
  %5 = bitcast <32 x i16> %4 to <8 x i64>
  ret <8 x i64> %5
}

define <8 x i64> @test_mm512_shrdi_epi16(<8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdi_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdw $31, %zmm1, %zmm0, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__A to <32 x i16>
  %1 = bitcast <8 x i64> %__B to <32 x i16>
  %2 = tail call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %0, <32 x i16> %1, i32 31)
  %3 = bitcast <32 x i16> %2 to <8 x i64>
  ret <8 x i64> %3
}

define <8 x i64> @test_mm512_mask_shldv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 %__U)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_maskz_shldv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 %__U)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_shldv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 -1)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shldv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 %__U)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_shldv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 %__U)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_shldv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 -1)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shldv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 %__U)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_shldv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shldv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shldv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 %__U)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_shldv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shldv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 -1)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi64(<8 x i64> %__S, i8 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 %__U)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_maskz_shrdv_epi64(i8 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi64:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi64:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 %__U)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_shrdv_epi64(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi64:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = tail call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B, i8 -1)
  ret <8 x i64> %0
}

define <8 x i64> @test_mm512_mask_shrdv_epi32(<8 x i64> %__S, i16 zeroext %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 %__U)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_shrdv_epi32(i16 zeroext %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi32:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi32:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 %__U)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_shrdv_epi32(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi32:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <16 x i32>
  %1 = bitcast <8 x i64> %__A to <16 x i32>
  %2 = bitcast <8 x i64> %__B to <16 x i32>
  %3 = tail call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %0, <16 x i32> %1, <16 x i32> %2, i16 -1)
  %4 = bitcast <16 x i32> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_mask_shrdv_epi16(<8 x i64> %__S, i32 %__U, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_mask_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_mask_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 %__U)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_maskz_shrdv_epi16(i32 %__U, <8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; X86-LABEL: test_mm512_maskz_shrdv_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm512_maskz_shrdv_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 %__U)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

define <8 x i64> @test_mm512_shrdv_epi16(<8 x i64> %__S, <8 x i64> %__A, <8 x i64> %__B) {
; CHECK-LABEL: test_mm512_shrdv_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <8 x i64> %__S to <32 x i16>
  %1 = bitcast <8 x i64> %__A to <32 x i16>
  %2 = bitcast <8 x i64> %__B to <32 x i16>
  %3 = tail call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %0, <32 x i16> %1, <32 x i16> %2, i32 -1)
  %4 = bitcast <32 x i16> %3 to <8 x i64>
  ret <8 x i64> %4
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8>, <64 x i8>, i64)
declare void @llvm.masked.compressstore.v32i16(<32 x i16>, i16*, <32 x i1>)
declare void @llvm.masked.compressstore.v64i8(<64 x i8>, i8*, <64 x i1>)
declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16>, <32 x i16>, i32)
declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8>, <64 x i8>, i64)
declare <32 x i16> @llvm.masked.expandload.v32i16(i16*, <32 x i1>, <32 x i16>)
declare <64 x i8> @llvm.masked.expandload.v64i8(i8*, <64 x i1>, <64 x i8>)
declare <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)