; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

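; This file exercises the AVX512VBMI2 mask intrinsics: expand loads, compress
; stores, register compress, and the VPSHLD/VPSHRD concatenate-and-shift
; instructions, checked on both the i686 (X86) and x86_64 (X64) triples with
; exact MC encodings (--show-mc-encoding).
;
; Expand-load semantics, as a 4-lane sketch with mask 0b0101: the two words
; read contiguously from %addr land in lanes 0 and 2 (the set mask bits);
; lanes 1 and 3 keep %data (merge masking) or become zero ({z} masking).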
define <32 x i16> @test_mask_expand_load_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_expand_load_w_512(i8* %addr, i32 %mask) {
; X86-LABEL: test_maskz_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 %mask)

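; An all-ones mask makes the expand load a plain contiguous load, but the
; lowering still emits the masked form, materializing an all-ones k1 with
; kxnord %k0, %k0, %k1 (k1 = ~(k0 ^ k0) = all ones). The same pattern recurs
; in the all-ones compress-store tests below.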
define <32 x i16> @test_expand_load_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_expand_load_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpexpandw (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpexpandw (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.load.w.512(i8* %addr, <32 x i16> %data, i32 -1)
  ret <32 x i16> %res
}

define <64 x i8> @test_mask_expand_load_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_expand_load_b_512(i8* %addr, i64 %mask) {
; X86-LABEL: test_maskz_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 %mask)

define <64 x i8> @test_expand_load_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_expand_load_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandb (%eax), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandb (%rdi), %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.load.b.512(i8* %addr, <64 x i8> %data, i64 -1)
  ret <64 x i8> %res
}

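; Compress store is the inverse of expand load: the elements selected by the
; mask are written contiguously to %addr. The vzeroupper before each return is
; the usual cleanup after using the upper vector state in a function that does
; not return a vector; the expand-load tests above return a ZMM value and so
; do not emit it.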
define void @test_mask_compress_store_w_512(i8* %addr, <32 x i16> %data, i32 %mask) {
; X86-LABEL: test_mask_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 %mask)

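; Register form: vpcompressw packs the selected source elements into the low
; lanes of the destination, merging into %passthru (or zeroing with {z}). The
; trailing vmovdqa64 just moves the result into the zmm0 return register.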
define <32 x i16> @test_mask_compress_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_compress_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_compress_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

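; Compressing with an all-ones mask is the identity, so the whole call folds
; away and only the return remains; the byte-element variant further down
; folds the same way.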
define <32 x i16> @test_compress_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_compress_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.compress.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)

define void @test_compress_store_w_512(i8* %addr, <32 x i16> %data) {
; X86-LABEL: test_compress_store_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X86-NEXT:    vpcompressw %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnord %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfd,0x46,0xc8]
; X64-NEXT:    vpcompressw %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.w.512(i8* %addr, <32 x i16> %data, i32 -1)
  ret void
}

define void @test_mask_compress_store_b_512(i8* %addr, <64 x i8> %data, i64 %mask) {
; X86-LABEL: test_mask_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 %mask)

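; A 64-bit mask argument has no single-GPR home on i686, so it arrives as two
; 32-bit stack words that are loaded with kmovd and reassembled into k1 with
; kunpckdq, whereas x86_64 needs only one kmovq from %rdi.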
define <64 x i8> @test_mask_compress_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0xc1]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_compress_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_compress_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_compress_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpcompressb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x63,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_compress_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_compress_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.compress.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)

define void @test_compress_store_b_512(i8* %addr, <64 x i8> %data) {
; X86-LABEL: test_compress_store_b_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kxnorq %k0, %k0, %k1 # encoding: [0xc4,0xe1,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x63,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.b.512(i8* %addr, <64 x i8> %data, i64 -1)
  ret void
}

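; VPSHLD is a funnel shift left: each destination element is the high half of
; the double-width concatenation x0[i]:x1[i] shifted left by the immediate
; (taken modulo the element width). Each test runs the intrinsic both masked
; and unmasked and adds the two results, covering both encodings in one
; function.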
define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.mask.vpshld.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define <8 x i64>@test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.mask.vpshld.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <32 x i16>@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.mask.vpshld.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)

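; VPSHRD is the matching funnel shift right: each destination element is the
; low half of the double-width concatenation x1[i]:x0[i] shifted right by the
; immediate (again modulo the element width).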
define <16 x i32>@test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
  %res2 = add <16 x i32> %res, %res1
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.mask.vpshrd.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)

define <8 x i64>@test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
  %res2 = add <8 x i64> %res, %res1
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.mask.vpshrd.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)

define <32 x i16>@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 %x4)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22, <32 x i16> %x3, i32 -1)
  %res2 = add <32 x i16> %res, %res1
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.mask.vpshrd.w.512(<32 x i16>, <32 x i16>, i32, <32 x i16>, i32)