; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

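; These tests cover the 512-bit AVX512VBMI2 intrinsics on both 32-bit (X86)
; and 64-bit (X64) targets, with --show-mc-encoding verifying the expected
; instruction encodings. An expand with an all-ones mask and an undef
; passthru is a no-op, so the unmasked tests below compile to a bare ret.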
define <32 x i16> @test_expand_w_512(<32 x i16> %data) {
; CHECK-LABEL: test_expand_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> undef, i32 -1)
  ret <32 x i16> %res
}

define <32 x i16> @test_mask_expand_w_512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_mask_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %passthru, i32 %mask)
  ret <32 x i16> %res
}

define <32 x i16> @test_maskz_expand_w_512(<32 x i16> %data, i32 %mask) {
; X86-LABEL: test_maskz_expand_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandw %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> zeroinitializer, i32 %mask)
  ret <32 x i16> %res
}

declare <32 x i16> @llvm.x86.avx512.mask.expand.w.512(<32 x i16> %data, <32 x i16> %src0, i32 %mask)

define <64 x i8> @test_expand_b_512(<64 x i8> %data) {
; CHECK-LABEL: test_expand_b_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> undef, i64 -1)
  ret <64 x i8> %res
}

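; On the 32-bit target the i64 mask is passed on the stack as two 32-bit
; halves, which are loaded with kmovd and recombined with kunpckdq before
; they can drive the masked vpexpandb.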
define <64 x i8> @test_mask_expand_b_512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask) {
; X86-LABEL: test_mask_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x62,0xc8]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %passthru, i64 %mask)
  ret <64 x i8> %res
}

define <64 x i8> @test_maskz_expand_b_512(<64 x i8> %data, i64 %mask) {
; X86-LABEL: test_maskz_expand_b_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    kunpckdq %k0, %k1, %k1 # encoding: [0xc4,0xe1,0xf4,0x4b,0xc8]
; X86-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_b_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpexpandb %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x62,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> zeroinitializer, i64 %mask)
  ret <64 x i8> %res
}

declare <64 x i8> @llvm.x86.avx512.mask.expand.b.512(<64 x i8> %data, <64 x i8> %src0, i64 %mask)

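; VPSHLDD/VPSHLDQ/VPSHLDW: concatenate-and-shift-left by an immediate. Each
; test adds the masked and unmasked results so that neither call is dead.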
define <16 x i32>@test_int_x86_avx512_mask_vpshld_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.vpshld.d.512(<16 x i32>, <16 x i32>, i32)

define <8 x i64>@test_int_x86_avx512_mask_vpshld_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x71,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x71,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.vpshld.q.512(<8 x i64>, <8 x i64>, i32)

define <32 x i16>@test_int_x86_avx512_mask_vpshld_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshld_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x70,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshldw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x70,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.vpshld.w.512(<32 x i16>, <32 x i16>, i32)

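; VPSHRDD/VPSHRDQ/VPSHRDW: the same pattern for the
; concatenate-and-shift-right-by-immediate forms.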
define <16 x i32>@test_int_x86_avx512_mask_vpshrd_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdd $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %2 = bitcast i16 %x4 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x3
  %4 = call <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32> %x0, <16 x i32> %x1, i32 22)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}
declare <16 x i32> @llvm.x86.avx512.vpshrd.d.512(<16 x i32>, <16 x i32>, i32)

define <8 x i64>@test_int_x86_avx512_mask_vpshrd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_q_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x73,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdq $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x73,0xd1,0x16]
; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %2 = bitcast i8 %x4 to <8 x i1>
  %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %x3
  %4 = call <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64> %x0, <8 x i64> %x1, i32 22)
  %res2 = add <8 x i64> %3, %4
  ret <8 x i64> %res2
}
declare <8 x i64> @llvm.x86.avx512.vpshrd.q.512(<8 x i64>, <8 x i64>, i32)

define <32 x i16>@test_int_x86_avx512_mask_vpshrd_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrd_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0x72,0xd9,0x16]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpshrdw $22, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x72,0xd1,0x16]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %1 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 22)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
declare <32 x i16> @llvm.x86.avx512.vpshrd.w.512(<32 x i16>, <32 x i16>, i32)

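; VPSHRDVD/VPSHRDVQ/VPSHRDVW: variable concatenate-and-shift-right. Each test
; covers a masked form with a memory operand, an unmasked form, and a
; zero-masked form.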
declare <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_vpshrdv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xe2]
; X86-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xc2]
; X86-NEXT:    vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x73,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x73,0xe2]
; X64-NEXT:    vpshrdvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x73,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpshrdv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_vpshrdv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xe2]
; X86-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xc2]
; X86-NEXT:    vpaddq %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc4]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x73,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x73,0xe2]
; X64-NEXT:    vpshrdvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x73,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.maskz.vpshrdv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8  %x3)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_mask_vpshrdv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshrdvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xe2]
; X86-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xc2]
; X86-NEXT:    vpaddw %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc4]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshrdv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshrdvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x72,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x72,0xe2]
; X64-NEXT:    vpshrdvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x72,0xc2]
; X64-NEXT:    vpaddw %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc4]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 -1)
  %res2 = call <32 x i16> @llvm.x86.avx512.maskz.vpshrdv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32  %x3)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res2, %res3
  ret <32 x i16> %res4
}

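; VPSHLDVD/VPSHLDVQ/VPSHLDVW: variable concatenate-and-shift-left, using the
; same masked/unmasked/zero-masked pattern as the VPSHRDV tests above.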
declare <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
declare <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_mask_vpshldv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvd (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xe2]
; X86-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xc2]
; X86-NEXT:    vpaddd %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc4]
; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_d_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvd (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0x75,0x49,0x71,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0x75,0x48,0x71,0xe2]
; X64-NEXT:    vpshldvd %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xc9,0x71,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0x5d,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <16 x i32>, <16 x i32>* %x2p
  %res = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
  %res2 = call <16 x i32> @llvm.x86.avx512.maskz.vpshldv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16  %x3)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
declare <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_vpshldv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64>* %x2p, <8 x i64> %x4, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvq (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xe2]
; X86-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xc2]
; X86-NEXT:    vpaddq %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc4]
; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_q_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvq (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x71,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm4 # encoding: [0x62,0xf2,0xf5,0x48,0x71,0xe2]
; X64-NEXT:    vpshldvq %zmm2, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x71,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <8 x i64>, <8 x i64>* %x2p
  %res = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8 -1)
  %res2 = call <8 x i64> @llvm.x86.avx512.maskz.vpshldv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x4, i8  %x3)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
declare <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)

define <32 x i16>@test_int_x86_avx512_mask_vpshldv_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16>* %x2p, <32 x i16> %x4, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X86-NEXT:    vpshldvw (%eax), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x18]
; X86-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xe2]
; X86-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2]
; X86-NEXT:    vpaddw %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc4]
; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpshldv_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
; X64-NEXT:    vpshldvw (%rdi), %zmm1, %zmm3 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x70,0x1f]
; X64-NEXT:    vmovdqa64 %zmm0, %zmm4 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xe0]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x70,0xe2]
; X64-NEXT:    vpshldvw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf2,0xf5,0x48,0x70,0xc2]
; X64-NEXT:    vpaddw %zmm4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc4]
; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %x2 = load <32 x i16>, <32 x i16>* %x2p
  %res = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32 -1)
  %res2 = call <32 x i16> @llvm.x86.avx512.maskz.vpshldv.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x4, i32  %x3)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res2, %res3
  ret <32 x i16> %res4
}