; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

declare i16 @llvm.x86.avx512.kunpck.bw(i16, i16) nounwind readnone

define i16 @unpckbw_test(i16 %a0, i16 %a1) {
; X86-LABEL: unpckbw_test:
; X86:       ## %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k0 ## encoding: [0xc5,0xf8,0x92,0xc0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: unpckbw_test:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    kunpckbw %k1, %k0, %k0 ## encoding: [0xc5,0xfd,0x4b,0xc1]
; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call i16 @llvm.x86.avx512.kunpck.bw(i16 %a0, i16 %a1)
  ret i16 %res
}

define <16 x i32>@test_int_x86_avx512_mask_pbroadcastd_gpr_512(i32 %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastd %eax, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xc8]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vpbroadcastd %eax, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7c,0xc0]
; X86-NEXT:    vpbroadcastd %eax, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xd0]
; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastd_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
    %res = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 -1)
    %res1 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> %x1, i16 %mask)
    %res2 = call <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32 %x0, <16 x i32> zeroinitializer, i16 %mask)
    %res3 = add <16 x i32> %res, %res1
    %res4 = add <16 x i32> %res2, %res3
    ret <16 x i32> %res4
  }
declare <16 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.512(i32, <16 x i32>, i16)


define <8 x i64>@test_int_x86_avx512_mask_pbroadcastq_gpr_512(i64 %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT:    ## xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc1]
; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc9]
; X86-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcastq_gpr_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7c,0xd7]
; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
   %res = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 -1)
   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> %x1,i8 %mask)
   %res2 = call <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64 %x0, <8 x i64> zeroinitializer,i8 %mask)
   %res3 = add <8 x i64> %res, %res1
   %res4 = add <8 x i64> %res2, %res3
   ret <8 x i64> %res4
}
declare <8 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.512(i64, <8 x i64>, i8)


declare <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float>, <16 x float>, i16) nounwind readonly

define <16 x float> @test_x86_vbroadcast_ss_ps_512(<4 x float> %a0, <16 x float> %a1, i16 %mask ) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_512:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xd0]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X86-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X86-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x18,0xc8]
; X64-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
; X64-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x18,0xc0]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> %a1, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.512(<4 x float> %a0, <16 x float> zeroinitializer, i16 %mask)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double>, <8 x double>, i8) nounwind readonly

define <8 x double> @test_x86_vbroadcast_sd_pd_512(<2 x double> %a0, <8 x double> %a1, i8 %mask ) {
; X86-LABEL: test_x86_vbroadcast_sd_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X86-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X86-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X86-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_sd_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x19,0xc8]
; X64-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
; X64-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x19,0xc0]
; X64-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]

  %res = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> %a1, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.512(<2 x double> %a0, <8 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

declare <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32>, <16 x i32>, i16)

define <16 x i32>@test_int_x86_avx512_pbroadcastd_512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0xd0]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x58,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x58,0xc8]
; X64-NEXT:    vpbroadcastd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x58,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> %x1, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.pbroadcastd.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
  %res3 = add <16 x i32> %res, %res1
  %res4 = add <16 x i32> %res2, %res3
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64>, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_pbroadcastq_512(<2 x i64> %x0, <8 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd0]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x59,0xc8]
; X64-NEXT:    vpbroadcastq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x59,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> %x1,i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.pbroadcastq.512(<2 x i64> %x0, <8 x i64> zeroinitializer,i8 %mask)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res2, %res3
  ret <8 x i64> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_movsldup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovsldup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x12,0xd0]
; X86-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X86-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovsldup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x12,0xd0]
; X64-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X64-NEXT:    vmovsldup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movsldup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float>, <16 x float>, i16)

define <16 x float>@test_int_x86_avx512_mask_movshdup_512(<16 x float> %x0, <16 x float> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovshdup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x16,0xd0]
; X86-NEXT:    ## zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X86-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovshdup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x16,0xd0]
; X64-NEXT:    ## zmm2 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x16,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    vaddps %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xca]
; X64-NEXT:    vmovshdup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x16,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 %x2)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> %x1, i16 -1)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.movshdup.512(<16 x float> %x0, <16 x float> zeroinitializer, i16 %x2)
  %res3 = fadd <16 x float> %res, %res1
  %res4 = fadd <16 x float> %res2, %res3
  ret <16 x float> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double>, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_movddup_512(<8 x double> %x0, <8 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_512:
; X86:       ## %bb.0:
; X86-NEXT:    vmovddup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xff,0x48,0x12,0xd0]
; X86-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddpd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
; X86-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_512:
; X64:       ## %bb.0:
; X64-NEXT:    vmovddup %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xff,0x48,0x12,0xd0]
; X64-NEXT:    ## zmm2 = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x49,0x12,0xc8]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddpd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xca]
; X64-NEXT:    vmovddup %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xc9,0x12,0xc0]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 %x2)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> %x1, i8 -1)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.movddup.512(<8 x double> %x0, <8 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res2, %res3
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_perm_df_512(<8 x double> %x0, i32 %x1, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermpd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xd0,0x03]
; X86-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_df_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermpd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x01,0xd0,0x03]
; X64-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x01,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpermpd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x01,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.perm.df.512(<8 x double> %x0, i32 3, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64>, i32, <8 x i64>, i8)

define <8 x i64>@test_int_x86_avx512_mask_perm_di_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x00,0xd0,0x03]
; X86-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x00,0xd0,0x03]
; X64-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x00,0xc8,0x03]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpermq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x00,0xc0,0x03]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.perm.di.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
  %res3 = add <8 x i64> %res, %res1
  %res4 = add <8 x i64> %res3, %res2
  ret <8 x i64> %res4
}

define void @test_store1(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_store1:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovups %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x01]
; X86-NEXT:    vmovups %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store1:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovups %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x11,0x07]
; X64-NEXT:    vmovups %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.storeu.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.512(i8*, <16 x float>, i16 )

define void @test_store2(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_store2:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x01]
; X86-NEXT:    vmovupd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_store2:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x11,0x07]
; X64-NEXT:    vmovupd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x11,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.storeu.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.512(i8*, <8 x double>, i8)

define void @test_mask_store_aligned_ps(<16 x float> %data, i8* %ptr, i8* %ptr2, i16 %mask) {
; X86-LABEL: test_mask_store_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovaps %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x01]
; X86-NEXT:    vmovaps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x29,0x07]
; X64-NEXT:    vmovaps %zmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr, <16 x float> %data, i16 %mask)
  call void @llvm.x86.avx512.mask.store.ps.512(i8* %ptr2, <16 x float> %data, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.512(i8*, <16 x float>, i16 )

define void @test_mask_store_aligned_pd(<8 x double> %data, i8* %ptr, i8* %ptr2, i8 %mask) {
; X86-LABEL: test_mask_store_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x01]
; X86-NEXT:    vmovapd %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_store_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x29,0x07]
; X64-NEXT:    vmovapd %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x29,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr, <8 x double> %data, i8 %mask)
  call void @llvm.x86.avx512.mask.store.pd.512(i8* %ptr2, <8 x double> %data, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.512(i8*, <8 x double>, i8)

define void@test_int_x86_avx512_mask_storeu_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_storeu_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqu64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.512(i8*, <16 x i32>, i16)

define void@test_int_x86_avx512_mask_store_q_512(i8* %ptr1, i8* %ptr2, <8 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa64 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa64 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr1, <8 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.512(i8* %ptr2, <8 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.512(i8*, <8 x i64>, i8)

define void@test_int_x86_avx512_mask_store_d_512(i8* %ptr1, i8* %ptr2, <16 x i32> %x1, i16 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_512:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqa32 %zmm0, (%ecx) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x01]
; X86-NEXT:    vmovdqa64 %zmm0, (%eax) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x00]
; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_512:
; X64:       ## %bb.0:
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa32 %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x7f,0x07]
; X64-NEXT:    vmovdqa64 %zmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x48,0x7f,0x06]
; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr1, <16 x i32> %x1, i16 %x2)
  call void @llvm.x86.avx512.mask.store.d.512(i8* %ptr2, <16 x i32> %x1, i16 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.512(i8*, <16 x i32>, i16)

define <16 x float> @test_mask_load_aligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovaps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x00]
; X86-NEXT:    vmovaps (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovaps (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0x07]
; X64-NEXT:    vmovaps (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.load.ps.512(i8*, <16 x float>, i16)

define <16 x float> @test_mask_load_unaligned_ps(<16 x float> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovups (%eax), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovups (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x00]
; X86-NEXT:    vmovups (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x08]
; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps:
; X64:       ## %bb.0:
; X64-NEXT:    vmovups (%rdi), %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovups (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x10,0x07]
; X64-NEXT:    vmovups (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x10,0x0f]
; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 -1)
  %res1 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> %res, i16 %mask)
  %res2 = call <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8* %ptr, <16 x float> zeroinitializer, i16 %mask)
  %res4 = fadd <16 x float> %res2, %res1
  ret <16 x float> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.loadu.ps.512(i8*, <16 x float>, i16)

define <8 x double> @test_mask_load_aligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovapd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovapd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x00]
; X86-NEXT:    vmovapd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovapd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovapd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x28,0x07]
; X64-NEXT:    vmovapd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x28,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.load.pd.512(i8*, <8 x double>, i8)

define <8 x double> @test_mask_load_unaligned_pd(<8 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovupd (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovupd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x00]
; X86-NEXT:    vmovupd (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x08]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd:
; X64:       ## %bb.0:
; X64-NEXT:    vmovupd (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovupd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x10,0x07]
; X64-NEXT:    vmovupd (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x10,0x0f]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 -1)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> %res, i8 %mask)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8* %ptr, <8 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x double> %res2, %res1
  ret <8 x double> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.loadu.pd.512(i8*, <8 x double>, i8)

declare <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_unaligned_d(i8* %ptr, i8* %ptr2, <16 x i32> %data, i16 %mask) {
; X86-LABEL: test_mask_load_unaligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
; X86-NEXT:    vmovdqu32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu32 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr2, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.loadu.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_unaligned_q(i8* %ptr, i8* %ptr2, <8 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx ## encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqu64 (%ecx), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x09]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 (%rsi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x49,0x6f,0x06]
; X64-NEXT:    vmovdqu64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr2, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.loadu.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

declare <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8*, <16 x i32>, i16)

define <16 x i32> @test_mask_load_aligned_d(<16 x i32> %data, i8* %ptr, i16 %mask) {
; X86-LABEL: test_mask_load_aligned_d:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
; X86-NEXT:    vmovdqa32 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa32 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_d:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa32 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 -1)
  %res1 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> %res, i16 %mask)
  %res2 = call <16 x i32> @llvm.x86.avx512.mask.load.d.512(i8* %ptr, <16 x i32> zeroinitializer, i16 %mask)
  %res4 = add <16 x i32> %res2, %res1
  ret <16 x i32> %res4
}

declare <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8*, <8 x i64>, i8)

define <8 x i64> @test_mask_load_aligned_q(<8 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q:
; X86:       ## %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa64 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x00]
; X86-NEXT:    vmovdqa64 (%eax), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x08]
; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q:
; X64:       ## %bb.0:
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6f,0x07]
; X64-NEXT:    vmovdqa64 (%rdi), %zmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6f,0x0f]
; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 -1)
  %res1 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> %res, i8 %mask)
  %res2 = call <8 x i64> @llvm.x86.avx512.mask.load.q.512(i8* %ptr, <8 x i64> zeroinitializer, i8 %mask)
  %res4 = add <8 x i64> %res2, %res1
  ret <8 x i64> %res4
}

declare <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double>, i32, <8 x double>, i8)

define <8 x double>@test_int_x86_avx512_mask_vpermil_pd_512(<8 x double> %x0, <8 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X86:       ## %bb.0:
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x05,0xd0,0x16]
; X86-NEXT:    ## zmm2 = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X86-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X86-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X86-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X86-NEXT:    retl ## encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_512:
; X64:       ## %bb.0:
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x05,0xd0,0x16]
; X64-NEXT:    ## zmm2 = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x05,0xc8,0x16]
; X64-NEXT:    ## zmm1 {%k1} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    vpermilpd $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x05,0xc0,0x16]
; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0,1,3,2,5,4,6,6]
; X64-NEXT:    vaddpd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc0]
; X64-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
; X64-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 %x3)
  %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> zeroinitializer, i8 %x3)
  %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermil.pd.512(<8 x double> %x0, i32 22, <8 x double> %x2, i8 -1)
  %res3 = fadd <8 x double> %res, %res1
  %res4 = fadd <8 x double> %res3, %res2
  ret <8 x double> %res4
}

declare <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float>, i32, <16 x float>, i16)

    875 define <16 x float>@test_int_x86_avx512_mask_vpermil_ps_512(<16 x float> %x0, <16 x float> %x2, i16 %x3) {
    876 ; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
    877 ; X86:       ## %bb.0:
    878 ; X86-NEXT:    vpermilps $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xd0,0x16]
    879 ; X86-NEXT:    ## zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
    880 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
    881 ; X86-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
    882 ; X86-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
    883 ; X86-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
    884 ; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
    885 ; X86-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
    886 ; X86-NEXT:    vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
    887 ; X86-NEXT:    retl ## encoding: [0xc3]
    888 ;
    889 ; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_512:
    890 ; X64:       ## %bb.0:
    891 ; X64-NEXT:    vpermilps $22, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0x7d,0x48,0x04,0xd0,0x16]
    892 ; X64-NEXT:    ## zmm2 = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
    893 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    894 ; X64-NEXT:    vpermilps $22, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x04,0xc8,0x16]
    895 ; X64-NEXT:    ## zmm1 {%k1} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
    896 ; X64-NEXT:    vpermilps $22, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x04,0xc0,0x16]
    897 ; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[2,1,1,0,6,5,5,4,10,9,9,8,14,13,13,12]
    898 ; X64-NEXT:    vaddps %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc0]
    899 ; X64-NEXT:    vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
    900 ; X64-NEXT:    retq ## encoding: [0xc3]
    901   %res = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 %x3)
    902   %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> zeroinitializer, i16 %x3)
    903   %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermil.ps.512(<16 x float> %x0, i32 22, <16 x float> %x2, i16 -1)
    904   %res3 = fadd <16 x float> %res, %res1
    905   %res4 = fadd <16 x float> %res3, %res2
    906   ret <16 x float> %res4
    907 }
    908 
    909 declare <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32>, i32, <16 x i32>, i16)
    910 
    911 define <16 x i32>@test_int_x86_avx512_mask_pshuf_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
    912 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
    913 ; X86:       ## %bb.0:
    914 ; X86-NEXT:    vpshufd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7d,0x48,0x70,0xd0,0x03]
    915 ; X86-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
    916 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
    917 ; X86-NEXT:    vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
    918 ; X86-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
    919 ; X86-NEXT:    vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
    920 ; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
    921 ; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
    922 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
    923 ; X86-NEXT:    retl ## encoding: [0xc3]
    924 ;
    925 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_512:
    926 ; X64:       ## %bb.0:
    927 ; X64-NEXT:    vpshufd $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x7d,0x48,0x70,0xd0,0x03]
    928 ; X64-NEXT:    ## zmm2 = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
    929 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    930 ; X64-NEXT:    vpshufd $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x70,0xc8,0x03]
    931 ; X64-NEXT:    ## zmm1 {%k1} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
    932 ; X64-NEXT:    vpshufd $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x70,0xc0,0x03]
    933 ; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[3,0,0,0,7,4,4,4,11,8,8,8,15,12,12,12]
    934 ; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
    935 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
    936 ; X64-NEXT:    retq ## encoding: [0xc3]
    937   %res = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
    938   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
    939   %res2 = call <16 x i32> @llvm.x86.avx512.mask.pshuf.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
    940   %res3 = add <16 x i32> %res, %res1
    941   %res4 = add <16 x i32> %res3, %res2
    942   ret <16 x i32> %res4
    943 }
    944 
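; The compare tests below return the compare k-register as a plain integer mask; in the
; masked variants the incoming mask is folded in with a scalar AND of the kmovw result
; rather than by masking the compare itself.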
    945 define i16 @test_pcmpeq_d(<16 x i32> %a, <16 x i32> %b) {
    946 ; CHECK-LABEL: test_pcmpeq_d:
    947 ; CHECK:       ## %bb.0:
    948 ; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
    949 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    950 ; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
    951 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
    952 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    953   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
    954   ret i16 %res
    955 }
    956 
    957 define i16 @test_mask_pcmpeq_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
    958 ; X86-LABEL: test_mask_pcmpeq_d:
    959 ; X86:       ## %bb.0:
    960 ; X86-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
    961 ; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    962 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x04]
    963 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
    964 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
    965 ; X86-NEXT:    retl ## encoding: [0xc3]
    966 ;
    967 ; X64-LABEL: test_mask_pcmpeq_d:
    968 ; X64:       ## %bb.0:
    969 ; X64-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
    970 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    971 ; X64-NEXT:    andl %edi, %eax ## encoding: [0x21,0xf8]
    972 ; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
    973 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
    974 ; X64-NEXT:    retq ## encoding: [0xc3]
    975   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
    976   ret i16 %res
    977 }
    978 
    979 declare i16 @llvm.x86.avx512.mask.pcmpeq.d.512(<16 x i32>, <16 x i32>, i16)
    980 
    981 define i8 @test_pcmpeq_q(<8 x i64> %a, <8 x i64> %b) {
    982 ; CHECK-LABEL: test_pcmpeq_q:
    983 ; CHECK:       ## %bb.0:
    984 ; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
    985 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    986 ; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
    987 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
    988 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
    989   %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
    990   ret i8 %res
    991 }
    992 
    993 define i8 @test_mask_pcmpeq_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
    994 ; X86-LABEL: test_mask_pcmpeq_q:
    995 ; X86:       ## %bb.0:
    996 ; X86-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
    997 ; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    998 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04]
    999 ; X86-NEXT:    ## kill: def $al killed $al killed $eax
   1000 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1001 ; X86-NEXT:    retl ## encoding: [0xc3]
   1002 ;
   1003 ; X64-LABEL: test_mask_pcmpeq_q:
   1004 ; X64:       ## %bb.0:
   1005 ; X64-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
   1006 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   1007 ; X64-NEXT:    andb %dil, %al ## encoding: [0x40,0x20,0xf8]
   1008 ; X64-NEXT:    ## kill: def $al killed $al killed $eax
   1009 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1010 ; X64-NEXT:    retq ## encoding: [0xc3]
   1011   %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
   1012   ret i8 %res
   1013 }
   1014 
   1015 declare i8 @llvm.x86.avx512.mask.pcmpeq.q.512(<8 x i64>, <8 x i64>, i8)
   1016 
   1017 define i16 @test_pcmpgt_d(<16 x i32> %a, <16 x i32> %b) {
   1018 ; CHECK-LABEL: test_pcmpgt_d:
   1019 ; CHECK:       ## %bb.0:
   1020 ; CHECK-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
   1021 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   1022 ; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
   1023 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1024 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1025   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 -1)
   1026   ret i16 %res
   1027 }
   1028 
   1029 define i16 @test_mask_pcmpgt_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
   1030 ; X86-LABEL: test_mask_pcmpgt_d:
   1031 ; X86:       ## %bb.0:
   1032 ; X86-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
   1033 ; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   1034 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x04]
   1035 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   1036 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1037 ; X86-NEXT:    retl ## encoding: [0xc3]
   1038 ;
   1039 ; X64-LABEL: test_mask_pcmpgt_d:
   1040 ; X64:       ## %bb.0:
   1041 ; X64-NEXT:    vpcmpgtd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xc1]
   1042 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   1043 ; X64-NEXT:    andl %edi, %eax ## encoding: [0x21,0xf8]
   1044 ; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
   1045 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1046 ; X64-NEXT:    retq ## encoding: [0xc3]
   1047   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32> %a, <16 x i32> %b, i16 %mask)
   1048   ret i16 %res
   1049 }
   1050 
   1051 declare i16 @llvm.x86.avx512.mask.pcmpgt.d.512(<16 x i32>, <16 x i32>, i16)
   1052 
   1053 define i8 @test_pcmpgt_q(<8 x i64> %a, <8 x i64> %b) {
   1054 ; CHECK-LABEL: test_pcmpgt_q:
   1055 ; CHECK:       ## %bb.0:
   1056 ; CHECK-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
   1057 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   1058 ; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
   1059 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1060 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1061   %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 -1)
   1062   ret i8 %res
   1063 }
   1064 
   1065 define i8 @test_mask_pcmpgt_q(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
   1066 ; X86-LABEL: test_mask_pcmpgt_q:
   1067 ; X86:       ## %bb.0:
   1068 ; X86-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
   1069 ; X86-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   1070 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al ## encoding: [0x22,0x44,0x24,0x04]
   1071 ; X86-NEXT:    ## kill: def $al killed $al killed $eax
   1072 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1073 ; X86-NEXT:    retl ## encoding: [0xc3]
   1074 ;
   1075 ; X64-LABEL: test_mask_pcmpgt_q:
   1076 ; X64:       ## %bb.0:
   1077 ; X64-NEXT:    vpcmpgtq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xc1]
   1078 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   1079 ; X64-NEXT:    andb %dil, %al ## encoding: [0x40,0x20,0xf8]
   1080 ; X64-NEXT:    ## kill: def $al killed $al killed $eax
   1081 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1082 ; X64-NEXT:    retq ## encoding: [0xc3]
   1083   %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64> %a, <8 x i64> %b, i8 %mask)
   1084   ret i8 %res
   1085 }
   1086 
   1087 declare i8 @llvm.x86.avx512.mask.pcmpgt.q.512(<8 x i64>, <8 x i64>, i8)
   1088 
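; The unpck{h,l} tests pair a merge-masked call with an unmasked call and add the two
; results so both lowerings are checked in one function.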
   1089 declare <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
   1090 
   1091 define <8 x double>@test_int_x86_avx512_mask_unpckh_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
   1092 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
   1093 ; X86:       ## %bb.0:
   1094 ; X86-NEXT:    vunpckhpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xd9]
   1095 ; X86-NEXT:    ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   1096 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1097 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1098 ; X86-NEXT:    vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1]
   1099 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   1100 ; X86-NEXT:    vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3]
   1101 ; X86-NEXT:    retl ## encoding: [0xc3]
   1102 ;
   1103 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_512:
   1104 ; X64:       ## %bb.0:
   1105 ; X64-NEXT:    vunpckhpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x15,0xd9]
   1106 ; X64-NEXT:    ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   1107 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1108 ; X64-NEXT:    vunpckhpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x15,0xd1]
   1109 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   1110 ; X64-NEXT:    vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3]
   1111 ; X64-NEXT:    retq ## encoding: [0xc3]
   1112   %res = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
   1113   %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckh.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
   1114   %res2 = fadd <8 x double> %res, %res1
   1115   ret <8 x double> %res2
   1116 }
   1117 
   1118 declare <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
   1119 
   1120 define <16 x float>@test_int_x86_avx512_mask_unpckh_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
   1121 ; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
   1122 ; X86:       ## %bb.0:
   1123 ; X86-NEXT:    vunpckhps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xd9]
   1124 ; X86-NEXT:    ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
   1125 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1126 ; X86-NEXT:    vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1]
   1127 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
   1128 ; X86-NEXT:    vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
   1129 ; X86-NEXT:    retl ## encoding: [0xc3]
   1130 ;
   1131 ; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_512:
   1132 ; X64:       ## %bb.0:
   1133 ; X64-NEXT:    vunpckhps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x15,0xd9]
   1134 ; X64-NEXT:    ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
   1135 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1136 ; X64-NEXT:    vunpckhps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x15,0xd1]
   1137 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
   1138 ; X64-NEXT:    vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
   1139 ; X64-NEXT:    retq ## encoding: [0xc3]
   1140   %res = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
   1141   %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckh.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
   1142   %res2 = fadd <16 x float> %res, %res1
   1143   ret <16 x float> %res2
   1144 }
   1145 
   1146 declare <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double>, <8 x double>, <8 x double>, i8)
   1147 
   1148 define <8 x double>@test_int_x86_avx512_mask_unpckl_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3) {
   1149 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
   1150 ; X86:       ## %bb.0:
   1151 ; X86-NEXT:    vunpcklpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xd9]
   1152 ; X86-NEXT:    ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1153 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1154 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1155 ; X86-NEXT:    vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1]
   1156 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1157 ; X86-NEXT:    vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3]
   1158 ; X86-NEXT:    retl ## encoding: [0xc3]
   1159 ;
   1160 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_512:
   1161 ; X64:       ## %bb.0:
   1162 ; X64-NEXT:    vunpcklpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x14,0xd9]
   1163 ; X64-NEXT:    ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1164 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1165 ; X64-NEXT:    vunpcklpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x14,0xd1]
   1166 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1167 ; X64-NEXT:    vaddpd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc3]
   1168 ; X64-NEXT:    retq ## encoding: [0xc3]
   1169   %res = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
   1170   %res1 = call <8 x double> @llvm.x86.avx512.mask.unpckl.pd.512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, i8 -1)
   1171   %res2 = fadd <8 x double> %res, %res1
   1172   ret <8 x double> %res2
   1173 }
   1174 
   1175 declare <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float>, <16 x float>, <16 x float>, i16)
   1176 
   1177 define <16 x float>@test_int_x86_avx512_mask_unpckl_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
   1178 ; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
   1179 ; X86:       ## %bb.0:
   1180 ; X86-NEXT:    vunpcklps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xd9]
   1181 ; X86-NEXT:    ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
   1182 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1183 ; X86-NEXT:    vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1]
   1184 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
   1185 ; X86-NEXT:    vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
   1186 ; X86-NEXT:    retl ## encoding: [0xc3]
   1187 ;
   1188 ; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_512:
   1189 ; X64:       ## %bb.0:
   1190 ; X64-NEXT:    vunpcklps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x14,0xd9]
   1191 ; X64-NEXT:    ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
   1192 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1193 ; X64-NEXT:    vunpcklps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x14,0xd1]
   1194 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
   1195 ; X64-NEXT:    vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
   1196 ; X64-NEXT:    retq ## encoding: [0xc3]
   1197   %res = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
   1198   %res1 = call <16 x float> @llvm.x86.avx512.mask.unpckl.ps.512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
   1199   %res2 = fadd <16 x float> %res, %res1
   1200   ret <16 x float> %res2
   1201 }
   1202 
   1203 declare <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   1204 
   1205 define <8 x i64>@test_int_x86_avx512_mask_punpcklqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   1206 ; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
   1207 ; X86:       ## %bb.0:
   1208 ; X86-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6c,0xd9]
   1209 ; X86-NEXT:    ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1210 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1211 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1212 ; X86-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1]
   1213 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1214 ; X86-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1]
   1215 ; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1216 ; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
   1217 ; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   1218 ; X86-NEXT:    retl ## encoding: [0xc3]
   1219 ;
   1220 ; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_512:
   1221 ; X64:       ## %bb.0:
   1222 ; X64-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6c,0xd9]
   1223 ; X64-NEXT:    ## zmm3 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1224 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1225 ; X64-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6c,0xd1]
   1226 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1227 ; X64-NEXT:    vpunpcklqdq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x6c,0xc1]
   1228 ; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1229 ; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
   1230 ; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   1231 ; X64-NEXT:    retq ## encoding: [0xc3]
   1232   %res = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   1233   %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   1234   %res2 = call <8 x i64> @llvm.x86.avx512.mask.punpcklqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
   1235   %res3 = add <8 x i64> %res, %res1
   1236   %res4 = add <8 x i64> %res2, %res3
   1237   ret <8 x i64> %res4
   1238 }
   1239 
   1240 declare <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   1241 
   1242 define <8 x i64>@test_int_x86_avx512_mask_punpckhqd_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   1243 ; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
   1244 ; X86:       ## %bb.0:
   1245 ; X86-NEXT:    vpunpckhqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6d,0xd9]
   1246 ; X86-NEXT:    ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   1247 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1248 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1249 ; X86-NEXT:    vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1]
   1250 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   1251 ; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   1252 ; X86-NEXT:    retl ## encoding: [0xc3]
   1253 ;
   1254 ; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_512:
   1255 ; X64:       ## %bb.0:
   1256 ; X64-NEXT:    vpunpckhqdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6d,0xd9]
   1257 ; X64-NEXT:    ## zmm3 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   1258 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1259 ; X64-NEXT:    vpunpckhqdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x6d,0xd1]
   1260 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   1261 ; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   1262 ; X64-NEXT:    retq ## encoding: [0xc3]
   1263   %res = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   1264   %res1 = call <8 x i64> @llvm.x86.avx512.mask.punpckhqd.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   1265   %res2 = add <8 x i64> %res, %res1
   1266   ret <8 x i64> %res2
   1267 }
   1268 
   1269 declare <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   1270 
   1271 define <16 x i32>@test_int_x86_avx512_mask_punpckhd_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   1272 ; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
   1273 ; X86:       ## %bb.0:
   1274 ; X86-NEXT:    vpunpckhdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x6a,0xd9]
   1275 ; X86-NEXT:    ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
   1276 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1277 ; X86-NEXT:    vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1]
   1278 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
   1279 ; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   1280 ; X86-NEXT:    retl ## encoding: [0xc3]
   1281 ;
   1282 ; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_512:
   1283 ; X64:       ## %bb.0:
   1284 ; X64-NEXT:    vpunpckhdq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x6a,0xd9]
   1285 ; X64-NEXT:    ## zmm3 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
   1286 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1287 ; X64-NEXT:    vpunpckhdq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6a,0xd1]
   1288 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
   1289 ; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   1290 ; X64-NEXT:    retq ## encoding: [0xc3]
   1291   %res = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   1292   %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckhd.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   1293   %res2 = add <16 x i32> %res, %res1
   1294   ret <16 x i32> %res2
   1295 }
   1296 
   1297 declare <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   1298 
   1299 define <16 x i32>@test_int_x86_avx512_mask_punpckld_q_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   1300 ; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
   1301 ; X86:       ## %bb.0:
   1302 ; X86-NEXT:    vpunpckldq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x62,0xd9]
   1303 ; X86-NEXT:    ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
   1304 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1305 ; X86-NEXT:    vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1]
   1306 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
   1307 ; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   1308 ; X86-NEXT:    retl ## encoding: [0xc3]
   1309 ;
   1310 ; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_512:
   1311 ; X64:       ## %bb.0:
   1312 ; X64-NEXT:    vpunpckldq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7d,0x48,0x62,0xd9]
   1313 ; X64-NEXT:    ## zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
   1314 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1315 ; X64-NEXT:    vpunpckldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x62,0xd1]
   1316 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
   1317 ; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   1318 ; X64-NEXT:    retq ## encoding: [0xc3]
   1319   %res = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   1320   %res1 = call <16 x i32> @llvm.x86.avx512.mask.punpckld.q.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   1321   %res2 = add <16 x i32> %res, %res1
   1322   ret <16 x i32> %res2
   1323 }
   1324 
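; Immediate shift tests: pslli/psrli/psrai each get an unmasked form, a merge-masking
; form (result blended into the passthru operand), and a zero-masking form.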
   1325 define <16 x i32> @test_x86_avx512_pslli_d(<16 x i32> %a0) {
   1326 ; CHECK-LABEL: test_x86_avx512_pslli_d:
   1327 ; CHECK:       ## %bb.0:
   1328 ; CHECK-NEXT:    vpslld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xf0,0x07]
   1329 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1330   %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
   1331   ret <16 x i32> %res
   1332 }
   1333 
   1334 define <16 x i32> @test_x86_avx512_mask_pslli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
   1335 ; X86-LABEL: test_x86_avx512_mask_pslli_d:
   1336 ; X86:       ## %bb.0:
   1337 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1338 ; X86-NEXT:    vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07]
   1339 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1340 ; X86-NEXT:    retl ## encoding: [0xc3]
   1341 ;
   1342 ; X64-LABEL: test_x86_avx512_mask_pslli_d:
   1343 ; X64:       ## %bb.0:
   1344 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1345 ; X64-NEXT:    vpslld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x07]
   1346 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1347 ; X64-NEXT:    retq ## encoding: [0xc3]
   1348   %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
   1349   ret <16 x i32> %res
   1350 }
   1351 
   1352 define <16 x i32> @test_x86_avx512_maskz_pslli_d(<16 x i32> %a0, i16 %mask) {
   1353 ; X86-LABEL: test_x86_avx512_maskz_pslli_d:
   1354 ; X86:       ## %bb.0:
   1355 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1356 ; X86-NEXT:    vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07]
   1357 ; X86-NEXT:    retl ## encoding: [0xc3]
   1358 ;
   1359 ; X64-LABEL: test_x86_avx512_maskz_pslli_d:
   1360 ; X64:       ## %bb.0:
   1361 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1362 ; X64-NEXT:    vpslld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x07]
   1363 ; X64-NEXT:    retq ## encoding: [0xc3]
   1364   %res = call <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
   1365   ret <16 x i32> %res
   1366 }
   1367 
   1368 declare <16 x i32> @llvm.x86.avx512.mask.pslli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
   1369 
   1370 define <8 x i64> @test_x86_avx512_pslli_q(<8 x i64> %a0) {
   1371 ; CHECK-LABEL: test_x86_avx512_pslli_q:
   1372 ; CHECK:       ## %bb.0:
   1373 ; CHECK-NEXT:    vpsllq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xf0,0x07]
   1374 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1375   %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
   1376   ret <8 x i64> %res
   1377 }
   1378 
   1379 define <8 x i64> @test_x86_avx512_mask_pslli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
   1380 ; X86-LABEL: test_x86_avx512_mask_pslli_q:
   1381 ; X86:       ## %bb.0:
   1382 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1383 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1384 ; X86-NEXT:    vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07]
   1385 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1386 ; X86-NEXT:    retl ## encoding: [0xc3]
   1387 ;
   1388 ; X64-LABEL: test_x86_avx512_mask_pslli_q:
   1389 ; X64:       ## %bb.0:
   1390 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1391 ; X64-NEXT:    vpsllq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x07]
   1392 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1393 ; X64-NEXT:    retq ## encoding: [0xc3]
   1394   %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
   1395   ret <8 x i64> %res
   1396 }
   1397 
   1398 define <8 x i64> @test_x86_avx512_maskz_pslli_q(<8 x i64> %a0, i8 %mask) {
   1399 ; X86-LABEL: test_x86_avx512_maskz_pslli_q:
   1400 ; X86:       ## %bb.0:
   1401 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1402 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1403 ; X86-NEXT:    vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07]
   1404 ; X86-NEXT:    retl ## encoding: [0xc3]
   1405 ;
   1406 ; X64-LABEL: test_x86_avx512_maskz_pslli_q:
   1407 ; X64:       ## %bb.0:
   1408 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1409 ; X64-NEXT:    vpsllq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x07]
   1410 ; X64-NEXT:    retq ## encoding: [0xc3]
   1411   %res = call <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
   1412   ret <8 x i64> %res
   1413 }
   1414 
   1415 declare <8 x i64> @llvm.x86.avx512.mask.pslli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
   1416 
   1417 define <16 x i32> @test_x86_avx512_psrli_d(<16 x i32> %a0) {
   1418 ; CHECK-LABEL: test_x86_avx512_psrli_d:
   1419 ; CHECK:       ## %bb.0:
   1420 ; CHECK-NEXT:    vpsrld $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xd0,0x07]
   1421 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1422   %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
   1423   ret <16 x i32> %res
   1424 }
   1425 
   1426 define <16 x i32> @test_x86_avx512_mask_psrli_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
   1427 ; X86-LABEL: test_x86_avx512_mask_psrli_d:
   1428 ; X86:       ## %bb.0:
   1429 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1430 ; X86-NEXT:    vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07]
   1431 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1432 ; X86-NEXT:    retl ## encoding: [0xc3]
   1433 ;
   1434 ; X64-LABEL: test_x86_avx512_mask_psrli_d:
   1435 ; X64:       ## %bb.0:
   1436 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1437 ; X64-NEXT:    vpsrld $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x07]
   1438 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1439 ; X64-NEXT:    retq ## encoding: [0xc3]
   1440   %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
   1441   ret <16 x i32> %res
   1442 }
   1443 
   1444 define <16 x i32> @test_x86_avx512_maskz_psrli_d(<16 x i32> %a0, i16 %mask) {
   1445 ; X86-LABEL: test_x86_avx512_maskz_psrli_d:
   1446 ; X86:       ## %bb.0:
   1447 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1448 ; X86-NEXT:    vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07]
   1449 ; X86-NEXT:    retl ## encoding: [0xc3]
   1450 ;
   1451 ; X64-LABEL: test_x86_avx512_maskz_psrli_d:
   1452 ; X64:       ## %bb.0:
   1453 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1454 ; X64-NEXT:    vpsrld $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x07]
   1455 ; X64-NEXT:    retq ## encoding: [0xc3]
   1456   %res = call <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
   1457   ret <16 x i32> %res
   1458 }
   1459 
   1460 declare <16 x i32> @llvm.x86.avx512.mask.psrli.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
   1461 
   1462 define <8 x i64> @test_x86_avx512_psrli_q(<8 x i64> %a0) {
   1463 ; CHECK-LABEL: test_x86_avx512_psrli_q:
   1464 ; CHECK:       ## %bb.0:
   1465 ; CHECK-NEXT:    vpsrlq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x73,0xd0,0x07]
   1466 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1467   %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
   1468   ret <8 x i64> %res
   1469 }
   1470 
   1471 define <8 x i64> @test_x86_avx512_mask_psrli_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
   1472 ; X86-LABEL: test_x86_avx512_mask_psrli_q:
   1473 ; X86:       ## %bb.0:
   1474 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1475 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1476 ; X86-NEXT:    vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07]
   1477 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1478 ; X86-NEXT:    retl ## encoding: [0xc3]
   1479 ;
   1480 ; X64-LABEL: test_x86_avx512_mask_psrli_q:
   1481 ; X64:       ## %bb.0:
   1482 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1483 ; X64-NEXT:    vpsrlq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x07]
   1484 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1485 ; X64-NEXT:    retq ## encoding: [0xc3]
   1486   %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
   1487   ret <8 x i64> %res
   1488 }
   1489 
   1490 define <8 x i64> @test_x86_avx512_maskz_psrli_q(<8 x i64> %a0, i8 %mask) {
   1491 ; X86-LABEL: test_x86_avx512_maskz_psrli_q:
   1492 ; X86:       ## %bb.0:
   1493 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1494 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1495 ; X86-NEXT:    vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07]
   1496 ; X86-NEXT:    retl ## encoding: [0xc3]
   1497 ;
   1498 ; X64-LABEL: test_x86_avx512_maskz_psrli_q:
   1499 ; X64:       ## %bb.0:
   1500 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1501 ; X64-NEXT:    vpsrlq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x07]
   1502 ; X64-NEXT:    retq ## encoding: [0xc3]
   1503   %res = call <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
   1504   ret <8 x i64> %res
   1505 }
   1506 
   1507 declare <8 x i64> @llvm.x86.avx512.mask.psrli.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
   1508 
   1509 define <16 x i32> @test_x86_avx512_psrai_d(<16 x i32> %a0) {
   1510 ; CHECK-LABEL: test_x86_avx512_psrai_d:
   1511 ; CHECK:       ## %bb.0:
   1512 ; CHECK-NEXT:    vpsrad $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0x72,0xe0,0x07]
   1513 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1514   %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 -1)
   1515   ret <16 x i32> %res
   1516 }
   1517 
   1518 define <16 x i32> @test_x86_avx512_mask_psrai_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
   1519 ; X86-LABEL: test_x86_avx512_mask_psrai_d:
   1520 ; X86:       ## %bb.0:
   1521 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1522 ; X86-NEXT:    vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07]
   1523 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1524 ; X86-NEXT:    retl ## encoding: [0xc3]
   1525 ;
   1526 ; X64-LABEL: test_x86_avx512_mask_psrai_d:
   1527 ; X64:       ## %bb.0:
   1528 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1529 ; X64-NEXT:    vpsrad $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x07]
   1530 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1531 ; X64-NEXT:    retq ## encoding: [0xc3]
   1532   %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> %a1, i16 %mask)
   1533   ret <16 x i32> %res
   1534 }
   1535 
   1536 define <16 x i32> @test_x86_avx512_maskz_psrai_d(<16 x i32> %a0, i16 %mask) {
   1537 ; X86-LABEL: test_x86_avx512_maskz_psrai_d:
   1538 ; X86:       ## %bb.0:
   1539 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1540 ; X86-NEXT:    vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07]
   1541 ; X86-NEXT:    retl ## encoding: [0xc3]
   1542 ;
   1543 ; X64-LABEL: test_x86_avx512_maskz_psrai_d:
   1544 ; X64:       ## %bb.0:
   1545 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1546 ; X64-NEXT:    vpsrad $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x07]
   1547 ; X64-NEXT:    retq ## encoding: [0xc3]
   1548   %res = call <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32> %a0, i32 7, <16 x i32> zeroinitializer, i16 %mask)
   1549   ret <16 x i32> %res
   1550 }
   1551 
   1552 declare <16 x i32> @llvm.x86.avx512.mask.psrai.d(<16 x i32>, i32, <16 x i32>, i16) nounwind readnone
   1553 
   1554 define <8 x i64> @test_x86_avx512_psrai_q(<8 x i64> %a0) {
   1555 ; CHECK-LABEL: test_x86_avx512_psrai_q:
   1556 ; CHECK:       ## %bb.0:
   1557 ; CHECK-NEXT:    vpsraq $7, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x72,0xe0,0x07]
   1558 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1559   %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 -1)
   1560   ret <8 x i64> %res
   1561 }
   1562 
   1563 define <8 x i64> @test_x86_avx512_mask_psrai_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
   1564 ; X86-LABEL: test_x86_avx512_mask_psrai_q:
   1565 ; X86:       ## %bb.0:
   1566 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1567 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1568 ; X86-NEXT:    vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07]
   1569 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1570 ; X86-NEXT:    retl ## encoding: [0xc3]
   1571 ;
   1572 ; X64-LABEL: test_x86_avx512_mask_psrai_q:
   1573 ; X64:       ## %bb.0:
   1574 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1575 ; X64-NEXT:    vpsraq $7, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x07]
   1576 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1577 ; X64-NEXT:    retq ## encoding: [0xc3]
   1578   %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> %a1, i8 %mask)
   1579   ret <8 x i64> %res
   1580 }
   1581 
   1582 define <8 x i64> @test_x86_avx512_maskz_psrai_q(<8 x i64> %a0, i8 %mask) {
   1583 ; X86-LABEL: test_x86_avx512_maskz_psrai_q:
   1584 ; X86:       ## %bb.0:
   1585 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1586 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1587 ; X86-NEXT:    vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07]
   1588 ; X86-NEXT:    retl ## encoding: [0xc3]
   1589 ;
   1590 ; X64-LABEL: test_x86_avx512_maskz_psrai_q:
   1591 ; X64:       ## %bb.0:
   1592 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1593 ; X64-NEXT:    vpsraq $7, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x07]
   1594 ; X64-NEXT:    retq ## encoding: [0xc3]
   1595   %res = call <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64> %a0, i32 7, <8 x i64> zeroinitializer, i8 %mask)
   1596   ret <8 x i64> %res
   1597 }
   1598 
   1599 declare <8 x i64> @llvm.x86.avx512.mask.psrai.q(<8 x i64>, i32, <8 x i64>, i8) nounwind readnone
   1600 
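; Non-temporal store intrinsics; the integer, double and float variants all lower to
; vmovntps here.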
   1601 declare void @llvm.x86.avx512.storent.q.512(i8*, <8 x i64>)
   1602 
   1603 define void@test_storent_q_512(<8 x i64> %data, i8* %ptr) {
   1604 ; X86-LABEL: test_storent_q_512:
   1605 ; X86:       ## %bb.0:
   1606 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   1607 ; X86-NEXT:    vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
   1608 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1609 ; X86-NEXT:    retl ## encoding: [0xc3]
   1610 ;
   1611 ; X64-LABEL: test_storent_q_512:
   1612 ; X64:       ## %bb.0:
   1613 ; X64-NEXT:    vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
   1614 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1615 ; X64-NEXT:    retq ## encoding: [0xc3]
   1616   call void @llvm.x86.avx512.storent.q.512(i8* %ptr, <8 x i64> %data)
   1617   ret void
   1618 }
   1619 
   1620 declare void @llvm.x86.avx512.storent.pd.512(i8*, <8 x double>)
   1621 
   1622 define void @test_storent_pd_512(<8 x double> %data, i8* %ptr) {
   1623 ; X86-LABEL: test_storent_pd_512:
   1624 ; X86:       ## %bb.0:
   1625 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   1626 ; X86-NEXT:    vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
   1627 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1628 ; X86-NEXT:    retl ## encoding: [0xc3]
   1629 ;
   1630 ; X64-LABEL: test_storent_pd_512:
   1631 ; X64:       ## %bb.0:
   1632 ; X64-NEXT:    vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
   1633 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1634 ; X64-NEXT:    retq ## encoding: [0xc3]
   1635   call void @llvm.x86.avx512.storent.pd.512(i8* %ptr, <8 x double> %data)
   1636   ret void
   1637 }
   1638 
   1639 declare void @llvm.x86.avx512.storent.ps.512(i8*, <16 x float>)
   1640 
   1641 define void @test_storent_ps_512(<16 x float> %data, i8* %ptr) {
   1642 ; X86-LABEL: test_storent_ps_512:
   1643 ; X86:       ## %bb.0:
   1644 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   1645 ; X86-NEXT:    vmovntps %zmm0, (%eax) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x00]
   1646 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1647 ; X86-NEXT:    retl ## encoding: [0xc3]
   1648 ;
   1649 ; X64-LABEL: test_storent_ps_512:
   1650 ; X64:       ## %bb.0:
   1651 ; X64-NEXT:    vmovntps %zmm0, (%rdi) ## encoding: [0x62,0xf1,0x7c,0x48,0x2b,0x07]
   1652 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   1653 ; X64-NEXT:    retq ## encoding: [0xc3]
   1654   call void @llvm.x86.avx512.storent.ps.512(i8* %ptr, <16 x float> %data)
   1655   ret void
   1656 }
   1657 
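; Bitwise logic tests: the unmasked intrinsics lower to the quadword forms
; (vpxorq/vporq/vpandq), while the masked ones use the dword forms so the mask
; applies per 32-bit element.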
   1658 define <16 x i32> @test_xor_epi32(<16 x i32> %a, <16 x i32> %b) {
   1659 ; CHECK-LABEL: test_xor_epi32:
   1660 ; CHECK:       ## %bb.0:
   1661 ; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
   1662 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1663   %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   1664   ret <16 x i32> %res
   1665 }
   1666 
   1667 define <16 x i32> @test_mask_xor_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
   1668 ; X86-LABEL: test_mask_xor_epi32:
   1669 ; X86:       ## %bb.0:
   1670 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1671 ; X86-NEXT:    vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
   1672 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1673 ; X86-NEXT:    retl ## encoding: [0xc3]
   1674 ;
   1675 ; X64-LABEL: test_mask_xor_epi32:
   1676 ; X64:       ## %bb.0:
   1677 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1678 ; X64-NEXT:    vpxord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xef,0xd1]
   1679 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1680 ; X64-NEXT:    retq ## encoding: [0xc3]
   1681   %res = call <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   1682   ret <16 x i32> %res
   1683 }
   1684 
   1685 declare <16 x i32> @llvm.x86.avx512.mask.pxor.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   1686 
   1687 define <16 x i32> @test_or_epi32(<16 x i32> %a, <16 x i32> %b) {
   1688 ; CHECK-LABEL: test_or_epi32:
   1689 ; CHECK:       ## %bb.0:
   1690 ; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
   1691 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1692   %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   1693   ret <16 x i32> %res
   1694 }
   1695 
   1696 define <16 x i32> @test_mask_or_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
   1697 ; X86-LABEL: test_mask_or_epi32:
   1698 ; X86:       ## %bb.0:
   1699 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1700 ; X86-NEXT:    vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
   1701 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1702 ; X86-NEXT:    retl ## encoding: [0xc3]
   1703 ;
   1704 ; X64-LABEL: test_mask_or_epi32:
   1705 ; X64:       ## %bb.0:
   1706 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1707 ; X64-NEXT:    vpord %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xeb,0xd1]
   1708 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1709 ; X64-NEXT:    retq ## encoding: [0xc3]
   1710   %res = call <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   1711   ret <16 x i32> %res
   1712 }
   1713 
   1714 declare <16 x i32> @llvm.x86.avx512.mask.por.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   1715 
   1716 define <16 x i32> @test_and_epi32(<16 x i32> %a, <16 x i32> %b) {
   1717 ; CHECK-LABEL: test_and_epi32:
   1718 ; CHECK:       ## %bb.0:
   1719 ; CHECK-NEXT:    vpandq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
   1720 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1721   %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   1722   ret <16 x i32> %res
   1723 }
   1724 
   1725 define <16 x i32> @test_mask_and_epi32(<16 x i32> %a,<16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
   1726 ; X86-LABEL: test_mask_and_epi32:
   1727 ; X86:       ## %bb.0:
   1728 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1729 ; X86-NEXT:    vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
   1730 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1731 ; X86-NEXT:    retl ## encoding: [0xc3]
   1732 ;
   1733 ; X64-LABEL: test_mask_and_epi32:
   1734 ; X64:       ## %bb.0:
   1735 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1736 ; X64-NEXT:    vpandd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xdb,0xd1]
   1737 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1738 ; X64-NEXT:    retq ## encoding: [0xc3]
   1739   %res = call <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   1740   ret < 16 x i32> %res
   1741 }
   1742 
   1743 declare <16 x i32> @llvm.x86.avx512.mask.pand.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   1744 
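        ; 64-bit-element logic ops (pxor.q.512, por.q.512, pand.q.512). These take an
        ; i8 mask, so the 32-bit target loads it with movzbl before moving it into a
        ; mask register with kmovw.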
   1745 define <8 x i64> @test_xor_epi64(<8 x i64> %a, <8 x i64> %b) {
   1746 ; CHECK-LABEL: test_xor_epi64:
   1747 ; CHECK:       ## %bb.0:
   1748 ; CHECK-NEXT:    vpxorq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xef,0xc1]
   1749 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1750   %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   1751   ret < 8 x i64> %res
   1752 }
   1753 
   1754 define <8 x i64> @test_mask_xor_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
   1755 ; X86-LABEL: test_mask_xor_epi64:
   1756 ; X86:       ## %bb.0:
   1757 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1758 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1759 ; X86-NEXT:    vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
   1760 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1761 ; X86-NEXT:    retl ## encoding: [0xc3]
   1762 ;
   1763 ; X64-LABEL: test_mask_xor_epi64:
   1764 ; X64:       ## %bb.0:
   1765 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1766 ; X64-NEXT:    vpxorq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xef,0xd1]
   1767 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1768 ; X64-NEXT:    retq ## encoding: [0xc3]
   1769   %res = call <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   1770   ret < 8 x i64> %res
   1771 }
   1772 
   1773 declare <8 x i64> @llvm.x86.avx512.mask.pxor.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   1774 
   1775 define <8 x i64> @test_or_epi64(<8 x i64> %a, <8 x i64> %b) {
   1776 ; CHECK-LABEL: test_or_epi64:
   1777 ; CHECK:       ## %bb.0:
   1778 ; CHECK-NEXT:    vporq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xeb,0xc1]
   1779 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1780   %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   1781   ret < 8 x i64> %res
   1782 }
   1783 
   1784 define <8 x i64> @test_mask_or_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
   1785 ; X86-LABEL: test_mask_or_epi64:
   1786 ; X86:       ## %bb.0:
   1787 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1788 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1789 ; X86-NEXT:    vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
   1790 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1791 ; X86-NEXT:    retl ## encoding: [0xc3]
   1792 ;
   1793 ; X64-LABEL: test_mask_or_epi64:
   1794 ; X64:       ## %bb.0:
   1795 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1796 ; X64-NEXT:    vporq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xeb,0xd1]
   1797 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1798 ; X64-NEXT:    retq ## encoding: [0xc3]
   1799   %res = call <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   1800   ret < 8 x i64> %res
   1801 }
   1802 
   1803 declare <8 x i64> @llvm.x86.avx512.mask.por.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   1804 
   1805 define <8 x i64> @test_and_epi64(<8 x i64> %a, <8 x i64> %b) {
   1806 ; CHECK-LABEL: test_and_epi64:
   1807 ; CHECK:       ## %bb.0:
   1808 ; CHECK-NEXT:    vpandq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xdb,0xc1]
   1809 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1810   %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   1811   ret < 8 x i64> %res
   1812 }
   1813 
   1814 define <8 x i64> @test_mask_and_epi64(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
   1815 ; X86-LABEL: test_mask_and_epi64:
   1816 ; X86:       ## %bb.0:
   1817 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1818 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   1819 ; X86-NEXT:    vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
   1820 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1821 ; X86-NEXT:    retl ## encoding: [0xc3]
   1822 ;
   1823 ; X64-LABEL: test_mask_and_epi64:
   1824 ; X64:       ## %bb.0:
   1825 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1826 ; X64-NEXT:    vpandq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xdb,0xd1]
   1827 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1828 ; X64-NEXT:    retq ## encoding: [0xc3]
   1829   %res = call <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   1830   ret < 8 x i64> %res
   1831 }
   1832 
   1833 declare <8 x i64> @llvm.x86.avx512.mask.pand.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   1834 
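        ; The arithmetic tests below follow the usual suffix scheme:
        ;   rr/rrk/rrkz    = reg/reg, unmasked / merge-masked / zero-masked
        ;   rm/rmk/rmkz    = reg/mem
        ;   rmb/rmbk/rmbkz = second operand broadcast from a scalar load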
   1835 define <16 x i32> @test_mask_add_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
   1836 ; CHECK-LABEL: test_mask_add_epi32_rr:
   1837 ; CHECK:       ## %bb.0:
   1838 ; CHECK-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
   1839 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1840   %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   1841   ret < 16 x i32> %res
   1842 }
   1843 
   1844 define <16 x i32> @test_mask_add_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
   1845 ; X86-LABEL: test_mask_add_epi32_rrk:
   1846 ; X86:       ## %bb.0:
   1847 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1848 ; X86-NEXT:    vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
   1849 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1850 ; X86-NEXT:    retl ## encoding: [0xc3]
   1851 ;
   1852 ; X64-LABEL: test_mask_add_epi32_rrk:
   1853 ; X64:       ## %bb.0:
   1854 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1855 ; X64-NEXT:    vpaddd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0xd1]
   1856 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1857 ; X64-NEXT:    retq ## encoding: [0xc3]
   1858   %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   1859   ret < 16 x i32> %res
   1860 }
   1861 
   1862 define <16 x i32> @test_mask_add_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
   1863 ; X86-LABEL: test_mask_add_epi32_rrkz:
   1864 ; X86:       ## %bb.0:
   1865 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1866 ; X86-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
   1867 ; X86-NEXT:    retl ## encoding: [0xc3]
   1868 ;
   1869 ; X64-LABEL: test_mask_add_epi32_rrkz:
   1870 ; X64:       ## %bb.0:
   1871 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1872 ; X64-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0xc1]
   1873 ; X64-NEXT:    retq ## encoding: [0xc3]
   1874   %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
   1875   ret < 16 x i32> %res
   1876 }
   1877 
   1878 define <16 x i32> @test_mask_add_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
   1879 ; X86-LABEL: test_mask_add_epi32_rm:
   1880 ; X86:       ## %bb.0:
   1881 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   1882 ; X86-NEXT:    vpaddd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x00]
   1883 ; X86-NEXT:    retl ## encoding: [0xc3]
   1884 ;
   1885 ; X64-LABEL: test_mask_add_epi32_rm:
   1886 ; X64:       ## %bb.0:
   1887 ; X64-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0x07]
   1888 ; X64-NEXT:    retq ## encoding: [0xc3]
   1889   %b = load <16 x i32>, <16 x i32>* %ptr_b
   1890   %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   1891   ret < 16 x i32> %res
   1892 }
   1893 
   1894 define <16 x i32> @test_mask_add_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
   1895 ; X86-LABEL: test_mask_add_epi32_rmk:
   1896 ; X86:       ## %bb.0:
   1897 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   1898 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1899 ; X86-NEXT:    vpaddd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x08]
   1900 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1901 ; X86-NEXT:    retl ## encoding: [0xc3]
   1902 ;
   1903 ; X64-LABEL: test_mask_add_epi32_rmk:
   1904 ; X64:       ## %bb.0:
   1905 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1906 ; X64-NEXT:    vpaddd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfe,0x0f]
   1907 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1908 ; X64-NEXT:    retq ## encoding: [0xc3]
   1909   %b = load <16 x i32>, <16 x i32>* %ptr_b
   1910   %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   1911   ret < 16 x i32> %res
   1912 }
   1913 
   1914 define <16 x i32> @test_mask_add_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
   1915 ; X86-LABEL: test_mask_add_epi32_rmkz:
   1916 ; X86:       ## %bb.0:
   1917 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   1918 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1919 ; X86-NEXT:    vpaddd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x00]
   1920 ; X86-NEXT:    retl ## encoding: [0xc3]
   1921 ;
   1922 ; X64-LABEL: test_mask_add_epi32_rmkz:
   1923 ; X64:       ## %bb.0:
   1924 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1925 ; X64-NEXT:    vpaddd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfe,0x07]
   1926 ; X64-NEXT:    retq ## encoding: [0xc3]
   1927   %b = load <16 x i32>, <16 x i32>* %ptr_b
   1928   %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
   1929   ret < 16 x i32> %res
   1930 }
   1931 
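        ; For i32 elements the scalar broadcast folds into an embedded {1to16} memory
        ; operand on both the 32-bit and 64-bit targets.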
   1932 define <16 x i32> @test_mask_add_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
   1933 ; X86-LABEL: test_mask_add_epi32_rmb:
   1934 ; X86:       ## %bb.0:
   1935 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   1936 ; X86-NEXT:    vpaddd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x00]
   1937 ; X86-NEXT:    retl ## encoding: [0xc3]
   1938 ;
   1939 ; X64-LABEL: test_mask_add_epi32_rmb:
   1940 ; X64:       ## %bb.0:
   1941 ; X64-NEXT:    vpaddd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfe,0x07]
   1942 ; X64-NEXT:    retq ## encoding: [0xc3]
   1943   %q = load i32, i32* %ptr_b
   1944   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   1945   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   1946   %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   1947   ret < 16 x i32> %res
   1948 }
   1949 
   1950 define <16 x i32> @test_mask_add_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
   1951 ; X86-LABEL: test_mask_add_epi32_rmbk:
   1952 ; X86:       ## %bb.0:
   1953 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   1954 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1955 ; X86-NEXT:    vpaddd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x08]
   1956 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1957 ; X86-NEXT:    retl ## encoding: [0xc3]
   1958 ;
   1959 ; X64-LABEL: test_mask_add_epi32_rmbk:
   1960 ; X64:       ## %bb.0:
   1961 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1962 ; X64-NEXT:    vpaddd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfe,0x0f]
   1963 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1964 ; X64-NEXT:    retq ## encoding: [0xc3]
   1965   %q = load i32, i32* %ptr_b
   1966   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   1967   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   1968   %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   1969   ret < 16 x i32> %res
   1970 }
   1971 
   1972 define <16 x i32> @test_mask_add_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
   1973 ; X86-LABEL: test_mask_add_epi32_rmbkz:
   1974 ; X86:       ## %bb.0:
   1975 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   1976 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1977 ; X86-NEXT:    vpaddd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x00]
   1978 ; X86-NEXT:    retl ## encoding: [0xc3]
   1979 ;
   1980 ; X64-LABEL: test_mask_add_epi32_rmbkz:
   1981 ; X64:       ## %bb.0:
   1982 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1983 ; X64-NEXT:    vpaddd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfe,0x07]
   1984 ; X64-NEXT:    retq ## encoding: [0xc3]
   1985   %q = load i32, i32* %ptr_b
   1986   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   1987   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   1988   %res = call <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
   1989   ret < 16 x i32> %res
   1990 }
   1991 
   1992 declare <16 x i32> @llvm.x86.avx512.mask.padd.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   1993 
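        ; psub.d.512 (vpsubd) repeats the same rr/rm/rmb masking matrix.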
   1994 define <16 x i32> @test_mask_sub_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
   1995 ; CHECK-LABEL: test_mask_sub_epi32_rr:
   1996 ; CHECK:       ## %bb.0:
   1997 ; CHECK-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0xc1]
   1998 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   1999   %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   2000   ret < 16 x i32> %res
   2001 }
   2002 
   2003 define <16 x i32> @test_mask_sub_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
   2004 ; X86-LABEL: test_mask_sub_epi32_rrk:
   2005 ; X86:       ## %bb.0:
   2006 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2007 ; X86-NEXT:    vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
   2008 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   2009 ; X86-NEXT:    retl ## encoding: [0xc3]
   2010 ;
   2011 ; X64-LABEL: test_mask_sub_epi32_rrk:
   2012 ; X64:       ## %bb.0:
   2013 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2014 ; X64-NEXT:    vpsubd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0xd1]
   2015 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   2016 ; X64-NEXT:    retq ## encoding: [0xc3]
   2017   %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   2018   ret < 16 x i32> %res
   2019 }
   2020 
   2021 define <16 x i32> @test_mask_sub_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
   2022 ; X86-LABEL: test_mask_sub_epi32_rrkz:
   2023 ; X86:       ## %bb.0:
   2024 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2025 ; X86-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
   2026 ; X86-NEXT:    retl ## encoding: [0xc3]
   2027 ;
   2028 ; X64-LABEL: test_mask_sub_epi32_rrkz:
   2029 ; X64:       ## %bb.0:
   2030 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2031 ; X64-NEXT:    vpsubd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0xc1]
   2032 ; X64-NEXT:    retq ## encoding: [0xc3]
   2033   %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
   2034   ret < 16 x i32> %res
   2035 }
   2036 
   2037 define <16 x i32> @test_mask_sub_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
   2038 ; X86-LABEL: test_mask_sub_epi32_rm:
   2039 ; X86:       ## %bb.0:
   2040 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2041 ; X86-NEXT:    vpsubd (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x00]
   2042 ; X86-NEXT:    retl ## encoding: [0xc3]
   2043 ;
   2044 ; X64-LABEL: test_mask_sub_epi32_rm:
   2045 ; X64:       ## %bb.0:
   2046 ; X64-NEXT:    vpsubd (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfa,0x07]
   2047 ; X64-NEXT:    retq ## encoding: [0xc3]
   2048   %b = load <16 x i32>, <16 x i32>* %ptr_b
   2049   %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   2050   ret < 16 x i32> %res
   2051 }
   2052 
   2053 define <16 x i32> @test_mask_sub_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
   2054 ; X86-LABEL: test_mask_sub_epi32_rmk:
   2055 ; X86:       ## %bb.0:
   2056 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2057 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2058 ; X86-NEXT:    vpsubd (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x08]
   2059 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2060 ; X86-NEXT:    retl ## encoding: [0xc3]
   2061 ;
   2062 ; X64-LABEL: test_mask_sub_epi32_rmk:
   2063 ; X64:       ## %bb.0:
   2064 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2065 ; X64-NEXT:    vpsubd (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfa,0x0f]
   2066 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2067 ; X64-NEXT:    retq ## encoding: [0xc3]
   2068   %b = load <16 x i32>, <16 x i32>* %ptr_b
   2069   %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   2070   ret < 16 x i32> %res
   2071 }
   2072 
   2073 define <16 x i32> @test_mask_sub_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
   2074 ; X86-LABEL: test_mask_sub_epi32_rmkz:
   2075 ; X86:       ## %bb.0:
   2076 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2077 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2078 ; X86-NEXT:    vpsubd (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x00]
   2079 ; X86-NEXT:    retl ## encoding: [0xc3]
   2080 ;
   2081 ; X64-LABEL: test_mask_sub_epi32_rmkz:
   2082 ; X64:       ## %bb.0:
   2083 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2084 ; X64-NEXT:    vpsubd (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfa,0x07]
   2085 ; X64-NEXT:    retq ## encoding: [0xc3]
   2086   %b = load <16 x i32>, <16 x i32>* %ptr_b
   2087   %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
   2088   ret < 16 x i32> %res
   2089 }
   2090 
   2091 define <16 x i32> @test_mask_sub_epi32_rmb(<16 x i32> %a, i32* %ptr_b) {
   2092 ; X86-LABEL: test_mask_sub_epi32_rmb:
   2093 ; X86:       ## %bb.0:
   2094 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2095 ; X86-NEXT:    vpsubd (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x00]
   2096 ; X86-NEXT:    retl ## encoding: [0xc3]
   2097 ;
   2098 ; X64-LABEL: test_mask_sub_epi32_rmb:
   2099 ; X64:       ## %bb.0:
   2100 ; X64-NEXT:    vpsubd (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x58,0xfa,0x07]
   2101 ; X64-NEXT:    retq ## encoding: [0xc3]
   2102   %q = load i32, i32* %ptr_b
   2103   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   2104   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   2105   %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   2106   ret < 16 x i32> %res
   2107 }
   2108 
   2109 define <16 x i32> @test_mask_sub_epi32_rmbk(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
   2110 ; X86-LABEL: test_mask_sub_epi32_rmbk:
   2111 ; X86:       ## %bb.0:
   2112 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2113 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2114 ; X86-NEXT:    vpsubd (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x08]
   2115 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2116 ; X86-NEXT:    retl ## encoding: [0xc3]
   2117 ;
   2118 ; X64-LABEL: test_mask_sub_epi32_rmbk:
   2119 ; X64:       ## %bb.0:
   2120 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2121 ; X64-NEXT:    vpsubd (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x59,0xfa,0x0f]
   2122 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2123 ; X64-NEXT:    retq ## encoding: [0xc3]
   2124   %q = load i32, i32* %ptr_b
   2125   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   2126   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   2127   %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   2128   ret < 16 x i32> %res
   2129 }
   2130 
   2131 define <16 x i32> @test_mask_sub_epi32_rmbkz(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
   2132 ; X86-LABEL: test_mask_sub_epi32_rmbkz:
   2133 ; X86:       ## %bb.0:
   2134 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2135 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2136 ; X86-NEXT:    vpsubd (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x00]
   2137 ; X86-NEXT:    retl ## encoding: [0xc3]
   2138 ;
   2139 ; X64-LABEL: test_mask_sub_epi32_rmbkz:
   2140 ; X64:       ## %bb.0:
   2141 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2142 ; X64-NEXT:    vpsubd (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xd9,0xfa,0x07]
   2143 ; X64-NEXT:    retq ## encoding: [0xc3]
   2144   %q = load i32, i32* %ptr_b
   2145   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   2146   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   2147   %res = call <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
   2148   ret < 16 x i32> %res
   2149 }
   2150 
   2151 declare <16 x i32> @llvm.x86.avx512.mask.psub.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   2152 
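        ; padd.q.512 (vpaddq). On the 32-bit target the i64 scalar broadcast is not
        ; folded into a {1to8} memory operand; it is materialized with vmovq followed
        ; by vpbroadcastq instead. The 64-bit target folds it as (%rdi){1to8}.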
   2153 define <8 x i64> @test_mask_add_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
   2154 ; CHECK-LABEL: test_mask_add_epi64_rr:
   2155 ; CHECK:       ## %bb.0:
   2156 ; CHECK-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
   2157 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   2158   %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   2159   ret < 8 x i64> %res
   2160 }
   2161 
   2162 define <8 x i64> @test_mask_add_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
   2163 ; X86-LABEL: test_mask_add_epi64_rrk:
   2164 ; X86:       ## %bb.0:
   2165 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2166 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2167 ; X86-NEXT:    vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
   2168 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   2169 ; X86-NEXT:    retl ## encoding: [0xc3]
   2170 ;
   2171 ; X64-LABEL: test_mask_add_epi64_rrk:
   2172 ; X64:       ## %bb.0:
   2173 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2174 ; X64-NEXT:    vpaddq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xd1]
   2175 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   2176 ; X64-NEXT:    retq ## encoding: [0xc3]
   2177   %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   2178   ret < 8 x i64> %res
   2179 }
   2180 
   2181 define <8 x i64> @test_mask_add_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
   2182 ; X86-LABEL: test_mask_add_epi64_rrkz:
   2183 ; X86:       ## %bb.0:
   2184 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2185 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2186 ; X86-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
   2187 ; X86-NEXT:    retl ## encoding: [0xc3]
   2188 ;
   2189 ; X64-LABEL: test_mask_add_epi64_rrkz:
   2190 ; X64:       ## %bb.0:
   2191 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2192 ; X64-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
   2193 ; X64-NEXT:    retq ## encoding: [0xc3]
   2194   %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
   2195   ret < 8 x i64> %res
   2196 }
   2197 
   2198 define <8 x i64> @test_mask_add_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
   2199 ; X86-LABEL: test_mask_add_epi64_rm:
   2200 ; X86:       ## %bb.0:
   2201 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2202 ; X86-NEXT:    vpaddq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x00]
   2203 ; X86-NEXT:    retl ## encoding: [0xc3]
   2204 ;
   2205 ; X64-LABEL: test_mask_add_epi64_rm:
   2206 ; X64:       ## %bb.0:
   2207 ; X64-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0x07]
   2208 ; X64-NEXT:    retq ## encoding: [0xc3]
   2209   %b = load <8 x i64>, <8 x i64>* %ptr_b
   2210   %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   2211   ret < 8 x i64> %res
   2212 }
   2213 
   2214 define <8 x i64> @test_mask_add_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   2215 ; X86-LABEL: test_mask_add_epi64_rmk:
   2216 ; X86:       ## %bb.0:
   2217 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2218 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   2219 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   2220 ; X86-NEXT:    vpaddq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x08]
   2221 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2222 ; X86-NEXT:    retl ## encoding: [0xc3]
   2223 ;
   2224 ; X64-LABEL: test_mask_add_epi64_rmk:
   2225 ; X64:       ## %bb.0:
   2226 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2227 ; X64-NEXT:    vpaddq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0x0f]
   2228 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2229 ; X64-NEXT:    retq ## encoding: [0xc3]
   2230   %b = load <8 x i64>, <8 x i64>* %ptr_b
   2231   %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   2232   ret < 8 x i64> %res
   2233 }
   2234 
   2235 define <8 x i64> @test_mask_add_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
   2236 ; X86-LABEL: test_mask_add_epi64_rmkz:
   2237 ; X86:       ## %bb.0:
   2238 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2239 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   2240 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   2241 ; X86-NEXT:    vpaddq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x00]
   2242 ; X86-NEXT:    retl ## encoding: [0xc3]
   2243 ;
   2244 ; X64-LABEL: test_mask_add_epi64_rmkz:
   2245 ; X64:       ## %bb.0:
   2246 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2247 ; X64-NEXT:    vpaddq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0x07]
   2248 ; X64-NEXT:    retq ## encoding: [0xc3]
   2249   %b = load <8 x i64>, <8 x i64>* %ptr_b
   2250   %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
   2251   ret < 8 x i64> %res
   2252 }
   2253 
   2254 define <8 x i64> @test_mask_add_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
   2255 ; X86-LABEL: test_mask_add_epi64_rmb:
   2256 ; X86:       ## %bb.0:
   2257 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2258 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   2259 ; X86-NEXT:    ## xmm1 = mem[0],zero
   2260 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   2261 ; X86-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc1]
   2262 ; X86-NEXT:    retl ## encoding: [0xc3]
   2263 ;
   2264 ; X64-LABEL: test_mask_add_epi64_rmb:
   2265 ; X64:       ## %bb.0:
   2266 ; X64-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xd4,0x07]
   2267 ; X64-NEXT:    retq ## encoding: [0xc3]
   2268   %q = load i64, i64* %ptr_b
   2269   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   2270   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   2271   %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   2272   ret < 8 x i64> %res
   2273 }
   2274 
   2275 define <8 x i64> @test_mask_add_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   2276 ; X86-LABEL: test_mask_add_epi64_rmbk:
   2277 ; X86:       ## %bb.0:
   2278 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2279 ; X86-NEXT:    vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
   2280 ; X86-NEXT:    ## xmm2 = mem[0],zero
   2281 ; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
   2282 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   2283 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2284 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd4,0xca]
   2285 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2286 ; X86-NEXT:    retl ## encoding: [0xc3]
   2287 ;
   2288 ; X64-LABEL: test_mask_add_epi64_rmbk:
   2289 ; X64:       ## %bb.0:
   2290 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2291 ; X64-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xd4,0x0f]
   2292 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2293 ; X64-NEXT:    retq ## encoding: [0xc3]
   2294   %q = load i64, i64* %ptr_b
   2295   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   2296   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   2297   %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   2298   ret < 8 x i64> %res
   2299 }
   2300 
   2301 define <8 x i64> @test_mask_add_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
   2302 ; X86-LABEL: test_mask_add_epi64_rmbkz:
   2303 ; X86:       ## %bb.0:
   2304 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2305 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   2306 ; X86-NEXT:    ## xmm1 = mem[0],zero
   2307 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   2308 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   2309 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2310 ; X86-NEXT:    vpaddq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd4,0xc1]
   2311 ; X86-NEXT:    retl ## encoding: [0xc3]
   2312 ;
   2313 ; X64-LABEL: test_mask_add_epi64_rmbkz:
   2314 ; X64:       ## %bb.0:
   2315 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2316 ; X64-NEXT:    vpaddq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xd4,0x07]
   2317 ; X64-NEXT:    retq ## encoding: [0xc3]
   2318   %q = load i64, i64* %ptr_b
   2319   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   2320   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   2321   %res = call <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
   2322   ret < 8 x i64> %res
   2323 }
   2324 
   2325 declare <8 x i64> @llvm.x86.avx512.mask.padd.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   2326 
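        ; psub.q.512 (vpsubq): same coverage as padd.q.512 above.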
   2327 define <8 x i64> @test_mask_sub_epi64_rr(<8 x i64> %a, <8 x i64> %b) {
   2328 ; CHECK-LABEL: test_mask_sub_epi64_rr:
   2329 ; CHECK:       ## %bb.0:
   2330 ; CHECK-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
   2331 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   2332   %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   2333   ret < 8 x i64> %res
   2334 }
   2335 
   2336 define <8 x i64> @test_mask_sub_epi64_rrk(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask) {
   2337 ; X86-LABEL: test_mask_sub_epi64_rrk:
   2338 ; X86:       ## %bb.0:
   2339 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2340 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2341 ; X86-NEXT:    vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
   2342 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   2343 ; X86-NEXT:    retl ## encoding: [0xc3]
   2344 ;
   2345 ; X64-LABEL: test_mask_sub_epi64_rrk:
   2346 ; X64:       ## %bb.0:
   2347 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2348 ; X64-NEXT:    vpsubq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xd1]
   2349 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   2350 ; X64-NEXT:    retq ## encoding: [0xc3]
   2351   %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   2352   ret < 8 x i64> %res
   2353 }
   2354 
   2355 define <8 x i64> @test_mask_sub_epi64_rrkz(<8 x i64> %a, <8 x i64> %b, i8 %mask) {
   2356 ; X86-LABEL: test_mask_sub_epi64_rrkz:
   2357 ; X86:       ## %bb.0:
   2358 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2359 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2360 ; X86-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
   2361 ; X86-NEXT:    retl ## encoding: [0xc3]
   2362 ;
   2363 ; X64-LABEL: test_mask_sub_epi64_rrkz:
   2364 ; X64:       ## %bb.0:
   2365 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2366 ; X64-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
   2367 ; X64-NEXT:    retq ## encoding: [0xc3]
   2368   %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
   2369   ret < 8 x i64> %res
   2370 }
   2371 
   2372 define <8 x i64> @test_mask_sub_epi64_rm(<8 x i64> %a, <8 x i64>* %ptr_b) {
   2373 ; X86-LABEL: test_mask_sub_epi64_rm:
   2374 ; X86:       ## %bb.0:
   2375 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2376 ; X86-NEXT:    vpsubq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x00]
   2377 ; X86-NEXT:    retl ## encoding: [0xc3]
   2378 ;
   2379 ; X64-LABEL: test_mask_sub_epi64_rm:
   2380 ; X64:       ## %bb.0:
   2381 ; X64-NEXT:    vpsubq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0x07]
   2382 ; X64-NEXT:    retq ## encoding: [0xc3]
   2383   %b = load <8 x i64>, <8 x i64>* %ptr_b
   2384   %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   2385   ret < 8 x i64> %res
   2386 }
   2387 
   2388 define <8 x i64> @test_mask_sub_epi64_rmk(<8 x i64> %a, <8 x i64>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   2389 ; X86-LABEL: test_mask_sub_epi64_rmk:
   2390 ; X86:       ## %bb.0:
   2391 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2392 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   2393 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   2394 ; X86-NEXT:    vpsubq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x08]
   2395 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2396 ; X86-NEXT:    retl ## encoding: [0xc3]
   2397 ;
   2398 ; X64-LABEL: test_mask_sub_epi64_rmk:
   2399 ; X64:       ## %bb.0:
   2400 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2401 ; X64-NEXT:    vpsubq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0x0f]
   2402 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2403 ; X64-NEXT:    retq ## encoding: [0xc3]
   2404   %b = load <8 x i64>, <8 x i64>* %ptr_b
   2405   %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   2406   ret < 8 x i64> %res
   2407 }
   2408 
   2409 define <8 x i64> @test_mask_sub_epi64_rmkz(<8 x i64> %a, <8 x i64>* %ptr_b, i8 %mask) {
   2410 ; X86-LABEL: test_mask_sub_epi64_rmkz:
   2411 ; X86:       ## %bb.0:
   2412 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2413 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   2414 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   2415 ; X86-NEXT:    vpsubq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x00]
   2416 ; X86-NEXT:    retl ## encoding: [0xc3]
   2417 ;
   2418 ; X64-LABEL: test_mask_sub_epi64_rmkz:
   2419 ; X64:       ## %bb.0:
   2420 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2421 ; X64-NEXT:    vpsubq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0x07]
   2422 ; X64-NEXT:    retq ## encoding: [0xc3]
   2423   %b = load <8 x i64>, <8 x i64>* %ptr_b
   2424   %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
   2425   ret < 8 x i64> %res
   2426 }
   2427 
   2428 define <8 x i64> @test_mask_sub_epi64_rmb(<8 x i64> %a, i64* %ptr_b) {
   2429 ; X86-LABEL: test_mask_sub_epi64_rmb:
   2430 ; X86:       ## %bb.0:
   2431 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2432 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   2433 ; X86-NEXT:    ## xmm1 = mem[0],zero
   2434 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   2435 ; X86-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xfb,0xc1]
   2436 ; X86-NEXT:    retl ## encoding: [0xc3]
   2437 ;
   2438 ; X64-LABEL: test_mask_sub_epi64_rmb:
   2439 ; X64:       ## %bb.0:
   2440 ; X64-NEXT:    vpsubq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xfb,0x07]
   2441 ; X64-NEXT:    retq ## encoding: [0xc3]
   2442   %q = load i64, i64* %ptr_b
   2443   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   2444   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   2445   %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   2446   ret < 8 x i64> %res
   2447 }
   2448 
   2449 define <8 x i64> @test_mask_sub_epi64_rmbk(<8 x i64> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   2450 ; X86-LABEL: test_mask_sub_epi64_rmbk:
   2451 ; X86:       ## %bb.0:
   2452 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2453 ; X86-NEXT:    vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
   2454 ; X86-NEXT:    ## xmm2 = mem[0],zero
   2455 ; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
   2456 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   2457 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2458 ; X86-NEXT:    vpsubq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xfb,0xca]
   2459 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2460 ; X86-NEXT:    retl ## encoding: [0xc3]
   2461 ;
   2462 ; X64-LABEL: test_mask_sub_epi64_rmbk:
   2463 ; X64:       ## %bb.0:
   2464 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2465 ; X64-NEXT:    vpsubq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xfb,0x0f]
   2466 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2467 ; X64-NEXT:    retq ## encoding: [0xc3]
   2468   %q = load i64, i64* %ptr_b
   2469   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   2470   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   2471   %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> %passThru, i8 %mask)
   2472   ret < 8 x i64> %res
   2473 }
   2474 
   2475 define <8 x i64> @test_mask_sub_epi64_rmbkz(<8 x i64> %a, i64* %ptr_b, i8 %mask) {
   2476 ; X86-LABEL: test_mask_sub_epi64_rmbkz:
   2477 ; X86:       ## %bb.0:
   2478 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2479 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   2480 ; X86-NEXT:    ## xmm1 = mem[0],zero
   2481 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   2482 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   2483 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2484 ; X86-NEXT:    vpsubq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xfb,0xc1]
   2485 ; X86-NEXT:    retl ## encoding: [0xc3]
   2486 ;
   2487 ; X64-LABEL: test_mask_sub_epi64_rmbkz:
   2488 ; X64:       ## %bb.0:
   2489 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2490 ; X64-NEXT:    vpsubq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xfb,0x07]
   2491 ; X64-NEXT:    retq ## encoding: [0xc3]
   2492   %q = load i64, i64* %ptr_b
   2493   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   2494   %b = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   2495   %res = call <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64> %a, <8 x i64> %b, <8 x i64> zeroinitializer, i8 %mask)
   2496   ret < 8 x i64> %res
   2497 }
   2498 
   2499 declare <8 x i64> @llvm.x86.avx512.mask.psub.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   2500 
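        ; pmull.d.512 (vpmulld) runs through the same masking/memory/broadcast matrix.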
   2501 define <16 x i32> @test_mask_mullo_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
   2502 ; CHECK-LABEL: test_mask_mullo_epi32_rr_512:
   2503 ; CHECK:       ## %bb.0:
   2504 ; CHECK-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0xc1]
   2505 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   2506   %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   2507   ret <16 x i32> %res
   2508 }
   2509 
   2510 define <16 x i32> @test_mask_mullo_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask) {
   2511 ; X86-LABEL: test_mask_mullo_epi32_rrk_512:
   2512 ; X86:       ## %bb.0:
   2513 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2514 ; X86-NEXT:    vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
   2515 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   2516 ; X86-NEXT:    retl ## encoding: [0xc3]
   2517 ;
   2518 ; X64-LABEL: test_mask_mullo_epi32_rrk_512:
   2519 ; X64:       ## %bb.0:
   2520 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2521 ; X64-NEXT:    vpmulld %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0xd1]
   2522 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   2523 ; X64-NEXT:    retq ## encoding: [0xc3]
   2524   %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   2525   ret < 16 x i32> %res
   2526 }
   2527 
   2528 define <16 x i32> @test_mask_mullo_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
   2529 ; X86-LABEL: test_mask_mullo_epi32_rrkz_512:
   2530 ; X86:       ## %bb.0:
   2531 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2532 ; X86-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
   2533 ; X86-NEXT:    retl ## encoding: [0xc3]
   2534 ;
   2535 ; X64-LABEL: test_mask_mullo_epi32_rrkz_512:
   2536 ; X64:       ## %bb.0:
   2537 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2538 ; X64-NEXT:    vpmulld %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0xc1]
   2539 ; X64-NEXT:    retq ## encoding: [0xc3]
   2540   %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
   2541   ret < 16 x i32> %res
   2542 }
   2543 
   2544 define <16 x i32> @test_mask_mullo_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
   2545 ; X86-LABEL: test_mask_mullo_epi32_rm_512:
   2546 ; X86:       ## %bb.0:
   2547 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2548 ; X86-NEXT:    vpmulld (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x00]
   2549 ; X86-NEXT:    retl ## encoding: [0xc3]
   2550 ;
   2551 ; X64-LABEL: test_mask_mullo_epi32_rm_512:
   2552 ; X64:       ## %bb.0:
   2553 ; X64-NEXT:    vpmulld (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x40,0x07]
   2554 ; X64-NEXT:    retq ## encoding: [0xc3]
   2555   %b = load <16 x i32>, <16 x i32>* %ptr_b
   2556   %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   2557   ret < 16 x i32> %res
   2558 }
   2559 
   2560 define <16 x i32> @test_mask_mullo_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <16 x i32> %passThru, i16 %mask) {
   2561 ; X86-LABEL: test_mask_mullo_epi32_rmk_512:
   2562 ; X86:       ## %bb.0:
   2563 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2564 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2565 ; X86-NEXT:    vpmulld (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x08]
   2566 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2567 ; X86-NEXT:    retl ## encoding: [0xc3]
   2568 ;
   2569 ; X64-LABEL: test_mask_mullo_epi32_rmk_512:
   2570 ; X64:       ## %bb.0:
   2571 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2572 ; X64-NEXT:    vpmulld (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x40,0x0f]
   2573 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2574 ; X64-NEXT:    retq ## encoding: [0xc3]
   2575   %b = load <16 x i32>, <16 x i32>* %ptr_b
   2576   %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   2577   ret < 16 x i32> %res
   2578 }
   2579 
   2580 define <16 x i32> @test_mask_mullo_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i16 %mask) {
   2581 ; X86-LABEL: test_mask_mullo_epi32_rmkz_512:
   2582 ; X86:       ## %bb.0:
   2583 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2584 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2585 ; X86-NEXT:    vpmulld (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x00]
   2586 ; X86-NEXT:    retl ## encoding: [0xc3]
   2587 ;
   2588 ; X64-LABEL: test_mask_mullo_epi32_rmkz_512:
   2589 ; X64:       ## %bb.0:
   2590 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2591 ; X64-NEXT:    vpmulld (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x40,0x07]
   2592 ; X64-NEXT:    retq ## encoding: [0xc3]
   2593   %b = load <16 x i32>, <16 x i32>* %ptr_b
   2594   %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
   2595   ret < 16 x i32> %res
   2596 }
   2597 
   2598 define <16 x i32> @test_mask_mullo_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
   2599 ; X86-LABEL: test_mask_mullo_epi32_rmb_512:
   2600 ; X86:       ## %bb.0:
   2601 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2602 ; X86-NEXT:    vpmulld (%eax){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x00]
   2603 ; X86-NEXT:    retl ## encoding: [0xc3]
   2604 ;
   2605 ; X64-LABEL: test_mask_mullo_epi32_rmb_512:
   2606 ; X64:       ## %bb.0:
   2607 ; X64-NEXT:    vpmulld (%rdi){1to16}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x58,0x40,0x07]
   2608 ; X64-NEXT:    retq ## encoding: [0xc3]
   2609   %q = load i32, i32* %ptr_b
   2610   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   2611   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   2612   %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 -1)
   2613   ret < 16 x i32> %res
   2614 }
   2615 
   2616 define <16 x i32> @test_mask_mullo_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <16 x i32> %passThru, i16 %mask) {
   2617 ; X86-LABEL: test_mask_mullo_epi32_rmbk_512:
   2618 ; X86:       ## %bb.0:
   2619 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2620 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2621 ; X86-NEXT:    vpmulld (%eax){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x08]
   2622 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2623 ; X86-NEXT:    retl ## encoding: [0xc3]
   2624 ;
   2625 ; X64-LABEL: test_mask_mullo_epi32_rmbk_512:
   2626 ; X64:       ## %bb.0:
   2627 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2628 ; X64-NEXT:    vpmulld (%rdi){1to16}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x59,0x40,0x0f]
   2629 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   2630 ; X64-NEXT:    retq ## encoding: [0xc3]
   2631   %q = load i32, i32* %ptr_b
   2632   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   2633   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   2634   %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> %passThru, i16 %mask)
   2635   ret < 16 x i32> %res
   2636 }
   2637 
   2638 define <16 x i32> @test_mask_mullo_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i16 %mask) {
   2639 ; X86-LABEL: test_mask_mullo_epi32_rmbkz_512:
   2640 ; X86:       ## %bb.0:
   2641 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   2642 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2643 ; X86-NEXT:    vpmulld (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x00]
   2644 ; X86-NEXT:    retl ## encoding: [0xc3]
   2645 ;
   2646 ; X64-LABEL: test_mask_mullo_epi32_rmbkz_512:
   2647 ; X64:       ## %bb.0:
   2648 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2649 ; X64-NEXT:    vpmulld (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xd9,0x40,0x07]
   2650 ; X64-NEXT:    retq ## encoding: [0xc3]
   2651   %q = load i32, i32* %ptr_b
   2652   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   2653   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   2654   %res = call <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32> %a, <16 x i32> %b, <16 x i32> zeroinitializer, i16 %mask)
   2655   ret < 16 x i32> %res
   2656 }
   2657 
   2658 declare <16 x i32> @llvm.x86.avx512.mask.pmull.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   2659 
   2660 
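        ; 128-bit-lane shuffle intrinsics (shuf.f32x4, shuf.f64x2) with immediate 22;
        ; the autogenerated comments spell out the expected lane permutation for each
        ; masking form.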
   2661 declare <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float>, <16 x float>, i32, <16 x float>, i16)
   2662 
   2663 define <16 x float>@test_int_x86_avx512_mask_shuf_f32x4(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
   2664 ; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
   2665 ; X86:       ## %bb.0:
   2666 ; X86-NEXT:    vshuff32x4 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x23,0xd9,0x16]
   2667 ; X86-NEXT:    ## zmm3 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
   2668 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2669 ; X86-NEXT:    vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
   2670 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
   2671 ; X86-NEXT:    vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
   2672 ; X86-NEXT:    retl ## encoding: [0xc3]
   2673 ;
   2674 ; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4:
   2675 ; X64:       ## %bb.0:
   2676 ; X64-NEXT:    vshuff32x4 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x23,0xd9,0x16]
   2677 ; X64-NEXT:    ## zmm3 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
   2678 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2679 ; X64-NEXT:    vshuff32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x23,0xd1,0x16]
   2680 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
   2681 ; X64-NEXT:    vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
   2682 ; X64-NEXT:    retq ## encoding: [0xc3]
   2683   %res = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
   2684   %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.f32x4(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
   2685   %res2 = fadd <16 x float> %res, %res1
   2686   ret <16 x float> %res2
   2687 }
   2688 
   2689 declare <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double>, <8 x double>, i32, <8 x double>, i8)
   2690 
   2691 define <8 x double>@test_int_x86_avx512_mask_shuf_f64x2(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
   2692 ; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
   2693 ; X86:       ## %bb.0:
   2694 ; X86-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xd9,0x16]
   2695 ; X86-NEXT:    ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2696 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2697 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2698 ; X86-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
   2699 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2700 ; X86-NEXT:    vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
   2701 ; X86-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
   2702 ; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2703 ; X86-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
   2704 ; X86-NEXT:    retl ## encoding: [0xc3]
   2705 ;
   2706 ; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2:
   2707 ; X64:       ## %bb.0:
   2708 ; X64-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x23,0xd9,0x16]
   2709 ; X64-NEXT:    ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2710 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2711 ; X64-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x23,0xd1,0x16]
   2712 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2713 ; X64-NEXT:    vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
   2714 ; X64-NEXT:    vshuff64x2 $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x23,0xc1,0x16]
   2715 ; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2716 ; X64-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
   2717 ; X64-NEXT:    retq ## encoding: [0xc3]
   2718   %res = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
   2719   %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
   2720   %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.f64x2(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
   2721 
   2722   %res3 = fadd <8 x double> %res, %res1
   2723   %res4 = fadd <8 x double> %res3, %res2
   2724   ret <8 x double> %res4
   2725 }
   2726 
   2727 declare <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
   2728 
   2729 define <16 x i32>@test_int_x86_avx512_mask_shuf_i32x4(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x3, i16 %x4) {
   2730 ; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
   2731 ; X86:       ## %bb.0:
   2732 ; X86-NEXT:    vshufi32x4 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x43,0xd9,0x16]
   2733 ; X86-NEXT:    ## zmm3 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
   2734 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2735 ; X86-NEXT:    vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
   2736 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
   2737 ; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2738 ; X86-NEXT:    retl ## encoding: [0xc3]
   2739 ;
   2740 ; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4:
   2741 ; X64:       ## %bb.0:
   2742 ; X64-NEXT:    vshufi32x4 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x43,0xd9,0x16]
   2743 ; X64-NEXT:    ## zmm3 = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
   2744 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2745 ; X64-NEXT:    vshufi32x4 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x43,0xd1,0x16]
   2746 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[8,9,10,11,4,5,6,7],zmm1[4,5,6,7,0,1,2,3]
   2747 ; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2748 ; X64-NEXT:    retq ## encoding: [0xc3]
   2749   %res = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 %x4)
   2750   %res1 = call <16 x i32> @llvm.x86.avx512.mask.shuf.i32x4(<16 x i32> %x0, <16 x i32> %x1, i32 22, <16 x i32> %x3, i16 -1)
   2751   %res2 = add <16 x i32> %res, %res1
   2752   ret <16 x i32> %res2
   2753 }
   2754 
   2755 declare <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
   2756 
   2757 define <8 x i64>@test_int_x86_avx512_mask_shuf_i64x2(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x3, i8 %x4) {
   2758 ; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
   2759 ; X86:       ## %bb.0:
   2760 ; X86-NEXT:    vshufi64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xd9,0x16]
   2761 ; X86-NEXT:    ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2762 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2763 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2764 ; X86-NEXT:    vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
   2765 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2766 ; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   2767 ; X86-NEXT:    retl ## encoding: [0xc3]
   2768 ;
   2769 ; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2:
   2770 ; X64:       ## %bb.0:
   2771 ; X64-NEXT:    vshufi64x2 $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x43,0xd9,0x16]
   2772 ; X64-NEXT:    ## zmm3 = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2773 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2774 ; X64-NEXT:    vshufi64x2 $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x43,0xd1,0x16]
   2775 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[4,5,2,3],zmm1[2,3,0,1]
   2776 ; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   2777 ; X64-NEXT:    retq ## encoding: [0xc3]
   2778   %res = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 %x4)
   2779   %res1 = call <8 x i64> @llvm.x86.avx512.mask.shuf.i64x2(<8 x i64> %x0, <8 x i64> %x1, i32 22, <8 x i64> %x3, i8 -1)
   2780   %res2 = add <8 x i64> %res, %res1
   2781   ret <8 x i64> %res2
   2782 }
   2783 
   2784 declare <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double>, <8 x double>, i32, <8 x double>, i8)
   2785 
   2786 define <8 x double>@test_int_x86_avx512_mask_shuf_pd_512(<8 x double> %x0, <8 x double> %x1, <8 x double> %x3, i8 %x4) {
   2787 ; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
   2788 ; X86:       ## %bb.0:
   2789 ; X86-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0xc6,0xd9,0x16]
   2790 ; X86-NEXT:    ## zmm3 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
   2791 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2792 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2793 ; X86-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
   2794 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
   2795 ; X86-NEXT:    vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
   2796 ; X86-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
   2797 ; X86-NEXT:    ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
   2798 ; X86-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
   2799 ; X86-NEXT:    retl ## encoding: [0xc3]
   2800 ;
   2801 ; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_512:
   2802 ; X64:       ## %bb.0:
   2803 ; X64-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0xc6,0xd9,0x16]
   2804 ; X64-NEXT:    ## zmm3 = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
   2805 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2806 ; X64-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xc6,0xd1,0x16]
   2807 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
   2808 ; X64-NEXT:    vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
   2809 ; X64-NEXT:    vshufpd $22, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xc6,0xc1,0x16]
   2810 ; X64-NEXT:    ## zmm0 {%k1} {z} = zmm0[0],zmm1[1],zmm0[3],zmm1[2],zmm0[5],zmm1[4],zmm0[6],zmm1[6]
   2811 ; X64-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
   2812 ; X64-NEXT:    retq ## encoding: [0xc3]
   2813   %res = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 %x4)
   2814   %res1 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> %x3, i8 -1)
   2815   %res2 = call <8 x double> @llvm.x86.avx512.mask.shuf.pd.512(<8 x double> %x0, <8 x double> %x1, i32 22, <8 x double> zeroinitializer, i8 %x4)
   2816 
   2817   %res3 = fadd <8 x double> %res, %res1
   2818   %res4 = fadd <8 x double> %res3, %res2
   2819   ret <8 x double> %res4
   2820 }
   2821 
   2822 declare <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float>, <16 x float>, i32, <16 x float>, i16)
   2823 
   2824 define <16 x float>@test_int_x86_avx512_mask_shuf_ps_512(<16 x float> %x0, <16 x float> %x1, <16 x float> %x3, i16 %x4) {
   2825 ; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
   2826 ; X86:       ## %bb.0:
   2827 ; X86-NEXT:    vshufps $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xd9,0x16]
   2828 ; X86-NEXT:    ## zmm3 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
   2829 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2830 ; X86-NEXT:    vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
   2831 ; X86-NEXT:    ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
   2832 ; X86-NEXT:    vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
   2833 ; X86-NEXT:    retl ## encoding: [0xc3]
   2834 ;
   2835 ; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_512:
   2836 ; X64:       ## %bb.0:
   2837 ; X64-NEXT:    vshufps $22, %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0xc6,0xd9,0x16]
   2838 ; X64-NEXT:    ## zmm3 = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
   2839 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2840 ; X64-NEXT:    vshufps $22, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0xc6,0xd1,0x16]
   2841 ; X64-NEXT:    ## zmm2 {%k1} = zmm0[2,1],zmm1[1,0],zmm0[6,5],zmm1[5,4],zmm0[10,9],zmm1[9,8],zmm0[14,13],zmm1[13,12]
   2842 ; X64-NEXT:    vaddps %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc3]
   2843 ; X64-NEXT:    retq ## encoding: [0xc3]
   2844   %res = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 %x4)
   2845   %res1 = call <16 x float> @llvm.x86.avx512.mask.shuf.ps.512(<16 x float> %x0, <16 x float> %x1, i32 22, <16 x float> %x3, i16 -1)
   2846   %res2 = fadd <16 x float> %res, %res1
   2847   ret <16 x float> %res2
   2848 }
   2849 
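; Legacy masked integer min/max intrinsics (pmaxs/pmaxu/pmins/pminu on dword
; and qword elements). Each test adds the merge-masked result to the unmasked
; (-1 mask) result so both forms of the instruction are verified.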
   2850 declare <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   2851 
   2852 define <16 x i32>@test_int_x86_avx512_mask_pmaxs_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   2853 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
   2854 ; X86:       ## %bb.0:
   2855 ; X86-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xd9]
   2856 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2857 ; X86-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
   2858 ; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2859 ; X86-NEXT:    retl ## encoding: [0xc3]
   2860 ;
   2861 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_512:
   2862 ; X64:       ## %bb.0:
   2863 ; X64-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3d,0xd9]
   2864 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2865 ; X64-NEXT:    vpmaxsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3d,0xd1]
   2866 ; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2867 ; X64-NEXT:    retq ## encoding: [0xc3]
   2868   %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   2869   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxs.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   2870   %res2 = add <16 x i32> %res, %res1
   2871   ret <16 x i32> %res2
   2872 }
   2873 
   2874 declare <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   2875 
   2876 define <8 x i64>@test_int_x86_avx512_mask_pmaxs_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   2877 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
   2878 ; X86:       ## %bb.0:
   2879 ; X86-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xd9]
   2880 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2881 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2882 ; X86-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
   2883 ; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   2884 ; X86-NEXT:    retl ## encoding: [0xc3]
   2885 ;
   2886 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_512:
   2887 ; X64:       ## %bb.0:
   2888 ; X64-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3d,0xd9]
   2889 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2890 ; X64-NEXT:    vpmaxsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3d,0xd1]
   2891 ; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   2892 ; X64-NEXT:    retq ## encoding: [0xc3]
   2893   %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   2894   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxs.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   2895   %res2 = add <8 x i64> %res, %res1
   2896   ret <8 x i64> %res2
   2897 }
   2898 
   2899 declare <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   2900 
   2901 define <16 x i32>@test_int_x86_avx512_mask_pmaxu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   2902 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
   2903 ; X86:       ## %bb.0:
   2904 ; X86-NEXT:    vpmaxud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3f,0xd9]
   2905 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2906 ; X86-NEXT:    vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
   2907 ; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2908 ; X86-NEXT:    retl ## encoding: [0xc3]
   2909 ;
   2910 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_512:
   2911 ; X64:       ## %bb.0:
   2912 ; X64-NEXT:    vpmaxud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3f,0xd9]
   2913 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2914 ; X64-NEXT:    vpmaxud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3f,0xd1]
   2915 ; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2916 ; X64-NEXT:    retq ## encoding: [0xc3]
   2917   %res = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   2918   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaxu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   2919   %res2 = add <16 x i32> %res, %res1
   2920   ret <16 x i32> %res2
   2921 }
   2922 
   2923 declare <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   2924 
   2925 define <8 x i64>@test_int_x86_avx512_mask_pmaxu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   2926 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
   2927 ; X86:       ## %bb.0:
   2928 ; X86-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3f,0xd9]
   2929 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2930 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2931 ; X86-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
   2932 ; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   2933 ; X86-NEXT:    retl ## encoding: [0xc3]
   2934 ;
   2935 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_512:
   2936 ; X64:       ## %bb.0:
   2937 ; X64-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3f,0xd9]
   2938 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2939 ; X64-NEXT:    vpmaxuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3f,0xd1]
   2940 ; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   2941 ; X64-NEXT:    retq ## encoding: [0xc3]
   2942   %res = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   2943   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmaxu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   2944   %res2 = add <8 x i64> %res, %res1
   2945   ret <8 x i64> %res2
   2946 }
   2947 
   2948 declare <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   2949 
   2950 define <16 x i32>@test_int_x86_avx512_mask_pmins_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   2951 ; X86-LABEL: test_int_x86_avx512_mask_pmins_d_512:
   2952 ; X86:       ## %bb.0:
   2953 ; X86-NEXT:    vpminsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x39,0xd9]
   2954 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2955 ; X86-NEXT:    vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
   2956 ; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2957 ; X86-NEXT:    retl ## encoding: [0xc3]
   2958 ;
   2959 ; X64-LABEL: test_int_x86_avx512_mask_pmins_d_512:
   2960 ; X64:       ## %bb.0:
   2961 ; X64-NEXT:    vpminsd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x39,0xd9]
   2962 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2963 ; X64-NEXT:    vpminsd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x39,0xd1]
   2964 ; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2965 ; X64-NEXT:    retq ## encoding: [0xc3]
   2966   %res = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   2967   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmins.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   2968   %res2 = add <16 x i32> %res, %res1
   2969   ret <16 x i32> %res2
   2970 }
   2971 
   2972 declare <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   2973 
   2974 define <8 x i64>@test_int_x86_avx512_mask_pmins_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   2975 ; X86-LABEL: test_int_x86_avx512_mask_pmins_q_512:
   2976 ; X86:       ## %bb.0:
   2977 ; X86-NEXT:    vpminsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x39,0xd9]
   2978 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2979 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   2980 ; X86-NEXT:    vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
   2981 ; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   2982 ; X86-NEXT:    retl ## encoding: [0xc3]
   2983 ;
   2984 ; X64-LABEL: test_int_x86_avx512_mask_pmins_q_512:
   2985 ; X64:       ## %bb.0:
   2986 ; X64-NEXT:    vpminsq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x39,0xd9]
   2987 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2988 ; X64-NEXT:    vpminsq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x39,0xd1]
   2989 ; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   2990 ; X64-NEXT:    retq ## encoding: [0xc3]
   2991   %res = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   2992   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmins.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   2993   %res2 = add <8 x i64> %res, %res1
   2994   ret <8 x i64> %res2
   2995 }
   2996 
   2997 declare <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   2998 
   2999 define <16 x i32>@test_int_x86_avx512_mask_pminu_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   3000 ; X86-LABEL: test_int_x86_avx512_mask_pminu_d_512:
   3001 ; X86:       ## %bb.0:
   3002 ; X86-NEXT:    vpminud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xd9]
   3003 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3004 ; X86-NEXT:    vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
   3005 ; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   3006 ; X86-NEXT:    retl ## encoding: [0xc3]
   3007 ;
   3008 ; X64-LABEL: test_int_x86_avx512_mask_pminu_d_512:
   3009 ; X64:       ## %bb.0:
   3010 ; X64-NEXT:    vpminud %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x3b,0xd9]
   3011 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3012 ; X64-NEXT:    vpminud %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x3b,0xd1]
   3013 ; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   3014 ; X64-NEXT:    retq ## encoding: [0xc3]
   3015   %res = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   3016   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pminu.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   3017   %res2 = add <16 x i32> %res, %res1
   3018   ret <16 x i32> %res2
   3019 }
   3020 
   3021 declare <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   3022 
   3023 define <8 x i64>@test_int_x86_avx512_mask_pminu_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   3024 ; X86-LABEL: test_int_x86_avx512_mask_pminu_q_512:
   3025 ; X86:       ## %bb.0:
   3026 ; X86-NEXT:    vpminuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3b,0xd9]
   3027 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3028 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3029 ; X86-NEXT:    vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
   3030 ; X86-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   3031 ; X86-NEXT:    retl ## encoding: [0xc3]
   3032 ;
   3033 ; X64-LABEL: test_int_x86_avx512_mask_pminu_q_512:
   3034 ; X64:       ## %bb.0:
   3035 ; X64-NEXT:    vpminuq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x3b,0xd9]
   3036 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3037 ; X64-NEXT:    vpminuq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x3b,0xd1]
   3038 ; X64-NEXT:    vpaddq %zmm3, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc3]
   3039 ; X64-NEXT:    retq ## encoding: [0xc3]
   3040   %res = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   3041   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pminu.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   3042   %res2 = add <8 x i64> %res, %res1
   3043   ret <8 x i64> %res2
   3044 }
   3045 
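; Masked scalar moves: vmovss/vmovsd through llvm.x86.avx512.mask.move.ss/sd,
; covering merge masking into the passthru operand and zero masking.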
   3046 define <4 x float> @test_mm_mask_move_ss(<4 x float> %__W, i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
   3047 ; X86-LABEL: test_mm_mask_move_ss:
   3048 ; X86:       ## %bb.0: ## %entry
   3049 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   3050 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3051 ; X86-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
   3052 ; X86-NEXT:    retl ## encoding: [0xc3]
   3053 ;
   3054 ; X64-LABEL: test_mm_mask_move_ss:
   3055 ; X64:       ## %bb.0: ## %entry
   3056 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3057 ; X64-NEXT:    vmovss %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x76,0x09,0x10,0xc2]
   3058 ; X64-NEXT:    retq ## encoding: [0xc3]
   3059 entry:
   3060   %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> %__W, i8 %__U)
   3061   ret <4 x float> %res
   3062 }
   3063 
   3064 
   3065 define <4 x float> @test_mm_maskz_move_ss(i8 zeroext %__U, <4 x float> %__A, <4 x float> %__B) {
   3066 ; X86-LABEL: test_mm_maskz_move_ss:
   3067 ; X86:       ## %bb.0: ## %entry
   3068 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   3069 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3070 ; X86-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
   3071 ; X86-NEXT:    retl ## encoding: [0xc3]
   3072 ;
   3073 ; X64-LABEL: test_mm_maskz_move_ss:
   3074 ; X64:       ## %bb.0: ## %entry
   3075 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3076 ; X64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc1]
   3077 ; X64-NEXT:    retq ## encoding: [0xc3]
   3078 entry:
   3079   %res = call <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float> %__A, <4 x float> %__B, <4 x float> zeroinitializer, i8 %__U)
   3080   ret <4 x float> %res
   3081 }
   3082 
   3083 define <2 x double> @test_mm_mask_move_sd(<2 x double> %__W, i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
   3084 ; X86-LABEL: test_mm_mask_move_sd:
   3085 ; X86:       ## %bb.0: ## %entry
   3086 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   3087 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3088 ; X86-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
   3089 ; X86-NEXT:    retl ## encoding: [0xc3]
   3090 ;
   3091 ; X64-LABEL: test_mm_mask_move_sd:
   3092 ; X64:       ## %bb.0: ## %entry
   3093 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3094 ; X64-NEXT:    vmovsd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xf7,0x09,0x10,0xc2]
   3095 ; X64-NEXT:    retq ## encoding: [0xc3]
   3096 entry:
   3097   %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> %__W, i8 %__U)
   3098   ret <2 x double> %res
   3099 }
   3100 
   3101 define <2 x double> @test_mm_maskz_move_sd(i8 zeroext %__U, <2 x double> %__A, <2 x double> %__B) {
   3102 ; X86-LABEL: test_mm_maskz_move_sd:
   3103 ; X86:       ## %bb.0: ## %entry
   3104 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   3105 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3106 ; X86-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
   3107 ; X86-NEXT:    retl ## encoding: [0xc3]
   3108 ;
   3109 ; X64-LABEL: test_mm_maskz_move_sd:
   3110 ; X64:       ## %bb.0: ## %entry
   3111 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3112 ; X64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc1]
   3113 ; X64-NEXT:    retq ## encoding: [0xc3]
   3114 entry:
   3115   %res = call <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double> %__A, <2 x double> %__B, <2 x double> zeroinitializer, i8 %__U)
   3116   ret <2 x double> %res
   3117 }
   3118 
   3119 declare <4 x float> @llvm.x86.avx512.mask.move.ss(<4 x float>, <4 x float>, <4 x float>, i8)
   3120 declare <2 x double> @llvm.x86.avx512.mask.move.sd(<2 x double>, <2 x double>, <2 x double>, i8)
   3121 
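; Legacy masked zero- and sign-extension intrinsics (pmovzx*/pmovsx* from
; byte/word/dword sources to dword/qword elements). Each test covers the
; merge-masked, zero-masked (zeroinitializer passthru), and unmasked forms
; and sums the three results.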
   3122 declare <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8>, <16 x i32>, i16)
   3123 
   3124 define <16 x i32>@test_int_x86_avx512_mask_pmovzxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
   3125 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
   3126 ; X86:       ## %bb.0:
   3127 ; X86-NEXT:    vpmovzxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x31,0xd0]
   3128 ; X86-NEXT:    ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   3129 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3130 ; X86-NEXT:    vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
   3131 ; X86-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   3132 ; X86-NEXT:    vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
   3133 ; X86-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   3134 ; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3135 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3136 ; X86-NEXT:    retl ## encoding: [0xc3]
   3137 ;
   3138 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_512:
   3139 ; X64:       ## %bb.0:
   3140 ; X64-NEXT:    vpmovzxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x31,0xd0]
   3141 ; X64-NEXT:    ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   3142 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3143 ; X64-NEXT:    vpmovzxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x31,0xc8]
   3144 ; X64-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   3145 ; X64-NEXT:    vpmovzxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x31,0xc0]
   3146 ; X64-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero,xmm0[8],zero,zero,zero,xmm0[9],zero,zero,zero,xmm0[10],zero,zero,zero,xmm0[11],zero,zero,zero,xmm0[12],zero,zero,zero,xmm0[13],zero,zero,zero,xmm0[14],zero,zero,zero,xmm0[15],zero,zero,zero
   3147 ; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3148 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3149 ; X64-NEXT:    retq ## encoding: [0xc3]
   3150   %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
   3151   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
   3152   %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
   3153   %res3 = add <16 x i32> %res, %res1
   3154   %res4 = add <16 x i32> %res3, %res2
   3155   ret <16 x i32> %res4
   3156 }
   3157 
   3158 declare <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8>, <8 x i64>, i8)
   3159 
   3160 define <8 x i64>@test_int_x86_avx512_mask_pmovzxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
   3161 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
   3162 ; X86:       ## %bb.0:
   3163 ; X86-NEXT:    vpmovzxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x32,0xd0]
   3164 ; X86-NEXT:    ## zmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
   3165 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3166 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3167 ; X86-NEXT:    vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
   3168 ; X86-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
   3169 ; X86-NEXT:    vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
   3170 ; X86-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
   3171 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3172 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3173 ; X86-NEXT:    retl ## encoding: [0xc3]
   3174 ;
   3175 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_512:
   3176 ; X64:       ## %bb.0:
   3177 ; X64-NEXT:    vpmovzxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x32,0xd0]
   3178 ; X64-NEXT:    ## zmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
   3179 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3180 ; X64-NEXT:    vpmovzxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x32,0xc8]
   3181 ; X64-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
   3182 ; X64-NEXT:    vpmovzxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x32,0xc0]
   3183 ; X64-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero,xmm0[4],zero,zero,zero,zero,zero,zero,zero,xmm0[5],zero,zero,zero,zero,zero,zero,zero,xmm0[6],zero,zero,zero,zero,zero,zero,zero,xmm0[7],zero,zero,zero,zero,zero,zero,zero
   3184 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3185 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3186 ; X64-NEXT:    retq ## encoding: [0xc3]
   3187   %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
   3188   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
   3189   %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
   3190   %res3 = add <8 x i64> %res, %res1
   3191   %res4 = add <8 x i64> %res3, %res2
   3192   ret <8 x i64> %res4
   3193 }
   3194 
   3195 declare <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32>, <8 x i64>, i8)
   3196 
   3197 define <8 x i64>@test_int_x86_avx512_mask_pmovzxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
   3198 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
   3199 ; X86:       ## %bb.0:
   3200 ; X86-NEXT:    vpmovzxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x35,0xd0]
   3201 ; X86-NEXT:    ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   3202 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3203 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3204 ; X86-NEXT:    vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
   3205 ; X86-NEXT:    ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   3206 ; X86-NEXT:    vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
   3207 ; X86-NEXT:    ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   3208 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3209 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3210 ; X86-NEXT:    retl ## encoding: [0xc3]
   3211 ;
   3212 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_512:
   3213 ; X64:       ## %bb.0:
   3214 ; X64-NEXT:    vpmovzxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x35,0xd0]
   3215 ; X64-NEXT:    ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   3216 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3217 ; X64-NEXT:    vpmovzxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x35,0xc8]
   3218 ; X64-NEXT:    ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   3219 ; X64-NEXT:    vpmovzxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x35,0xc0]
   3220 ; X64-NEXT:    ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero
   3221 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3222 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3223 ; X64-NEXT:    retq ## encoding: [0xc3]
   3224   %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
   3225   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
   3226   %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
   3227   %res3 = add <8 x i64> %res, %res1
   3228   %res4 = add <8 x i64> %res3, %res2
   3229   ret <8 x i64> %res4
   3230 }
   3231 
   3232 declare <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16>, <16 x i32>, i16)
   3233 
   3234 define <16 x i32>@test_int_x86_avx512_mask_pmovzxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
   3235 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
   3236 ; X86:       ## %bb.0:
   3237 ; X86-NEXT:    vpmovzxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x33,0xd0]
   3238 ; X86-NEXT:    ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
   3239 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3240 ; X86-NEXT:    vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
   3241 ; X86-NEXT:    ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
   3242 ; X86-NEXT:    vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
   3243 ; X86-NEXT:    ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
   3244 ; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3245 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3246 ; X86-NEXT:    retl ## encoding: [0xc3]
   3247 ;
   3248 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_512:
   3249 ; X64:       ## %bb.0:
   3250 ; X64-NEXT:    vpmovzxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x33,0xd0]
   3251 ; X64-NEXT:    ## zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
   3252 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3253 ; X64-NEXT:    vpmovzxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x33,0xc8]
   3254 ; X64-NEXT:    ## zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
   3255 ; X64-NEXT:    vpmovzxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x33,0xc0]
   3256 ; X64-NEXT:    ## zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero
   3257 ; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3258 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3259 ; X64-NEXT:    retq ## encoding: [0xc3]
   3260   %res = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
   3261   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
   3262   %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovzxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
   3263   %res3 = add <16 x i32> %res, %res1
   3264   %res4 = add <16 x i32> %res3, %res2
   3265   ret <16 x i32> %res4
   3266 }
   3267 
   3268 declare <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16>, <8 x i64>, i8)
   3269 
   3270 define <8 x i64>@test_int_x86_avx512_mask_pmovzxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
   3271 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
   3272 ; X86:       ## %bb.0:
   3273 ; X86-NEXT:    vpmovzxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xd0]
   3274 ; X86-NEXT:    ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   3275 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3276 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3277 ; X86-NEXT:    vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
   3278 ; X86-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   3279 ; X86-NEXT:    vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
   3280 ; X86-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   3281 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3282 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3283 ; X86-NEXT:    retl ## encoding: [0xc3]
   3284 ;
   3285 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_512:
   3286 ; X64:       ## %bb.0:
   3287 ; X64-NEXT:    vpmovzxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x34,0xd0]
   3288 ; X64-NEXT:    ## zmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   3289 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3290 ; X64-NEXT:    vpmovzxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x34,0xc8]
   3291 ; X64-NEXT:    ## zmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   3292 ; X64-NEXT:    vpmovzxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x34,0xc0]
   3293 ; X64-NEXT:    ## zmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   3294 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3295 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3296 ; X64-NEXT:    retq ## encoding: [0xc3]
   3297   %res = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
   3298   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
   3299   %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovzxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
   3300   %res3 = add <8 x i64> %res, %res1
   3301   %res4 = add <8 x i64> %res3, %res2
   3302   ret <8 x i64> %res4
   3303 }
   3304 
   3305 declare <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8>, <16 x i32>, i16)
   3306 
   3307 define <16 x i32>@test_int_x86_avx512_mask_pmovsxb_d_512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2) {
   3308 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
   3309 ; X86:       ## %bb.0:
   3310 ; X86-NEXT:    vpmovsxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd0]
   3311 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3312 ; X86-NEXT:    vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
   3313 ; X86-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
   3314 ; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3315 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3316 ; X86-NEXT:    retl ## encoding: [0xc3]
   3317 ;
   3318 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_512:
   3319 ; X64:       ## %bb.0:
   3320 ; X64-NEXT:    vpmovsxbd %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x21,0xd0]
   3321 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3322 ; X64-NEXT:    vpmovsxbd %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x21,0xc8]
   3323 ; X64-NEXT:    vpmovsxbd %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x21,0xc0]
   3324 ; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3325 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3326 ; X64-NEXT:    retq ## encoding: [0xc3]
   3327   %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 %x2)
   3328   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> zeroinitializer, i16 %x2)
   3329   %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxb.d.512(<16 x i8> %x0, <16 x i32> %x1, i16 -1)
   3330   %res3 = add <16 x i32> %res, %res1
   3331   %res4 = add <16 x i32> %res3, %res2
   3332   ret <16 x i32> %res4
   3333 }
   3334 
   3335 declare <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8>, <8 x i64>, i8)
   3336 
   3337 define <8 x i64>@test_int_x86_avx512_mask_pmovsxb_q_512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2) {
   3338 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
   3339 ; X86:       ## %bb.0:
   3340 ; X86-NEXT:    vpmovsxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x22,0xd0]
   3341 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3342 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3343 ; X86-NEXT:    vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
   3344 ; X86-NEXT:    vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
   3345 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3346 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3347 ; X86-NEXT:    retl ## encoding: [0xc3]
   3348 ;
   3349 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_512:
   3350 ; X64:       ## %bb.0:
   3351 ; X64-NEXT:    vpmovsxbq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x22,0xd0]
   3352 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3353 ; X64-NEXT:    vpmovsxbq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x22,0xc8]
   3354 ; X64-NEXT:    vpmovsxbq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x22,0xc0]
   3355 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3356 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3357 ; X64-NEXT:    retq ## encoding: [0xc3]
   3358   %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 %x2)
   3359   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> zeroinitializer, i8 %x2)
   3360   %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxb.q.512(<16 x i8> %x0, <8 x i64> %x1, i8 -1)
   3361   %res3 = add <8 x i64> %res, %res1
   3362   %res4 = add <8 x i64> %res3, %res2
   3363   ret <8 x i64> %res4
   3364 }
   3365 
   3366 declare <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32>, <8 x i64>, i8)
   3367 
   3368 define <8 x i64>@test_int_x86_avx512_mask_pmovsxd_q_512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2) {
   3369 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
   3370 ; X86:       ## %bb.0:
   3371 ; X86-NEXT:    vpmovsxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x25,0xd0]
   3372 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3373 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3374 ; X86-NEXT:    vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
   3375 ; X86-NEXT:    vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
   3376 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3377 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3378 ; X86-NEXT:    retl ## encoding: [0xc3]
   3379 ;
   3380 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_512:
   3381 ; X64:       ## %bb.0:
   3382 ; X64-NEXT:    vpmovsxdq %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x25,0xd0]
   3383 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3384 ; X64-NEXT:    vpmovsxdq %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x25,0xc8]
   3385 ; X64-NEXT:    vpmovsxdq %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x25,0xc0]
   3386 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3387 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3388 ; X64-NEXT:    retq ## encoding: [0xc3]
   3389   %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 %x2)
   3390   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> zeroinitializer, i8 %x2)
   3391   %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxd.q.512(<8 x i32> %x0, <8 x i64> %x1, i8 -1)
   3392   %res3 = add <8 x i64> %res, %res1
   3393   %res4 = add <8 x i64> %res3, %res2
   3394   ret <8 x i64> %res4
   3395 }
   3396 
   3397 
   3398 declare <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16>, <16 x i32>, i16)
   3399 
   3400 define <16 x i32>@test_int_x86_avx512_mask_pmovsxw_d_512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2) {
   3401 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
   3402 ; X86:       ## %bb.0:
   3403 ; X86-NEXT:    vpmovsxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xd0]
   3404 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3405 ; X86-NEXT:    vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
   3406 ; X86-NEXT:    vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
   3407 ; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3408 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3409 ; X86-NEXT:    retl ## encoding: [0xc3]
   3410 ;
   3411 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_512:
   3412 ; X64:       ## %bb.0:
   3413 ; X64-NEXT:    vpmovsxwd %ymm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x23,0xd0]
   3414 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3415 ; X64-NEXT:    vpmovsxwd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x23,0xc8]
   3416 ; X64-NEXT:    vpmovsxwd %ymm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x23,0xc0]
   3417 ; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3418 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3419 ; X64-NEXT:    retq ## encoding: [0xc3]
   3420   %res = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 %x2)
   3421   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> zeroinitializer, i16 %x2)
   3422   %res2 = call <16 x i32> @llvm.x86.avx512.mask.pmovsxw.d.512(<16 x i16> %x0, <16 x i32> %x1, i16 -1)
   3423   %res3 = add <16 x i32> %res, %res1
   3424   %res4 = add <16 x i32> %res3, %res2
   3425   ret <16 x i32> %res4
   3426 }
   3427 
   3428 
   3429 declare <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16>, <8 x i64>, i8)
   3430 
   3431 define <8 x i64>@test_int_x86_avx512_mask_pmovsxw_q_512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2) {
   3432 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
   3433 ; X86:       ## %bb.0:
   3434 ; X86-NEXT:    vpmovsxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd0]
   3435 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3436 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3437 ; X86-NEXT:    vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
   3438 ; X86-NEXT:    vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
   3439 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3440 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3441 ; X86-NEXT:    retl ## encoding: [0xc3]
   3442 ;
   3443 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_512:
   3444 ; X64:       ## %bb.0:
   3445 ; X64-NEXT:    vpmovsxwq %xmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x24,0xd0]
   3446 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3447 ; X64-NEXT:    vpmovsxwq %xmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x24,0xc8]
   3448 ; X64-NEXT:    vpmovsxwq %xmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x24,0xc0]
   3449 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3450 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3451 ; X64-NEXT:    retq ## encoding: [0xc3]
   3452   %res = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 %x2)
   3453   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> zeroinitializer, i8 %x2)
   3454   %res2 = call <8 x i64> @llvm.x86.avx512.mask.pmovsxw.q.512(<8 x i16> %x0, <8 x i64> %x1, i8 -1)
   3455   %res3 = add <8 x i64> %res, %res1
   3456   %res4 = add <8 x i64> %res3, %res2
   3457   ret <8 x i64> %res4
   3458 }
   3459 
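; Shift-by-immediate intrinsics (psrl.qi/di, psra.di/qi, psll.di/qi): each
; test issues the shift with a constant count ($4 or $3) in unmasked,
; merge-masked ({%k1}) and zero-masked ({%k1} {z}) forms and adds the results.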
   3460 declare <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64>, i32, <8 x i64>, i8)
   3461 
   3462 define <8 x i64>@test_int_x86_avx512_mask_psrl_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
   3463 ; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
   3464 ; X86:       ## %bb.0:
   3465 ; X86-NEXT:    vpsrlq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x04]
   3466 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   3467 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3468 ; X86-NEXT:    vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
   3469 ; X86-NEXT:    vpsrlq $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x04]
   3470 ; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   3471 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3472 ; X86-NEXT:    retl ## encoding: [0xc3]
   3473 ;
   3474 ; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_512:
   3475 ; X64:       ## %bb.0:
   3476 ; X64-NEXT:    vpsrlq $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xd0,0x04]
   3477 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3478 ; X64-NEXT:    vpsrlq $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xd0,0x04]
   3479 ; X64-NEXT:    vpsrlq $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xd0,0x04]
   3480 ; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   3481 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3482 ; X64-NEXT:    retq ## encoding: [0xc3]
   3483   %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 %x3)
   3484   %res1 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> %x2, i8 -1)
   3485   %res2 = call <8 x i64> @llvm.x86.avx512.mask.psrl.qi.512(<8 x i64> %x0, i32 4, <8 x i64> zeroinitializer, i8 %x3)
   3486   %res3 = add <8 x i64> %res, %res1
   3487   %res4 = add <8 x i64> %res3, %res2
   3488   ret <8 x i64> %res4
   3489 }
   3490 
   3491 declare <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32>, i32, <16 x i32>, i16)
   3492 
   3493 define <16 x i32>@test_int_x86_avx512_mask_psrl_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
   3494 ; X86-LABEL: test_int_x86_avx512_mask_psrl_di_512:
   3495 ; X86:       ## %bb.0:
   3496 ; X86-NEXT:    vpsrld $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x04]
   3497 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3498 ; X86-NEXT:    vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
   3499 ; X86-NEXT:    vpsrld $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x04]
   3500 ; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   3501 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3502 ; X86-NEXT:    retl ## encoding: [0xc3]
   3503 ;
   3504 ; X64-LABEL: test_int_x86_avx512_mask_psrl_di_512:
   3505 ; X64:       ## %bb.0:
   3506 ; X64-NEXT:    vpsrld $4, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xd0,0x04]
   3507 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3508 ; X64-NEXT:    vpsrld $4, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xd0,0x04]
   3509 ; X64-NEXT:    vpsrld $4, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xd0,0x04]
   3510 ; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   3511 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3512 ; X64-NEXT:    retq ## encoding: [0xc3]
   3513   %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 %x3)
   3514   %res1 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> %x2, i16 -1)
   3515   %res2 = call <16 x i32> @llvm.x86.avx512.mask.psrl.di.512(<16 x i32> %x0, i32 4, <16 x i32> zeroinitializer, i16 %x3)
   3516   %res3 = add <16 x i32> %res, %res1
   3517   %res4 = add <16 x i32> %res3, %res2
   3518   ret <16 x i32> %res4
   3519 }
   3520 
   3521 declare <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32>, i32, <16 x i32>, i16)
   3522 
   3523 define <16 x i32>@test_int_x86_avx512_mask_psra_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
   3524 ; X86-LABEL: test_int_x86_avx512_mask_psra_di_512:
   3525 ; X86:       ## %bb.0:
   3526 ; X86-NEXT:    vpsrad $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xe0,0x03]
   3527 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3528 ; X86-NEXT:    vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
   3529 ; X86-NEXT:    vpsrad $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x03]
   3530 ; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3531 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3532 ; X86-NEXT:    retl ## encoding: [0xc3]
   3533 ;
   3534 ; X64-LABEL: test_int_x86_avx512_mask_psra_di_512:
   3535 ; X64:       ## %bb.0:
   3536 ; X64-NEXT:    vpsrad $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xe0,0x03]
   3537 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3538 ; X64-NEXT:    vpsrad $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xe0,0x03]
   3539 ; X64-NEXT:    vpsrad $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xe0,0x03]
   3540 ; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3541 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3542 ; X64-NEXT:    retq ## encoding: [0xc3]
   3543   %res = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
   3544   %res1 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
   3545   %res2 = call <16 x i32> @llvm.x86.avx512.mask.psra.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
   3546   %res3 = add <16 x i32> %res, %res1
   3547   %res4 = add <16 x i32> %res3, %res2
   3548   ret <16 x i32> %res4
   3549 }
   3550 
   3551 declare <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64>, i32, <8 x i64>, i8)
   3552 
   3553 define <8 x i64>@test_int_x86_avx512_mask_psra_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
   3554 ; X86-LABEL: test_int_x86_avx512_mask_psra_qi_512:
   3555 ; X86:       ## %bb.0:
   3556 ; X86-NEXT:    vpsraq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xe0,0x03]
   3557 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   3558 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3559 ; X86-NEXT:    vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
   3560 ; X86-NEXT:    vpsraq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x03]
   3561 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3562 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3563 ; X86-NEXT:    retl ## encoding: [0xc3]
   3564 ;
   3565 ; X64-LABEL: test_int_x86_avx512_mask_psra_qi_512:
   3566 ; X64:       ## %bb.0:
   3567 ; X64-NEXT:    vpsraq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xe0,0x03]
   3568 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3569 ; X64-NEXT:    vpsraq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xe0,0x03]
   3570 ; X64-NEXT:    vpsraq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xe0,0x03]
   3571 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3572 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3573 ; X64-NEXT:    retq ## encoding: [0xc3]
   3574   %res = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
   3575   %res1 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
   3576   %res2 = call <8 x i64> @llvm.x86.avx512.mask.psra.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
   3577   %res3 = add <8 x i64> %res, %res1
   3578   %res4 = add <8 x i64> %res3, %res2
   3579   ret <8 x i64> %res4
   3580 }
   3581 
   3582 declare <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32>, i32, <16 x i32>, i16)
   3583 
   3584 define <16 x i32>@test_int_x86_avx512_mask_psll_di_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
   3585 ; X86-LABEL: test_int_x86_avx512_mask_psll_di_512:
   3586 ; X86:       ## %bb.0:
   3587 ; X86-NEXT:    vpslld $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xf0,0x03]
   3588 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3589 ; X86-NEXT:    vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
   3590 ; X86-NEXT:    vpslld $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x03]
   3591 ; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3592 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3593 ; X86-NEXT:    retl ## encoding: [0xc3]
   3594 ;
   3595 ; X64-LABEL: test_int_x86_avx512_mask_psll_di_512:
   3596 ; X64:       ## %bb.0:
   3597 ; X64-NEXT:    vpslld $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xf0,0x03]
   3598 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3599 ; X64-NEXT:    vpslld $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xf0,0x03]
   3600 ; X64-NEXT:    vpslld $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xf0,0x03]
   3601 ; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   3602 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   3603 ; X64-NEXT:    retq ## encoding: [0xc3]
   3604   %res = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
   3605   %res1 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
   3606   %res2 = call <16 x i32> @llvm.x86.avx512.mask.psll.di.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
   3607   %res3 = add <16 x i32> %res, %res1
   3608   %res4 = add <16 x i32> %res3, %res2
   3609   ret <16 x i32> %res4
   3610 }
   3611 
   3612 declare <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64>, i32, <8 x i64>, i8)
   3613 
   3614 define <8 x i64>@test_int_x86_avx512_mask_psll_qi_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
   3615 ; X86-LABEL: test_int_x86_avx512_mask_psll_qi_512:
   3616 ; X86:       ## %bb.0:
   3617 ; X86-NEXT:    vpsllq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf0,0x03]
   3618 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   3619 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3620 ; X86-NEXT:    vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
   3621 ; X86-NEXT:    vpsllq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x03]
   3622 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3623 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3624 ; X86-NEXT:    retl ## encoding: [0xc3]
   3625 ;
   3626 ; X64-LABEL: test_int_x86_avx512_mask_psll_qi_512:
   3627 ; X64:       ## %bb.0:
   3628 ; X64-NEXT:    vpsllq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x73,0xf0,0x03]
   3629 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3630 ; X64-NEXT:    vpsllq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x73,0xf0,0x03]
   3631 ; X64-NEXT:    vpsllq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x73,0xf0,0x03]
   3632 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   3633 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   3634 ; X64-NEXT:    retq ## encoding: [0xc3]
   3635   %res = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
   3636   %res1 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
   3637   %res2 = call <8 x i64> @llvm.x86.avx512.mask.psll.qi.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
   3638   %res3 = add <8 x i64> %res, %res1
   3639   %res4 = add <8 x i64> %res3, %res2
   3640   ret <8 x i64> %res4
   3641 }
   3642 
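; Shifts by a count held in an XMM register (vpslld/vpsllq, vpsrld/vpsrlq,
; vpsrad/vpsraq): separate tests cover the plain, merge-masked and zero-masked
; forms of each legacy mask intrinsic.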
   3643 define <16 x i32> @test_x86_avx512_psll_d(<16 x i32> %a0, <4 x i32> %a1) {
   3644 ; CHECK-LABEL: test_x86_avx512_psll_d:
   3645 ; CHECK:       ## %bb.0:
   3646 ; CHECK-NEXT:    vpslld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf2,0xc1]
   3647 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   3648   %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
   3649   ret <16 x i32> %res
   3650 }
   3651 
   3652 define <16 x i32> @test_x86_avx512_mask_psll_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
   3653 ; X86-LABEL: test_x86_avx512_mask_psll_d:
   3654 ; X86:       ## %bb.0:
   3655 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3656 ; X86-NEXT:    vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
   3657 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3658 ; X86-NEXT:    retl ## encoding: [0xc3]
   3659 ;
   3660 ; X64-LABEL: test_x86_avx512_mask_psll_d:
   3661 ; X64:       ## %bb.0:
   3662 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3663 ; X64-NEXT:    vpslld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf2,0xd1]
   3664 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3665 ; X64-NEXT:    retq ## encoding: [0xc3]
   3666   %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
   3667   ret <16 x i32> %res
   3668 }
   3669 
   3670 define <16 x i32> @test_x86_avx512_maskz_psll_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
   3671 ; X86-LABEL: test_x86_avx512_maskz_psll_d:
   3672 ; X86:       ## %bb.0:
   3673 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3674 ; X86-NEXT:    vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
   3675 ; X86-NEXT:    retl ## encoding: [0xc3]
   3676 ;
   3677 ; X64-LABEL: test_x86_avx512_maskz_psll_d:
   3678 ; X64:       ## %bb.0:
   3679 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3680 ; X64-NEXT:    vpslld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf2,0xc1]
   3681 ; X64-NEXT:    retq ## encoding: [0xc3]
   3682   %res = call <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
   3683   ret <16 x i32> %res
   3684 }
   3685 
   3686 declare <16 x i32> @llvm.x86.avx512.mask.psll.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
   3687 
   3688 define <8 x i64> @test_x86_avx512_psll_q(<8 x i64> %a0, <2 x i64> %a1) {
   3689 ; CHECK-LABEL: test_x86_avx512_psll_q:
   3690 ; CHECK:       ## %bb.0:
   3691 ; CHECK-NEXT:    vpsllq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf3,0xc1]
   3692 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   3693   %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
   3694   ret <8 x i64> %res
   3695 }
   3696 
   3697 define <8 x i64> @test_x86_avx512_mask_psll_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
   3698 ; X86-LABEL: test_x86_avx512_mask_psll_q:
   3699 ; X86:       ## %bb.0:
   3700 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3701 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3702 ; X86-NEXT:    vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
   3703 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3704 ; X86-NEXT:    retl ## encoding: [0xc3]
   3705 ;
   3706 ; X64-LABEL: test_x86_avx512_mask_psll_q:
   3707 ; X64:       ## %bb.0:
   3708 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3709 ; X64-NEXT:    vpsllq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf3,0xd1]
   3710 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3711 ; X64-NEXT:    retq ## encoding: [0xc3]
   3712   %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
   3713   ret <8 x i64> %res
   3714 }
   3715 
   3716 define <8 x i64> @test_x86_avx512_maskz_psll_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
   3717 ; X86-LABEL: test_x86_avx512_maskz_psll_q:
   3718 ; X86:       ## %bb.0:
   3719 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3720 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3721 ; X86-NEXT:    vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
   3722 ; X86-NEXT:    retl ## encoding: [0xc3]
   3723 ;
   3724 ; X64-LABEL: test_x86_avx512_maskz_psll_q:
   3725 ; X64:       ## %bb.0:
   3726 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3727 ; X64-NEXT:    vpsllq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf3,0xc1]
   3728 ; X64-NEXT:    retq ## encoding: [0xc3]
   3729   %res = call <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
   3730   ret <8 x i64> %res
   3731 }
   3732 
   3733 declare <8 x i64> @llvm.x86.avx512.mask.psll.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
   3734 
   3735 define <16 x i32> @test_x86_avx512_psrl_d(<16 x i32> %a0, <4 x i32> %a1) {
   3736 ; CHECK-LABEL: test_x86_avx512_psrl_d:
   3737 ; CHECK:       ## %bb.0:
   3738 ; CHECK-NEXT:    vpsrld %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd2,0xc1]
   3739 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   3740   %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
   3741   ret <16 x i32> %res
   3742 }
   3743 
   3744 define <16 x i32> @test_x86_avx512_mask_psrl_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
   3745 ; X86-LABEL: test_x86_avx512_mask_psrl_d:
   3746 ; X86:       ## %bb.0:
   3747 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3748 ; X86-NEXT:    vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
   3749 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3750 ; X86-NEXT:    retl ## encoding: [0xc3]
   3751 ;
   3752 ; X64-LABEL: test_x86_avx512_mask_psrl_d:
   3753 ; X64:       ## %bb.0:
   3754 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3755 ; X64-NEXT:    vpsrld %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd2,0xd1]
   3756 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3757 ; X64-NEXT:    retq ## encoding: [0xc3]
   3758   %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
   3759   ret <16 x i32> %res
   3760 }
   3761 
   3762 define <16 x i32> @test_x86_avx512_maskz_psrl_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
   3763 ; X86-LABEL: test_x86_avx512_maskz_psrl_d:
   3764 ; X86:       ## %bb.0:
   3765 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3766 ; X86-NEXT:    vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
   3767 ; X86-NEXT:    retl ## encoding: [0xc3]
   3768 ;
   3769 ; X64-LABEL: test_x86_avx512_maskz_psrl_d:
   3770 ; X64:       ## %bb.0:
   3771 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3772 ; X64-NEXT:    vpsrld %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd2,0xc1]
   3773 ; X64-NEXT:    retq ## encoding: [0xc3]
   3774   %res = call <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
   3775   ret <16 x i32> %res
   3776 }
   3777 
   3778 declare <16 x i32> @llvm.x86.avx512.mask.psrl.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
   3779 
   3780 define <8 x i64> @test_x86_avx512_psrl_q(<8 x i64> %a0, <2 x i64> %a1) {
   3781 ; CHECK-LABEL: test_x86_avx512_psrl_q:
   3782 ; CHECK:       ## %bb.0:
   3783 ; CHECK-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd3,0xc1]
   3784 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   3785   %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
   3786   ret <8 x i64> %res
   3787 }
   3788 
   3789 define <8 x i64> @test_x86_avx512_mask_psrl_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
   3790 ; X86-LABEL: test_x86_avx512_mask_psrl_q:
   3791 ; X86:       ## %bb.0:
   3792 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3793 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3794 ; X86-NEXT:    vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
   3795 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3796 ; X86-NEXT:    retl ## encoding: [0xc3]
   3797 ;
   3798 ; X64-LABEL: test_x86_avx512_mask_psrl_q:
   3799 ; X64:       ## %bb.0:
   3800 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3801 ; X64-NEXT:    vpsrlq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xd3,0xd1]
   3802 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3803 ; X64-NEXT:    retq ## encoding: [0xc3]
   3804   %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
   3805   ret <8 x i64> %res
   3806 }
   3807 
   3808 define <8 x i64> @test_x86_avx512_maskz_psrl_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
   3809 ; X86-LABEL: test_x86_avx512_maskz_psrl_q:
   3810 ; X86:       ## %bb.0:
   3811 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3812 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3813 ; X86-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
   3814 ; X86-NEXT:    retl ## encoding: [0xc3]
   3815 ;
   3816 ; X64-LABEL: test_x86_avx512_maskz_psrl_q:
   3817 ; X64:       ## %bb.0:
   3818 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3819 ; X64-NEXT:    vpsrlq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xd3,0xc1]
   3820 ; X64-NEXT:    retq ## encoding: [0xc3]
   3821   %res = call <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
   3822   ret <8 x i64> %res
   3823 }
   3824 
   3825 declare <8 x i64> @llvm.x86.avx512.mask.psrl.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
   3826 
   3827 define <16 x i32> @test_x86_avx512_psra_d(<16 x i32> %a0, <4 x i32> %a1) {
   3828 ; CHECK-LABEL: test_x86_avx512_psra_d:
   3829 ; CHECK:       ## %bb.0:
   3830 ; CHECK-NEXT:    vpsrad %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xe2,0xc1]
   3831 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   3832   %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
   3833   ret <16 x i32> %res
   3834 }
   3835 
   3836 define <16 x i32> @test_x86_avx512_mask_psra_d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask) {
   3837 ; X86-LABEL: test_x86_avx512_mask_psra_d:
   3838 ; X86:       ## %bb.0:
   3839 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3840 ; X86-NEXT:    vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
   3841 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3842 ; X86-NEXT:    retl ## encoding: [0xc3]
   3843 ;
   3844 ; X64-LABEL: test_x86_avx512_mask_psra_d:
   3845 ; X64:       ## %bb.0:
   3846 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3847 ; X64-NEXT:    vpsrad %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xe2,0xd1]
   3848 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3849 ; X64-NEXT:    retq ## encoding: [0xc3]
   3850   %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> %a2, i16 %mask)
   3851   ret <16 x i32> %res
   3852 }
   3853 
   3854 define <16 x i32> @test_x86_avx512_maskz_psra_d(<16 x i32> %a0, <4 x i32> %a1, i16 %mask) {
   3855 ; X86-LABEL: test_x86_avx512_maskz_psra_d:
   3856 ; X86:       ## %bb.0:
   3857 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3858 ; X86-NEXT:    vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
   3859 ; X86-NEXT:    retl ## encoding: [0xc3]
   3860 ;
   3861 ; X64-LABEL: test_x86_avx512_maskz_psra_d:
   3862 ; X64:       ## %bb.0:
   3863 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3864 ; X64-NEXT:    vpsrad %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xe2,0xc1]
   3865 ; X64-NEXT:    retq ## encoding: [0xc3]
   3866   %res = call <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32> %a0, <4 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
   3867   ret <16 x i32> %res
   3868 }
   3869 
   3870 declare <16 x i32> @llvm.x86.avx512.mask.psra.d(<16 x i32>, <4 x i32>, <16 x i32>, i16) nounwind readnone
   3871 
   3872 define <8 x i64> @test_x86_avx512_psra_q(<8 x i64> %a0, <2 x i64> %a1) {
   3873 ; CHECK-LABEL: test_x86_avx512_psra_q:
   3874 ; CHECK:       ## %bb.0:
   3875 ; CHECK-NEXT:    vpsraq %xmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xe2,0xc1]
   3876 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   3877   %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
   3878   ret <8 x i64> %res
   3879 }
   3880 
   3881 define <8 x i64> @test_x86_avx512_mask_psra_q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask) {
   3882 ; X86-LABEL: test_x86_avx512_mask_psra_q:
   3883 ; X86:       ## %bb.0:
   3884 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3885 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3886 ; X86-NEXT:    vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
   3887 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3888 ; X86-NEXT:    retl ## encoding: [0xc3]
   3889 ;
   3890 ; X64-LABEL: test_x86_avx512_mask_psra_q:
   3891 ; X64:       ## %bb.0:
   3892 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3893 ; X64-NEXT:    vpsraq %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xe2,0xd1]
   3894 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3895 ; X64-NEXT:    retq ## encoding: [0xc3]
   3896   %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> %a2, i8 %mask)
   3897   ret <8 x i64> %res
   3898 }
   3899 
   3900 define <8 x i64> @test_x86_avx512_maskz_psra_q(<8 x i64> %a0, <2 x i64> %a1, i8 %mask) {
   3901 ; X86-LABEL: test_x86_avx512_maskz_psra_q:
   3902 ; X86:       ## %bb.0:
   3903 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3904 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3905 ; X86-NEXT:    vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
   3906 ; X86-NEXT:    retl ## encoding: [0xc3]
   3907 ;
   3908 ; X64-LABEL: test_x86_avx512_maskz_psra_q:
   3909 ; X64:       ## %bb.0:
   3910 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3911 ; X64-NEXT:    vpsraq %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xe2,0xc1]
   3912 ; X64-NEXT:    retq ## encoding: [0xc3]
   3913   %res = call <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64> %a0, <2 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
   3914   ret <8 x i64> %res
   3915 }
   3916 
   3917 declare <8 x i64> @llvm.x86.avx512.mask.psra.q(<8 x i64>, <2 x i64>, <8 x i64>, i8) nounwind readnone
   3918 
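; Per-element (variable) shifts vpsllvd/vpsllvq, vpsravd/vpsravq and
; vpsrlvd/vpsrlvq, again split into unmasked, merge-masked and zero-masked
; tests for the legacy mask intrinsics.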
   3919 define <16 x i32> @test_x86_avx512_psllv_d(<16 x i32> %a0, <16 x i32> %a1) {
   3920 ; CHECK-LABEL: test_x86_avx512_psllv_d:
   3921 ; CHECK:       ## %bb.0:
   3922 ; CHECK-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x47,0xc1]
   3923 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   3924   %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
   3925   ret <16 x i32> %res
   3926 }
   3927 
   3928 define <16 x i32> @test_x86_avx512_mask_psllv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
   3929 ; X86-LABEL: test_x86_avx512_mask_psllv_d:
   3930 ; X86:       ## %bb.0:
   3931 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3932 ; X86-NEXT:    vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
   3933 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3934 ; X86-NEXT:    retl ## encoding: [0xc3]
   3935 ;
   3936 ; X64-LABEL: test_x86_avx512_mask_psllv_d:
   3937 ; X64:       ## %bb.0:
   3938 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3939 ; X64-NEXT:    vpsllvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x47,0xd1]
   3940 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3941 ; X64-NEXT:    retq ## encoding: [0xc3]
   3942   %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
   3943   ret <16 x i32> %res
   3944 }
   3945 
   3946 define <16 x i32> @test_x86_avx512_maskz_psllv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
   3947 ; X86-LABEL: test_x86_avx512_maskz_psllv_d:
   3948 ; X86:       ## %bb.0:
   3949 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3950 ; X86-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
   3951 ; X86-NEXT:    retl ## encoding: [0xc3]
   3952 ;
   3953 ; X64-LABEL: test_x86_avx512_maskz_psllv_d:
   3954 ; X64:       ## %bb.0:
   3955 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3956 ; X64-NEXT:    vpsllvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x47,0xc1]
   3957 ; X64-NEXT:    retq ## encoding: [0xc3]
   3958   %res = call <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
   3959   ret <16 x i32> %res
   3960 }
   3961 
   3962 declare <16 x i32> @llvm.x86.avx512.mask.psllv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
   3963 
   3964 define <8 x i64> @test_x86_avx512_psllv_q(<8 x i64> %a0, <8 x i64> %a1) {
   3965 ; CHECK-LABEL: test_x86_avx512_psllv_q:
   3966 ; CHECK:       ## %bb.0:
   3967 ; CHECK-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x47,0xc1]
   3968 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   3969   %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
   3970   ret <8 x i64> %res
   3971 }
   3972 
   3973 define <8 x i64> @test_x86_avx512_mask_psllv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
   3974 ; X86-LABEL: test_x86_avx512_mask_psllv_q:
   3975 ; X86:       ## %bb.0:
   3976 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3977 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3978 ; X86-NEXT:    vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
   3979 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3980 ; X86-NEXT:    retl ## encoding: [0xc3]
   3981 ;
   3982 ; X64-LABEL: test_x86_avx512_mask_psllv_q:
   3983 ; X64:       ## %bb.0:
   3984 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3985 ; X64-NEXT:    vpsllvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x47,0xd1]
   3986 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   3987 ; X64-NEXT:    retq ## encoding: [0xc3]
   3988   %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
   3989   ret <8 x i64> %res
   3990 }
   3991 
   3992 define <8 x i64> @test_x86_avx512_maskz_psllv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
   3993 ; X86-LABEL: test_x86_avx512_maskz_psllv_q:
   3994 ; X86:       ## %bb.0:
   3995 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3996 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   3997 ; X86-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
   3998 ; X86-NEXT:    retl ## encoding: [0xc3]
   3999 ;
   4000 ; X64-LABEL: test_x86_avx512_maskz_psllv_q:
   4001 ; X64:       ## %bb.0:
   4002 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4003 ; X64-NEXT:    vpsllvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x47,0xc1]
   4004 ; X64-NEXT:    retq ## encoding: [0xc3]
   4005   %res = call <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
   4006   ret <8 x i64> %res
   4007 }
   4008 
   4009 declare <8 x i64> @llvm.x86.avx512.mask.psllv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
   4010 
   4011 
   4012 define <16 x i32> @test_x86_avx512_psrav_d(<16 x i32> %a0, <16 x i32> %a1) {
   4013 ; CHECK-LABEL: test_x86_avx512_psrav_d:
   4014 ; CHECK:       ## %bb.0:
   4015 ; CHECK-NEXT:    vpsravd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x46,0xc1]
   4016 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   4017   %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
   4018   ret <16 x i32> %res
   4019 }
   4020 
   4021 define <16 x i32> @test_x86_avx512_mask_psrav_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
   4022 ; X86-LABEL: test_x86_avx512_mask_psrav_d:
   4023 ; X86:       ## %bb.0:
   4024 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4025 ; X86-NEXT:    vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
   4026 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4027 ; X86-NEXT:    retl ## encoding: [0xc3]
   4028 ;
   4029 ; X64-LABEL: test_x86_avx512_mask_psrav_d:
   4030 ; X64:       ## %bb.0:
   4031 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4032 ; X64-NEXT:    vpsravd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x46,0xd1]
   4033 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4034 ; X64-NEXT:    retq ## encoding: [0xc3]
   4035   %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
   4036   ret <16 x i32> %res
   4037 }
   4038 
   4039 define <16 x i32> @test_x86_avx512_maskz_psrav_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
   4040 ; X86-LABEL: test_x86_avx512_maskz_psrav_d:
   4041 ; X86:       ## %bb.0:
   4042 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4043 ; X86-NEXT:    vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
   4044 ; X86-NEXT:    retl ## encoding: [0xc3]
   4045 ;
   4046 ; X64-LABEL: test_x86_avx512_maskz_psrav_d:
   4047 ; X64:       ## %bb.0:
   4048 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4049 ; X64-NEXT:    vpsravd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x46,0xc1]
   4050 ; X64-NEXT:    retq ## encoding: [0xc3]
   4051   %res = call <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
   4052   ret <16 x i32> %res
   4053 }
   4054 
   4055 declare <16 x i32> @llvm.x86.avx512.mask.psrav.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
   4056 
   4057 define <8 x i64> @test_x86_avx512_psrav_q(<8 x i64> %a0, <8 x i64> %a1) {
   4058 ; CHECK-LABEL: test_x86_avx512_psrav_q:
   4059 ; CHECK:       ## %bb.0:
   4060 ; CHECK-NEXT:    vpsravq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x46,0xc1]
   4061 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   4062   %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
   4063   ret <8 x i64> %res
   4064 }
   4065 
   4066 define <8 x i64> @test_x86_avx512_mask_psrav_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
   4067 ; X86-LABEL: test_x86_avx512_mask_psrav_q:
   4068 ; X86:       ## %bb.0:
   4069 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4070 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4071 ; X86-NEXT:    vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
   4072 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4073 ; X86-NEXT:    retl ## encoding: [0xc3]
   4074 ;
   4075 ; X64-LABEL: test_x86_avx512_mask_psrav_q:
   4076 ; X64:       ## %bb.0:
   4077 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4078 ; X64-NEXT:    vpsravq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x46,0xd1]
   4079 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4080 ; X64-NEXT:    retq ## encoding: [0xc3]
   4081   %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
   4082   ret <8 x i64> %res
   4083 }
   4084 
   4085 define <8 x i64> @test_x86_avx512_maskz_psrav_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
   4086 ; X86-LABEL: test_x86_avx512_maskz_psrav_q:
   4087 ; X86:       ## %bb.0:
   4088 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4089 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4090 ; X86-NEXT:    vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
   4091 ; X86-NEXT:    retl ## encoding: [0xc3]
   4092 ;
   4093 ; X64-LABEL: test_x86_avx512_maskz_psrav_q:
   4094 ; X64:       ## %bb.0:
   4095 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4096 ; X64-NEXT:    vpsravq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x46,0xc1]
   4097 ; X64-NEXT:    retq ## encoding: [0xc3]
   4098   %res = call <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
   4099   ret <8 x i64> %res
   4100 }
   4101 
   4102 declare <8 x i64> @llvm.x86.avx512.mask.psrav.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
   4103 
   4104 define <16 x i32> @test_x86_avx512_psrlv_d(<16 x i32> %a0, <16 x i32> %a1) {
   4105 ; CHECK-LABEL: test_x86_avx512_psrlv_d:
   4106 ; CHECK:       ## %bb.0:
   4107 ; CHECK-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x45,0xc1]
   4108 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   4109   %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 -1)
   4110   ret <16 x i32> %res
   4111 }
   4112 
   4113 define <16 x i32> @test_x86_avx512_mask_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask) {
   4114 ; X86-LABEL: test_x86_avx512_mask_psrlv_d:
   4115 ; X86:       ## %bb.0:
   4116 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4117 ; X86-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
   4118 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4119 ; X86-NEXT:    retl ## encoding: [0xc3]
   4120 ;
   4121 ; X64-LABEL: test_x86_avx512_mask_psrlv_d:
   4122 ; X64:       ## %bb.0:
   4123 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4124 ; X64-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x45,0xd1]
   4125 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4126 ; X64-NEXT:    retq ## encoding: [0xc3]
   4127   %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> %a2, i16 %mask)
   4128   ret <16 x i32> %res
   4129 }
   4130 
   4131 define <16 x i32> @test_x86_avx512_maskz_psrlv_d(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
   4132 ; X86-LABEL: test_x86_avx512_maskz_psrlv_d:
   4133 ; X86:       ## %bb.0:
   4134 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4135 ; X86-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
   4136 ; X86-NEXT:    retl ## encoding: [0xc3]
   4137 ;
   4138 ; X64-LABEL: test_x86_avx512_maskz_psrlv_d:
   4139 ; X64:       ## %bb.0:
   4140 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4141 ; X64-NEXT:    vpsrlvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x45,0xc1]
   4142 ; X64-NEXT:    retq ## encoding: [0xc3]
   4143   %res = call <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32> %a0, <16 x i32> %a1, <16 x i32> zeroinitializer, i16 %mask)
   4144   ret <16 x i32> %res
   4145 }
   4146 
   4147 declare <16 x i32> @llvm.x86.avx512.mask.psrlv.d(<16 x i32>, <16 x i32>, <16 x i32>, i16) nounwind readnone
   4148 
   4149 define <8 x i64> @test_x86_avx512_psrlv_q(<8 x i64> %a0, <8 x i64> %a1) {
   4150 ; CHECK-LABEL: test_x86_avx512_psrlv_q:
   4151 ; CHECK:       ## %bb.0:
   4152 ; CHECK-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0xc1]
   4153 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   4154   %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 -1)
   4155   ret <8 x i64> %res
   4156 }
   4157 
   4158 define <8 x i64> @test_x86_avx512_mask_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask) {
   4159 ; X86-LABEL: test_x86_avx512_mask_psrlv_q:
   4160 ; X86:       ## %bb.0:
   4161 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4162 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4163 ; X86-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
   4164 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4165 ; X86-NEXT:    retl ## encoding: [0xc3]
   4166 ;
   4167 ; X64-LABEL: test_x86_avx512_mask_psrlv_q:
   4168 ; X64:       ## %bb.0:
   4169 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4170 ; X64-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x45,0xd1]
   4171 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4172 ; X64-NEXT:    retq ## encoding: [0xc3]
   4173   %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> %a2, i8 %mask)
   4174   ret <8 x i64> %res
   4175 }
   4176 
   4177 define <8 x i64> @test_x86_avx512_maskz_psrlv_q(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
   4178 ; X86-LABEL: test_x86_avx512_maskz_psrlv_q:
   4179 ; X86:       ## %bb.0:
   4180 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4181 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4182 ; X86-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
   4183 ; X86-NEXT:    retl ## encoding: [0xc3]
   4184 ;
   4185 ; X64-LABEL: test_x86_avx512_maskz_psrlv_q:
   4186 ; X64:       ## %bb.0:
   4187 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4188 ; X64-NEXT:    vpsrlvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x45,0xc1]
   4189 ; X64-NEXT:    retq ## encoding: [0xc3]
   4190   %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %a1, <8 x i64> zeroinitializer, i8 %mask)
   4191   ret <8 x i64> %res
   4192 }
   4193 
   4194 declare <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64>, <8 x i64>, <8 x i64>, i8) nounwind readnone
   4195 
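; Memory-operand form: the vector of shift counts loaded from %ptr should be
; folded into the vpsrlvq instruction rather than loaded separately.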
   4196 define <8 x i64> @test_x86_avx512_psrlv_q_memop(<8 x i64> %a0, <8 x i64>* %ptr) {
   4197 ; X86-LABEL: test_x86_avx512_psrlv_q_memop:
   4198 ; X86:       ## %bb.0:
   4199 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4200 ; X86-NEXT:    vpsrlvq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x00]
   4201 ; X86-NEXT:    retl ## encoding: [0xc3]
   4202 ;
   4203 ; X64-LABEL: test_x86_avx512_psrlv_q_memop:
   4204 ; X64:       ## %bb.0:
   4205 ; X64-NEXT:    vpsrlvq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x45,0x07]
   4206 ; X64-NEXT:    retq ## encoding: [0xc3]
   4207   %b = load <8 x i64>, <8 x i64>* %ptr
   4208   %res = call <8 x i64> @llvm.x86.avx512.mask.psrlv.q(<8 x i64> %a0, <8 x i64> %b, <8 x i64> zeroinitializer, i8 -1)
   4209   ret <8 x i64> %res
   4210 }
   4211 
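; Signed and unsigned int-to-double conversions (cvtdq2pd/cvtudq2pd): each
; test adds the merge-masked result to the unmasked one with vaddpd so both
; forms are checked.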
   4212 declare <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32>, <8 x double>, i8)
   4213 
   4214 define <8 x double>@test_int_x86_avx512_mask_cvt_dq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
   4215 ; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
   4216 ; X86:       ## %bb.0:
   4217 ; X86-NEXT:    vcvtdq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xd0]
   4218 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4219 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4220 ; X86-NEXT:    vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
   4221 ; X86-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
   4222 ; X86-NEXT:    retl ## encoding: [0xc3]
   4223 ;
   4224 ; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_512:
   4225 ; X64:       ## %bb.0:
   4226 ; X64-NEXT:    vcvtdq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0xe6,0xd0]
   4227 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4228 ; X64-NEXT:    vcvtdq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0xe6,0xc8]
   4229 ; X64-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
   4230 ; X64-NEXT:    retq ## encoding: [0xc3]
   4231   %res = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
   4232   %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtdq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
   4233   %res2 = fadd <8 x double> %res, %res1
   4234   ret <8 x double> %res2
   4235 }
   4236 
   4237 declare <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32>, <8 x double>, i8)
   4238 
   4239 define <8 x double>@test_int_x86_avx512_mask_cvt_udq2pd_512(<8 x i32> %x0, <8 x double> %x1, i8 %x2) {
   4240 ; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
   4241 ; X86:       ## %bb.0:
   4242 ; X86-NEXT:    vcvtudq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xd0]
   4243 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4244 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4245 ; X86-NEXT:    vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
   4246 ; X86-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
   4247 ; X86-NEXT:    retl ## encoding: [0xc3]
   4248 ;
   4249 ; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_512:
   4250 ; X64:       ## %bb.0:
   4251 ; X64-NEXT:    vcvtudq2pd %ymm0, %zmm2 ## encoding: [0x62,0xf1,0x7e,0x48,0x7a,0xd0]
   4252 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4253 ; X64-NEXT:    vcvtudq2pd %ymm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x49,0x7a,0xc8]
   4254 ; X64-NEXT:    vaddpd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc2]
   4255 ; X64-NEXT:    retq ## encoding: [0xc3]
   4256   %res = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 %x2)
   4257   %res1 = call <8 x double> @llvm.x86.avx512.mask.cvtudq2pd.512(<8 x i32> %x0, <8 x double> %x1, i8 -1)
   4258   %res2 = fadd <8 x double> %res, %res1
   4259   ret <8 x double> %res2
   4260 }
   4261 
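; valignq/valignd: the concatenate-and-rotate semantics appear in the shuffle
; comments emitted after each instruction; unmasked, merge-masked and
; zero-masked forms are covered below.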
   4262 define <8 x i64> @test_valign_q(<8 x i64> %a, <8 x i64> %b) {
   4263 ; CHECK-LABEL: test_valign_q:
   4264 ; CHECK:       ## %bb.0:
   4265 ; CHECK-NEXT:    valignq $2, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x03,0xc1,0x02]
   4266 ; CHECK-NEXT:    ## zmm0 = zmm1[2,3,4,5,6,7],zmm0[0,1]
   4267 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   4268   %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> zeroinitializer, i8 -1)
   4269   ret <8 x i64> %res
   4270 }
   4271 
   4272 define <8 x i64> @test_mask_valign_q(<8 x i64> %a, <8 x i64> %b, <8 x i64> %src, i8 %mask) {
   4273 ; X86-LABEL: test_mask_valign_q:
   4274 ; X86:       ## %bb.0:
   4275 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4276 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4277 ; X86-NEXT:    valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
   4278 ; X86-NEXT:    ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
   4279 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4280 ; X86-NEXT:    retl ## encoding: [0xc3]
   4281 ;
   4282 ; X64-LABEL: test_mask_valign_q:
   4283 ; X64:       ## %bb.0:
   4284 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4285 ; X64-NEXT:    valignq $2, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x03,0xd1,0x02]
   4286 ; X64-NEXT:    ## zmm2 {%k1} = zmm1[2,3,4,5,6,7],zmm0[0,1]
   4287 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4288 ; X64-NEXT:    retq ## encoding: [0xc3]
   4289   %res = call <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64> %a, <8 x i64> %b, i32 2, <8 x i64> %src, i8 %mask)
   4290   ret <8 x i64> %res
   4291 }
   4292 
   4293 declare <8 x i64> @llvm.x86.avx512.mask.valign.q.512(<8 x i64>, <8 x i64>, i32, <8 x i64>, i8)
   4294 
   4295 define <16 x i32> @test_maskz_valign_d(<16 x i32> %a, <16 x i32> %b, i16 %mask) {
   4296 ; X86-LABEL: test_maskz_valign_d:
   4297 ; X86:       ## %bb.0:
   4298 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4299 ; X86-NEXT:    valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
   4300 ; X86-NEXT:    ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
   4301 ; X86-NEXT:    retl ## encoding: [0xc3]
   4302 ;
   4303 ; X64-LABEL: test_maskz_valign_d:
   4304 ; X64:       ## %bb.0:
   4305 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4306 ; X64-NEXT:    valignd $5, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x03,0xc1,0x05]
   4307 ; X64-NEXT:    ## zmm0 {%k1} {z} = zmm1[5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1,2,3,4]
   4308 ; X64-NEXT:    retq ## encoding: [0xc3]
   4309   %res = call <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32> %a, <16 x i32> %b, i32 5, <16 x i32> zeroinitializer, i16 %mask)
   4310   ret <16 x i32> %res
   4311 }
   4312 
   4313 declare <16 x i32> @llvm.x86.avx512.mask.valign.d.512(<16 x i32>, <16 x i32>, i32, <16 x i32>, i16)
   4314 
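; vpermilvar.pd/ps with a variable control vector: unmasked, merge-masked and
; zero-masked results are combined with vaddpd/vaddps.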
   4315 declare <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
   4316 
   4317 define <8 x double>@test_int_x86_avx512_mask_vpermilvar_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
   4318 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
   4319 ; X86:       ## %bb.0:
   4320 ; X86-NEXT:    vpermilpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x0d,0xd9]
   4321 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4322 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4323 ; X86-NEXT:    vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
   4324 ; X86-NEXT:    vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
   4325 ; X86-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
   4326 ; X86-NEXT:    vaddpd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0x58,0xc0]
   4327 ; X86-NEXT:    retl ## encoding: [0xc3]
   4328 ;
   4329 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_512:
   4330 ; X64:       ## %bb.0:
   4331 ; X64-NEXT:    vpermilpd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x0d,0xd9]
   4332 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4333 ; X64-NEXT:    vpermilpd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x0d,0xd1]
   4334 ; X64-NEXT:    vpermilpd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x0d,0xc1]
   4335 ; X64-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
   4336 ; X64-NEXT:    vaddpd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0x58,0xc0]
   4337 ; X64-NEXT:    retq ## encoding: [0xc3]
   4338   %res = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
   4339   %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
   4340   %res2 = call <8 x double> @llvm.x86.avx512.mask.vpermilvar.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
   4341   %res3 = fadd <8 x double> %res, %res1
   4342   %res4 = fadd <8 x double> %res2, %res3
   4343   ret <8 x double> %res4
   4344 }
   4345 
   4346 declare <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
   4347 
   4348 define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
   4349 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
   4350 ; X86:       ## %bb.0:
   4351 ; X86-NEXT:    vpermilps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0xd9]
   4352 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4353 ; X86-NEXT:    vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
   4354 ; X86-NEXT:    vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
   4355 ; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
   4356 ; X86-NEXT:    vaddps %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
   4357 ; X86-NEXT:    retl ## encoding: [0xc3]
   4358 ;
   4359 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512:
   4360 ; X64:       ## %bb.0:
   4361 ; X64-NEXT:    vpermilps %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0xd9]
   4362 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4363 ; X64-NEXT:    vpermilps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0xd1]
   4364 ; X64-NEXT:    vpermilps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0xc1]
   4365 ; X64-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
   4366 ; X64-NEXT:    vaddps %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x64,0x48,0x58,0xc0]
   4367 ; X64-NEXT:    retq ## encoding: [0xc3]
   4368   %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
   4369   %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
   4370   %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
   4371   %res3 = fadd <16 x float> %res, %res1
   4372   %res4 = fadd <16 x float> %res2, %res3
   4373   ret <16 x float> %res4
   4374 }
   4375 
   4376 ; Test case to make sure we can print shuffle decode comments for constant pool loads.
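; The vpermilps controls below are build-vector constants, so they are
; materialized from the constant pool; the CHECK lines verify both the decoded
; element order and the fixup/relocation kind on each target.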
   4377 define <16 x float>@test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
   4378 ; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
   4379 ; X86:       ## %bb.0:
   4380 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4381 ; X86-NEXT:    vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
   4382 ; X86-NEXT:    ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
   4383 ; X86-NEXT:    ## fixup A - offset: 6, value: LCPI203_0, kind: FK_Data_4
   4384 ; X86-NEXT:    vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
   4385 ; X86-NEXT:    ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
   4386 ; X86-NEXT:    ## fixup A - offset: 6, value: LCPI203_1, kind: FK_Data_4
   4387 ; X86-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
   4388 ; X86-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
   4389 ; X86-NEXT:    ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
   4390 ; X86-NEXT:    ## fixup A - offset: 6, value: LCPI203_2, kind: FK_Data_4
   4391 ; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
   4392 ; X86-NEXT:    retl ## encoding: [0xc3]
   4393 ;
   4394 ; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_512_constant_pool:
   4395 ; X64:       ## %bb.0:
   4396 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4397 ; X64-NEXT:    vpermilps {{.*#+}} zmm2 {%k1} = zmm0[2,3,0,1,7,6,5,4,9,8,11,10,12,13,14,15]
   4398 ; X64-NEXT:    ## encoding: [0x62,0xf2,0x7d,0x49,0x0c,0x15,A,A,A,A]
   4399 ; X64-NEXT:    ## fixup A - offset: 6, value: LCPI203_0-4, kind: reloc_riprel_4byte
   4400 ; X64-NEXT:    vpermilps {{.*#+}} zmm1 {%k1} {z} = zmm0[1,0,3,2,4,5,6,7,9,8,11,10,12,13,14,15]
   4401 ; X64-NEXT:    ## encoding: [0x62,0xf2,0x7d,0xc9,0x0c,0x0d,A,A,A,A]
   4402 ; X64-NEXT:    ## fixup A - offset: 6, value: LCPI203_1-4, kind: reloc_riprel_4byte
   4403 ; X64-NEXT:    vaddps %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc9]
   4404 ; X64-NEXT:    vpermilps {{.*#+}} zmm0 = zmm0[1,0,3,2,4,5,6,7,10,11,8,9,14,15,13,12]
   4405 ; X64-NEXT:    ## encoding: [0x62,0xf2,0x7d,0x48,0x0c,0x05,A,A,A,A]
   4406 ; X64-NEXT:    ## fixup A - offset: 6, value: LCPI203_2-4, kind: reloc_riprel_4byte
   4407 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
   4408 ; X64-NEXT:    retq ## encoding: [0xc3]
   4409   %res = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 3, i32 2, i32 1, i32 0, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> %x2, i16 %x3)
   4410   %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3>, <16 x float> zeroinitializer, i16 %x3)
   4411   %res2 = call <16 x float> @llvm.x86.avx512.mask.vpermilvar.ps.512(<16 x float> %x0, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 0, i32 1, i32 2, i32 3, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 1, i32 0>, <16 x float> %x2, i16 -1)
   4412   %res3 = fadd <16 x float> %res, %res1
   4413   %res4 = fadd <16 x float> %res2, %res3
   4414   ret <16 x float> %res4
   4415 }
   4416 
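; VPMULDQ tests: llvm.x86.avx512.mask.pmul.dq.512 multiplies the sign-extended
; even (low) 32-bit element of each 64-bit lane, giving eight 64-bit products
; (cf. the _mm512_mul_epi32 family).  The suffixes appear to follow the usual
; pattern: rr = reg/reg, k = merge-masked, kz = zero-masked, rm = memory
; operand, rmb = 64-bit element broadcast from memory.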
   4417 define <8 x i64> @test_mask_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
   4418 ; CHECK-LABEL: test_mask_mul_epi32_rr:
   4419 ; CHECK:       ## %bb.0:
   4420 ; CHECK-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
   4421 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   4422   %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
   4423   ret <8 x i64> %res
   4424 }
   4425 
   4426 define <8 x i64> @test_mask_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
   4427 ; X86-LABEL: test_mask_mul_epi32_rrk:
   4428 ; X86:       ## %bb.0:
   4429 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4430 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4431 ; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
   4432 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4433 ; X86-NEXT:    retl ## encoding: [0xc3]
   4434 ;
   4435 ; X64-LABEL: test_mask_mul_epi32_rrk:
   4436 ; X64:       ## %bb.0:
   4437 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4438 ; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
   4439 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4440 ; X64-NEXT:    retq ## encoding: [0xc3]
   4441   %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
   4442   ret <8 x i64> %res
   4443 }
   4444 
   4445 define <8 x i64> @test_mask_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
   4446 ; X86-LABEL: test_mask_mul_epi32_rrkz:
   4447 ; X86:       ## %bb.0:
   4448 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4449 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4450 ; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
   4451 ; X86-NEXT:    retl ## encoding: [0xc3]
   4452 ;
   4453 ; X64-LABEL: test_mask_mul_epi32_rrkz:
   4454 ; X64:       ## %bb.0:
   4455 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4456 ; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
   4457 ; X64-NEXT:    retq ## encoding: [0xc3]
   4458   %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
   4459   ret <8 x i64> %res
   4460 }
   4461 
   4462 define <8 x i64> @test_mask_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
   4463 ; X86-LABEL: test_mask_mul_epi32_rm:
   4464 ; X86:       ## %bb.0:
   4465 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4466 ; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00]
   4467 ; X86-NEXT:    retl ## encoding: [0xc3]
   4468 ;
   4469 ; X64-LABEL: test_mask_mul_epi32_rm:
   4470 ; X64:       ## %bb.0:
   4471 ; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
   4472 ; X64-NEXT:    retq ## encoding: [0xc3]
   4473   %b = load <16 x i32>, <16 x i32>* %ptr_b
   4474   %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
   4475   ret <8 x i64> %res
   4476 }
   4477 
   4478 define <8 x i64> @test_mask_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   4479 ; X86-LABEL: test_mask_mul_epi32_rmk:
   4480 ; X86:       ## %bb.0:
   4481 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4482 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4483 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   4484 ; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08]
   4485 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   4486 ; X86-NEXT:    retl ## encoding: [0xc3]
   4487 ;
   4488 ; X64-LABEL: test_mask_mul_epi32_rmk:
   4489 ; X64:       ## %bb.0:
   4490 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4491 ; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
   4492 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   4493 ; X64-NEXT:    retq ## encoding: [0xc3]
   4494   %b = load <16 x i32>, <16 x i32>* %ptr_b
   4495   %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
   4496   ret <8 x i64> %res
   4497 }
   4498 
   4499 define <8 x i64> @test_mask_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
   4500 ; X86-LABEL: test_mask_mul_epi32_rmkz:
   4501 ; X86:       ## %bb.0:
   4502 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4503 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4504 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   4505 ; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00]
   4506 ; X86-NEXT:    retl ## encoding: [0xc3]
   4507 ;
   4508 ; X64-LABEL: test_mask_mul_epi32_rmkz:
   4509 ; X64:       ## %bb.0:
   4510 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4511 ; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
   4512 ; X64-NEXT:    retq ## encoding: [0xc3]
   4513   %b = load <16 x i32>, <16 x i32>* %ptr_b
   4514   %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
   4515   ret <8 x i64> %res
   4516 }
   4517 
   4518 define <8 x i64> @test_mask_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
   4519 ; X86-LABEL: test_mask_mul_epi32_rmb:
   4520 ; X86:       ## %bb.0:
   4521 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4522 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   4523 ; X86-NEXT:    ## xmm1 = mem[0],zero
   4524 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   4525 ; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
   4526 ; X86-NEXT:    retl ## encoding: [0xc3]
   4527 ;
   4528 ; X64-LABEL: test_mask_mul_epi32_rmb:
   4529 ; X64:       ## %bb.0:
   4530 ; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
   4531 ; X64-NEXT:    retq ## encoding: [0xc3]
   4532   %q = load i64, i64* %ptr_b
   4533   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   4534   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   4535   %b = bitcast <8 x i64> %b64 to <16 x i32>
   4536   %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
   4537   ret <8 x i64> %res
   4538 }
   4539 
   4540 define <8 x i64> @test_mask_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   4541 ; X86-LABEL: test_mask_mul_epi32_rmbk:
   4542 ; X86:       ## %bb.0:
   4543 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4544 ; X86-NEXT:    vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
   4545 ; X86-NEXT:    ## xmm2 = mem[0],zero
   4546 ; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
   4547 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   4548 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4549 ; X86-NEXT:    vpmuldq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xca]
   4550 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   4551 ; X86-NEXT:    retl ## encoding: [0xc3]
   4552 ;
   4553 ; X64-LABEL: test_mask_mul_epi32_rmbk:
   4554 ; X64:       ## %bb.0:
   4555 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4556 ; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
   4557 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   4558 ; X64-NEXT:    retq ## encoding: [0xc3]
   4559   %q = load i64, i64* %ptr_b
   4560   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   4561   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   4562   %b = bitcast <8 x i64> %b64 to <16 x i32>
   4563   %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
   4564   ret <8 x i64> %res
   4565 }
   4566 
   4567 define <8 x i64> @test_mask_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
   4568 ; X86-LABEL: test_mask_mul_epi32_rmbkz:
   4569 ; X86:       ## %bb.0:
   4570 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4571 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   4572 ; X86-NEXT:    ## xmm1 = mem[0],zero
   4573 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   4574 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   4575 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4576 ; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
   4577 ; X86-NEXT:    retl ## encoding: [0xc3]
   4578 ;
   4579 ; X64-LABEL: test_mask_mul_epi32_rmbkz:
   4580 ; X64:       ## %bb.0:
   4581 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4582 ; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
   4583 ; X64-NEXT:    retq ## encoding: [0xc3]
   4584   %q = load i64, i64* %ptr_b
   4585   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   4586   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   4587   %b = bitcast <8 x i64> %b64 to <16 x i32>
   4588   %res = call <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
   4589   ret <8 x i64> %res
   4590 }
   4591 
   4592 declare <8 x i64> @llvm.x86.avx512.mask.pmul.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
   4593 
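; VPMULUDQ tests: same structure as the VPMULDQ block above, but the sources
; are zero-extended (unsigned) via llvm.x86.avx512.mask.pmulu.dq.512
; (cf. _mm512_mul_epu32).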
   4594 define <8 x i64> @test_mask_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
   4595 ; CHECK-LABEL: test_mask_mul_epu32_rr:
   4596 ; CHECK:       ## %bb.0:
   4597 ; CHECK-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
   4598 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   4599   %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
   4600   ret <8 x i64> %res
   4601 }
   4602 
   4603 define <8 x i64> @test_mask_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
   4604 ; X86-LABEL: test_mask_mul_epu32_rrk:
   4605 ; X86:       ## %bb.0:
   4606 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4607 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4608 ; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
   4609 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4610 ; X86-NEXT:    retl ## encoding: [0xc3]
   4611 ;
   4612 ; X64-LABEL: test_mask_mul_epu32_rrk:
   4613 ; X64:       ## %bb.0:
   4614 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4615 ; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
   4616 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   4617 ; X64-NEXT:    retq ## encoding: [0xc3]
   4618   %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
   4619   ret <8 x i64> %res
   4620 }
   4621 
   4622 define <8 x i64> @test_mask_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
   4623 ; X86-LABEL: test_mask_mul_epu32_rrkz:
   4624 ; X86:       ## %bb.0:
   4625 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4626 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4627 ; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
   4628 ; X86-NEXT:    retl ## encoding: [0xc3]
   4629 ;
   4630 ; X64-LABEL: test_mask_mul_epu32_rrkz:
   4631 ; X64:       ## %bb.0:
   4632 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4633 ; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
   4634 ; X64-NEXT:    retq ## encoding: [0xc3]
   4635   %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
   4636   ret <8 x i64> %res
   4637 }
   4638 
   4639 define <8 x i64> @test_mask_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
   4640 ; X86-LABEL: test_mask_mul_epu32_rm:
   4641 ; X86:       ## %bb.0:
   4642 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4643 ; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00]
   4644 ; X86-NEXT:    retl ## encoding: [0xc3]
   4645 ;
   4646 ; X64-LABEL: test_mask_mul_epu32_rm:
   4647 ; X64:       ## %bb.0:
   4648 ; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
   4649 ; X64-NEXT:    retq ## encoding: [0xc3]
   4650   %b = load <16 x i32>, <16 x i32>* %ptr_b
   4651   %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
   4652   ret <8 x i64> %res
   4653 }
   4654 
   4655 define <8 x i64> @test_mask_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   4656 ; X86-LABEL: test_mask_mul_epu32_rmk:
   4657 ; X86:       ## %bb.0:
   4658 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4659 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4660 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   4661 ; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08]
   4662 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   4663 ; X86-NEXT:    retl ## encoding: [0xc3]
   4664 ;
   4665 ; X64-LABEL: test_mask_mul_epu32_rmk:
   4666 ; X64:       ## %bb.0:
   4667 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4668 ; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
   4669 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   4670 ; X64-NEXT:    retq ## encoding: [0xc3]
   4671   %b = load <16 x i32>, <16 x i32>* %ptr_b
   4672   %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
   4673   ret <8 x i64> %res
   4674 }
   4675 
   4676 define <8 x i64> @test_mask_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
   4677 ; X86-LABEL: test_mask_mul_epu32_rmkz:
   4678 ; X86:       ## %bb.0:
   4679 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4680 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4681 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   4682 ; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00]
   4683 ; X86-NEXT:    retl ## encoding: [0xc3]
   4684 ;
   4685 ; X64-LABEL: test_mask_mul_epu32_rmkz:
   4686 ; X64:       ## %bb.0:
   4687 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4688 ; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
   4689 ; X64-NEXT:    retq ## encoding: [0xc3]
   4690   %b = load <16 x i32>, <16 x i32>* %ptr_b
   4691   %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
   4692   ret <8 x i64> %res
   4693 }
   4694 
   4695 define <8 x i64> @test_mask_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
   4696 ; X86-LABEL: test_mask_mul_epu32_rmb:
   4697 ; X86:       ## %bb.0:
   4698 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4699 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   4700 ; X86-NEXT:    ## xmm1 = mem[0],zero
   4701 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   4702 ; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
   4703 ; X86-NEXT:    retl ## encoding: [0xc3]
   4704 ;
   4705 ; X64-LABEL: test_mask_mul_epu32_rmb:
   4706 ; X64:       ## %bb.0:
   4707 ; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
   4708 ; X64-NEXT:    retq ## encoding: [0xc3]
   4709   %q = load i64, i64* %ptr_b
   4710   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   4711   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   4712   %b = bitcast <8 x i64> %b64 to <16 x i32>
   4713   %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 -1)
   4714   ret <8 x i64> %res
   4715 }
   4716 
   4717 define <8 x i64> @test_mask_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   4718 ; X86-LABEL: test_mask_mul_epu32_rmbk:
   4719 ; X86:       ## %bb.0:
   4720 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4721 ; X86-NEXT:    vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
   4722 ; X86-NEXT:    ## xmm2 = mem[0],zero
   4723 ; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
   4724 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   4725 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4726 ; X86-NEXT:    vpmuludq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xca]
   4727 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   4728 ; X86-NEXT:    retl ## encoding: [0xc3]
   4729 ;
   4730 ; X64-LABEL: test_mask_mul_epu32_rmbk:
   4731 ; X64:       ## %bb.0:
   4732 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4733 ; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
   4734 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   4735 ; X64-NEXT:    retq ## encoding: [0xc3]
   4736   %q = load i64, i64* %ptr_b
   4737   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   4738   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   4739   %b = bitcast <8 x i64> %b64 to <16 x i32>
   4740   %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask)
   4741   ret <8 x i64> %res
   4742 }
   4743 
   4744 define <8 x i64> @test_mask_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
   4745 ; X86-LABEL: test_mask_mul_epu32_rmbkz:
   4746 ; X86:       ## %bb.0:
   4747 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4748 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   4749 ; X86-NEXT:    ## xmm1 = mem[0],zero
   4750 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   4751 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   4752 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4753 ; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
   4754 ; X86-NEXT:    retl ## encoding: [0xc3]
   4755 ;
   4756 ; X64-LABEL: test_mask_mul_epu32_rmbkz:
   4757 ; X64:       ## %bb.0:
   4758 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4759 ; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
   4760 ; X64-NEXT:    retq ## encoding: [0xc3]
   4761   %q = load i64, i64* %ptr_b
   4762   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   4763   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   4764   %b = bitcast <8 x i64> %b64 to <16 x i32>
   4765   %res = call <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b, <8 x i64> zeroinitializer, i8 %mask)
   4766   ret <8 x i64> %res
   4767 }
   4768 
   4769 declare <8 x i64> @llvm.x86.avx512.mask.pmulu.dq.512(<16 x i32>, <16 x i32>, <8 x i64>, i8)
   4770 
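; Masked subvector extract tests: a merge-masked extract writes into the
; passthru operand (%b) and a zero-masked extract clears the inactive lanes.
; The 128-bit (xmm) results also expect a trailing vzeroupper, while the
; 256-bit extract does not.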
   4771 define <4 x float> @test_mask_vextractf32x4(<4 x float> %b, <16 x float> %a, i8 %mask) {
   4772 ; X86-LABEL: test_mask_vextractf32x4:
   4773 ; X86:       ## %bb.0:
   4774 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4775 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4776 ; X86-NEXT:    vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
   4777 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   4778 ; X86-NEXT:    retl ## encoding: [0xc3]
   4779 ;
   4780 ; X64-LABEL: test_mask_vextractf32x4:
   4781 ; X64:       ## %bb.0:
   4782 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4783 ; X64-NEXT:    vextractf32x4 $2, %zmm1, %xmm0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x19,0xc8,0x02]
   4784 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   4785 ; X64-NEXT:    retq ## encoding: [0xc3]
   4786   %res = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float> %a, i32 2, <4 x float> %b, i8 %mask)
   4787   ret <4 x float> %res
   4788 }
   4789 
   4790 declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.512(<16 x float>, i32, <4 x float>, i8)
   4791 
   4792 define <4 x i64> @test_mask_vextracti64x4(<4 x i64> %b, <8 x i64> %a, i8 %mask) {
   4793 ; X86-LABEL: test_mask_vextracti64x4:
   4794 ; X86:       ## %bb.0:
   4795 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4796 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4797 ; X86-NEXT:    vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
   4798 ; X86-NEXT:    retl ## encoding: [0xc3]
   4799 ;
   4800 ; X64-LABEL: test_mask_vextracti64x4:
   4801 ; X64:       ## %bb.0:
   4802 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4803 ; X64-NEXT:    vextracti64x4 $1, %zmm1, %ymm0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3b,0xc8,0x01]
   4804 ; X64-NEXT:    retq ## encoding: [0xc3]
   4805   %res = call <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64> %a, i32 1, <4 x i64> %b, i8 %mask)
   4806   ret <4 x i64> %res
   4807 }
   4808 
   4809 declare <4 x i64> @llvm.x86.avx512.mask.vextracti64x4.512(<8 x i64>, i32, <4 x i64>, i8)
   4810 
   4811 define <4 x i32> @test_maskz_vextracti32x4(<16 x i32> %a, i8 %mask) {
   4812 ; X86-LABEL: test_maskz_vextracti32x4:
   4813 ; X86:       ## %bb.0:
   4814 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4815 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4816 ; X86-NEXT:    vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
   4817 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   4818 ; X86-NEXT:    retl ## encoding: [0xc3]
   4819 ;
   4820 ; X64-LABEL: test_maskz_vextracti32x4:
   4821 ; X64:       ## %bb.0:
   4822 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4823 ; X64-NEXT:    vextracti32x4 $2, %zmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x39,0xc0,0x02]
   4824 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   4825 ; X64-NEXT:    retq ## encoding: [0xc3]
   4826   %res = call <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32> %a, i32 2, <4 x i32> zeroinitializer, i8 %mask)
   4827   ret <4 x i32> %res
   4828 }
   4829 
   4830 declare <4 x i32> @llvm.x86.avx512.mask.vextracti32x4.512(<16 x i32>, i32, <4 x i32>, i8)
   4831 
   4832 define <4 x double> @test_vextractf64x4(<8 x double> %a) {
   4833 ; CHECK-LABEL: test_vextractf64x4:
   4834 ; CHECK:       ## %bb.0:
   4835 ; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1b,0xc0,0x01]
   4836 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   4837   %res = call <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double> %a, i32 1, <4 x double> zeroinitializer, i8 -1)
   4838   ret <4 x double> %res
   4839 }
   4840 
   4841 declare <4 x double> @llvm.x86.avx512.mask.vextractf64x4.512(<8 x double>, i32, <4 x double>, i8)
   4842 
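; Subvector insert tests (VINSERTF32X4, VINSERTI32X4, VINSERTF64X4,
; VINSERTI64X4): each intrinsic is exercised with an all-ones mask, a merge
; mask and a zero mask, and the three results are added together so every
; encoding is checked.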
   4843 declare <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float>, <4 x float>, i32, <16 x float>, i16)
   4844 
   4845 define <16 x float>@test_int_x86_avx512_mask_insertf32x4_512(<16 x float> %x0, <4 x float> %x1, <16 x float> %x3, i16 %x4) {
   4846 ; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
   4847 ; X86:       ## %bb.0:
   4848 ; X86-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
   4849 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4850 ; X86-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
   4851 ; X86-NEXT:    vaddps %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xd3]
   4852 ; X86-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
   4853 ; X86-NEXT:    vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
   4854 ; X86-NEXT:    retl ## encoding: [0xc3]
   4855 ;
   4856 ; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_512:
   4857 ; X64:       ## %bb.0:
   4858 ; X64-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x18,0xd9,0x01]
   4859 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4860 ; X64-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x18,0xd1,0x01]
   4861 ; X64-NEXT:    vaddps %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xd3]
   4862 ; X64-NEXT:    vinsertf32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x18,0xc1,0x01]
   4863 ; X64-NEXT:    vaddps %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc2]
   4864 ; X64-NEXT:    retq ## encoding: [0xc3]
   4865   %res = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 %x4)
   4866   %res1 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> %x3, i16 -1)
   4867   %res2 = call <16 x float> @llvm.x86.avx512.mask.insertf32x4.512(<16 x float> %x0, <4 x float> %x1, i32 1, <16 x float> zeroinitializer, i16 %x4)
   4868   %res3 = fadd <16 x float> %res, %res1
   4869   %res4 = fadd <16 x float> %res2, %res3
   4870   ret <16 x float> %res4
   4871 }
   4872 
   4873 declare <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32>, <4 x i32>, i32, <16 x i32>, i16)
   4874 
   4875 define <16 x i32>@test_int_x86_avx512_mask_inserti32x4_512(<16 x i32> %x0, <4 x i32> %x1, <16 x i32> %x3, i16 %x4) {
   4876 ; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
   4877 ; X86:       ## %bb.0:
   4878 ; X86-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
   4879 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4880 ; X86-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
   4881 ; X86-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
   4882 ; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
   4883 ; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   4884 ; X86-NEXT:    retl ## encoding: [0xc3]
   4885 ;
   4886 ; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_512:
   4887 ; X64:       ## %bb.0:
   4888 ; X64-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0x7d,0x48,0x38,0xd9,0x01]
   4889 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4890 ; X64-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x38,0xd1,0x01]
   4891 ; X64-NEXT:    vinserti32x4 $1, %xmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xc9,0x38,0xc1,0x01]
   4892 ; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
   4893 ; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   4894 ; X64-NEXT:    retq ## encoding: [0xc3]
   4895   %res = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 %x4)
   4896   %res1 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> %x3, i16 -1)
   4897   %res2 = call <16 x i32> @llvm.x86.avx512.mask.inserti32x4.512(<16 x i32> %x0, <4 x i32> %x1, i32 1, <16 x i32> zeroinitializer, i16 %x4)
   4898   %res3 = add <16 x i32> %res, %res1
   4899   %res4 = add <16 x i32> %res2, %res3
   4900   ret <16 x i32> %res4
   4901 }
   4902 
   4903 declare <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double>, <4 x double>, i32, <8 x double>, i8)
   4904 
   4905 define <8 x double>@test_int_x86_avx512_mask_insertf64x4_512(<8 x double> %x0, <4 x double> %x1, <8 x double> %x3, i8 %x4) {
   4906 ; X86-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
   4907 ; X86:       ## %bb.0:
   4908 ; X86-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
   4909 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4910 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4911 ; X86-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
   4912 ; X86-NEXT:    vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
   4913 ; X86-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
   4914 ; X86-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
   4915 ; X86-NEXT:    retl ## encoding: [0xc3]
   4916 ;
   4917 ; X64-LABEL: test_int_x86_avx512_mask_insertf64x4_512:
   4918 ; X64:       ## %bb.0:
   4919 ; X64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd9,0x01]
   4920 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4921 ; X64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xd1,0x01]
   4922 ; X64-NEXT:    vaddpd %zmm3, %zmm2, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xd3]
   4923 ; X64-NEXT:    vinsertf64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc1,0x01]
   4924 ; X64-NEXT:    vaddpd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc2]
   4925 ; X64-NEXT:    retq ## encoding: [0xc3]
   4926   %res = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 %x4)
   4927   %res1 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> %x3, i8 -1)
   4928   %res2 = call <8 x double> @llvm.x86.avx512.mask.insertf64x4.512(<8 x double> %x0, <4 x double> %x1, i32 1, <8 x double> zeroinitializer, i8 %x4)
   4929   %res3 = fadd <8 x double> %res, %res1
   4930   %res4 = fadd <8 x double> %res2, %res3
   4931   ret <8 x double> %res4
   4932 }
   4933 
   4934 declare <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64>, <4 x i64>, i32, <8 x i64>, i8)
   4935 
   4936 define <8 x i64>@test_int_x86_avx512_mask_inserti64x4_512(<8 x i64> %x0, <4 x i64> %x1, <8 x i64> %x3, i8 %x4) {
   4937 ; X86-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
   4938 ; X86:       ## %bb.0:
   4939 ; X86-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
   4940 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4941 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   4942 ; X86-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
   4943 ; X86-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
   4944 ; X86-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
   4945 ; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   4946 ; X86-NEXT:    retl ## encoding: [0xc3]
   4947 ;
   4948 ; X64-LABEL: test_int_x86_avx512_mask_inserti64x4_512:
   4949 ; X64:       ## %bb.0:
   4950 ; X64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm3 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd9,0x01]
   4951 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4952 ; X64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xd1,0x01]
   4953 ; X64-NEXT:    vinserti64x4 $1, %ymm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc1,0x01]
   4954 ; X64-NEXT:    vpaddq %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0xe5,0x48,0xd4,0xc0]
   4955 ; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   4956 ; X64-NEXT:    retq ## encoding: [0xc3]
   4957   %res = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 %x4)
   4958   %res1 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> %x3, i8 -1)
   4959   %res2 = call <8 x i64> @llvm.x86.avx512.mask.inserti64x4.512(<8 x i64> %x0, <4 x i64> %x1, i32 1, <8 x i64> zeroinitializer, i8 %x4)
   4960   %res3 = add <8 x i64> %res, %res1
   4961   %res4 = add <8 x i64> %res2, %res3
   4962   ret <8 x i64> %res4
   4963 }
   4964 
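; Non-temporal (streaming) 64-byte load through llvm.x86.avx512.movntdqa
; (cf. _mm512_stream_load_si512).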
   4965 define <8 x i64> @test_x86_avx512_movntdqa(i8* %a0) {
   4966 ; X86-LABEL: test_x86_avx512_movntdqa:
   4967 ; X86:       ## %bb.0:
   4968 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   4969 ; X86-NEXT:    vmovntdqa (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x00]
   4970 ; X86-NEXT:    retl ## encoding: [0xc3]
   4971 ;
   4972 ; X64-LABEL: test_x86_avx512_movntdqa:
   4973 ; X64:       ## %bb.0:
   4974 ; X64-NEXT:    vmovntdqa (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x2a,0x07]
   4975 ; X64-NEXT:    retq ## encoding: [0xc3]
   4976   %res = call <8 x i64> @llvm.x86.avx512.movntdqa(i8* %a0)
   4977   ret <8 x i64> %res
   4978 }
   4979 
   4980 declare <8 x i64> @llvm.x86.avx512.movntdqa(i8*) nounwind readonly
   4981 
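; Signed dword compare tests: llvm.x86.avx512.mask.cmp.d.512 is called with all
; eight predicates and the i16 results are packed into an <8 x i16>.
; Predicates 3 (always false) and 7 (always true, or simply the mask in the
; masked variant) need no compare, so only six k-register compares appear in
; the generated code.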
   4982 define <8 x i16> @test_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
   4983 ; CHECK-LABEL: test_cmp_d_512:
   4984 ; CHECK:       ## %bb.0:
   4985 ; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
   4986 ; CHECK-NEXT:    vpcmpgtd %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf1,0x75,0x48,0x66,0xc8]
   4987 ; CHECK-NEXT:    vpcmpled %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd1,0x02]
   4988 ; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
   4989 ; CHECK-NEXT:    vpcmpnltd %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xe1,0x05]
   4990 ; CHECK-NEXT:    vpcmpgtd %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf1,0x7d,0x48,0x66,0xe9]
   4991 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   4992 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   4993 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   4994 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
   4995 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   4996 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
   4997 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   4998 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
   4999 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5000 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
   5001 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5002 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
   5003 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5004 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
   5005 ; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
   5006 ; CHECK-NEXT:    ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   5007 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5008 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   5009   %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
   5010   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   5011   %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
   5012   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   5013   %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
   5014   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   5015   %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
   5016   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   5017   %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
   5018   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   5019   %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
   5020   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   5021   %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
   5022   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   5023   %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
   5024   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   5025   ret <8 x i16> %vec7
   5026 }
   5027 
   5028 define <8 x i16> @test_mask_cmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
   5029 ; X86-LABEL: test_mask_cmp_d_512:
   5030 ; X86:       ## %bb.0:
   5031 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   5032 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   5033 ; X86-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
   5034 ; X86-NEXT:    vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
   5035 ; X86-NEXT:    vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
   5036 ; X86-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
   5037 ; X86-NEXT:    vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
   5038 ; X86-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
   5039 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5040 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5041 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   5042 ; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
   5043 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   5044 ; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
   5045 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   5046 ; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
   5047 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   5048 ; X86-NEXT:    kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
   5049 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   5050 ; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
   5051 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   5052 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5053 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5054 ; X86-NEXT:    retl ## encoding: [0xc3]
   5055 ;
   5056 ; X64-LABEL: test_mask_cmp_d_512:
   5057 ; X64:       ## %bb.0:
   5058 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5059 ; X64-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
   5060 ; X64-NEXT:    vpcmpgtd %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x66,0xd0]
   5061 ; X64-NEXT:    vpcmpled %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xd9,0x02]
   5062 ; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
   5063 ; X64-NEXT:    vpcmpnltd %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe9,0x05]
   5064 ; X64-NEXT:    vpcmpgtd %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x66,0xc9]
   5065 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5066 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5067 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5068 ; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
   5069 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5070 ; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
   5071 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5072 ; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
   5073 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5074 ; X64-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
   5075 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5076 ; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
   5077 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5078 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   5079 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5080 ; X64-NEXT:    retq ## encoding: [0xc3]
   5081   %res0 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
   5082   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   5083   %res1 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
   5084   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   5085   %res2 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
   5086   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   5087   %res3 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
   5088   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   5089   %res4 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
   5090   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   5091   %res5 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
   5092   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   5093   %res6 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
   5094   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   5095   %res7 = call i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
   5096   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   5097   ret <8 x i16> %vec7
   5098 }
   5099 
   5100 declare i16 @llvm.x86.avx512.mask.cmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
   5101 
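; Unsigned dword compare tests: same layout as the signed block above, but
; through llvm.x86.avx512.mask.ucmp.d.512, so the unsigned predicate forms
; (vpcmpltud, vpcmpleud, vpcmpnltud, vpcmpnleud) are expected instead.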
   5102 define <8 x i16> @test_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1) {
   5103 ; CHECK-LABEL: test_ucmp_d_512:
   5104 ; CHECK:       ## %bb.0:
   5105 ; CHECK-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x48,0x76,0xc1]
   5106 ; CHECK-NEXT:    vpcmpltud %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xc9,0x01]
   5107 ; CHECK-NEXT:    vpcmpleud %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xd1,0x02]
   5108 ; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xd9,0x04]
   5109 ; CHECK-NEXT:    vpcmpnltud %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe1,0x05]
   5110 ; CHECK-NEXT:    vpcmpnleud %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x48,0x1e,0xe9,0x06]
   5111 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5112 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5113 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5114 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
   5115 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5116 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
   5117 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5118 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
   5119 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5120 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
   5121 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5122 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
   5123 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5124 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf1,0x76,0xc9]
   5125 ; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
   5126 ; CHECK-NEXT:    ## xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   5127 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5128 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   5129   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 -1)
   5130   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   5131   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 -1)
   5132   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   5133   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 -1)
   5134   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   5135   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 -1)
   5136   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   5137   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 -1)
   5138   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   5139   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 -1)
   5140   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   5141   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 -1)
   5142   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   5143   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 -1)
   5144   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   5145   ret <8 x i16> %vec7
   5146 }
   5147 
   5148 define <8 x i16> @test_mask_ucmp_d_512(<16 x i32> %a0, <16 x i32> %a1, i16 %mask) {
   5149 ; X86-LABEL: test_mask_ucmp_d_512:
   5150 ; X86:       ## %bb.0:
   5151 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   5152 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   5153 ; X86-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
   5154 ; X86-NEXT:    vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
   5155 ; X86-NEXT:    vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
   5156 ; X86-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
   5157 ; X86-NEXT:    vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
   5158 ; X86-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
   5159 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5160 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5161 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   5162 ; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
   5163 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   5164 ; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
   5165 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   5166 ; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
   5167 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   5168 ; X86-NEXT:    kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
   5169 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   5170 ; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
   5171 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   5172 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5173 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5174 ; X86-NEXT:    retl ## encoding: [0xc3]
   5175 ;
   5176 ; X64-LABEL: test_mask_ucmp_d_512:
   5177 ; X64:       ## %bb.0:
   5178 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5179 ; X64-NEXT:    vpcmpeqd %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x76,0xc1]
   5180 ; X64-NEXT:    vpcmpltud %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd1,0x01]
   5181 ; X64-NEXT:    vpcmpleud %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xd9,0x02]
   5182 ; X64-NEXT:    vpcmpneqd %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1f,0xe1,0x04]
   5183 ; X64-NEXT:    vpcmpnltud %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xe9,0x05]
   5184 ; X64-NEXT:    vpcmpnleud %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x49,0x1e,0xc9,0x06]
   5185 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5186 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5187 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5188 ; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
   5189 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5190 ; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
   5191 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5192 ; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
   5193 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5194 ; X64-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
   5195 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5196 ; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
   5197 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5198 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   5199 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5200 ; X64-NEXT:    retq ## encoding: [0xc3]
   5201   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 0, i16 %mask)
   5202   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   5203   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 1, i16 %mask)
   5204   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   5205   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 2, i16 %mask)
   5206   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   5207   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 3, i16 %mask)
   5208   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   5209   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 4, i16 %mask)
   5210   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   5211   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 5, i16 %mask)
   5212   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   5213   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 6, i16 %mask)
   5214   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   5215   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32> %a0, <16 x i32> %a1, i32 7, i16 %mask)
   5216   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   5217   ret <8 x i16> %vec7
   5218 }
   5219 
   5220 declare i16 @llvm.x86.avx512.mask.ucmp.d.512(<16 x i32>, <16 x i32>, i32, i16) nounwind readnone
   5221 
   5222 define <8 x i8> @test_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
   5223 ; CHECK-LABEL: test_cmp_q_512:
   5224 ; CHECK:       ## %bb.0:
   5225 ; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
   5226 ; CHECK-NEXT:    vpcmpgtq %zmm0, %zmm1, %k1 ## encoding: [0x62,0xf2,0xf5,0x48,0x37,0xc8]
   5227 ; CHECK-NEXT:    vpcmpleq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd1,0x02]
   5228 ; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x04]
   5229 ; CHECK-NEXT:    vpcmpnltq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xe1,0x05]
   5230 ; CHECK-NEXT:    vpcmpgtq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf2,0xfd,0x48,0x37,0xe9]
   5231 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5232 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5233 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5234 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
   5235 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5236 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
   5237 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5238 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
   5239 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5240 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
   5241 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5242 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
   5243 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5244 ; CHECK-NEXT:    movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
   5245 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5246 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5247 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   5248   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
   5249   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
   5250   %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
   5251   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
   5252   %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
   5253   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
   5254   %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
   5255   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
   5256   %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
   5257   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
   5258   %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
   5259   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
   5260   %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
   5261   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
   5262   %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
   5263   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   5264   ret <8 x i8> %vec7
   5265 }
   5266 
   5267 define <8 x i8> @test_mask_cmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
   5268 ; X86-LABEL: test_mask_cmp_q_512:
   5269 ; X86:       ## %bb.0:
   5270 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5271 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   5272 ; X86-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
   5273 ; X86-NEXT:    vpcmpgtq %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xd0]
   5274 ; X86-NEXT:    vpcmpleq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x02]
   5275 ; X86-NEXT:    vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
   5276 ; X86-NEXT:    vpcmpnltq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe9,0x05]
   5277 ; X86-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
   5278 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5279 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5280 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   5281 ; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
   5282 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   5283 ; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
   5284 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   5285 ; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
   5286 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   5287 ; X86-NEXT:    kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
   5288 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   5289 ; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
   5290 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   5291 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5292 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5293 ; X86-NEXT:    retl ## encoding: [0xc3]
   5294 ;
   5295 ; X64-LABEL: test_mask_cmp_q_512:
   5296 ; X64:       ## %bb.0:
   5297 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5298 ; X64-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
   5299 ; X64-NEXT:    vpcmpgtq %zmm0, %zmm1, %k2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x37,0xd0]
   5300 ; X64-NEXT:    vpcmpleq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xd9,0x02]
   5301 ; X64-NEXT:    vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
   5302 ; X64-NEXT:    vpcmpnltq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe9,0x05]
   5303 ; X64-NEXT:    vpcmpgtq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x37,0xc9]
   5304 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5305 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5306 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5307 ; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
   5308 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5309 ; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
   5310 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5311 ; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
   5312 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5313 ; X64-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
   5314 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5315 ; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
   5316 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5317 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   5318 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5319 ; X64-NEXT:    retq ## encoding: [0xc3]
   5320   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
   5321   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
   5322   %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
   5323   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
   5324   %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
   5325   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
   5326   %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
   5327   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
   5328   %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
   5329   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
   5330   %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
   5331   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
   5332   %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
   5333   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
   5334   %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
   5335   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   5336   ret <8 x i8> %vec7
   5337 }
   5338 
   5339 declare i8 @llvm.x86.avx512.mask.cmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
   5340 
   5341 define <8 x i8> @test_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1) {
   5342 ; CHECK-LABEL: test_ucmp_q_512:
   5343 ; CHECK:       ## %bb.0:
   5344 ; CHECK-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x29,0xc1]
   5345 ; CHECK-NEXT:    vpcmpltuq %zmm1, %zmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xc9,0x01]
   5346 ; CHECK-NEXT:    vpcmpleuq %zmm1, %zmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xd1,0x02]
   5347 ; CHECK-NEXT:    vpcmpneqq %zmm1, %zmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x48,0x1f,0xd9,0x04]
   5348 ; CHECK-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe1,0x05]
   5349 ; CHECK-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x48,0x1e,0xe9,0x06]
   5350 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5351 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5352 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5353 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
   5354 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5355 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
   5356 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5357 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
   5358 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5359 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
   5360 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5361 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
   5362 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5363 ; CHECK-NEXT:    movl $255, %eax ## encoding: [0xb8,0xff,0x00,0x00,0x00]
   5364 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5365 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5366 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   5367   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 -1)
   5368   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
   5369   %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 -1)
   5370   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
   5371   %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 -1)
   5372   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
   5373   %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 -1)
   5374   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
   5375   %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 -1)
   5376   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
   5377   %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 -1)
   5378   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
   5379   %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 -1)
   5380   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
   5381   %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 -1)
   5382   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   5383   ret <8 x i8> %vec7
   5384 }
   5385 
   5386 define <8 x i8> @test_mask_ucmp_q_512(<8 x i64> %a0, <8 x i64> %a1, i8 %mask) {
   5387 ; X86-LABEL: test_mask_ucmp_q_512:
   5388 ; X86:       ## %bb.0:
   5389 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5390 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   5391 ; X86-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
   5392 ; X86-NEXT:    vpcmpltuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x01]
   5393 ; X86-NEXT:    vpcmpleuq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd9,0x02]
   5394 ; X86-NEXT:    vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
   5395 ; X86-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe9,0x05]
   5396 ; X86-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
   5397 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5398 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5399 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   5400 ; X86-NEXT:    kmovw %k2, %ecx ## encoding: [0xc5,0xf8,0x93,0xca]
   5401 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   5402 ; X86-NEXT:    kmovw %k3, %ecx ## encoding: [0xc5,0xf8,0x93,0xcb]
   5403 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   5404 ; X86-NEXT:    kmovw %k4, %ecx ## encoding: [0xc5,0xf8,0x93,0xcc]
   5405 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   5406 ; X86-NEXT:    kmovw %k5, %ecx ## encoding: [0xc5,0xf8,0x93,0xcd]
   5407 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   5408 ; X86-NEXT:    kmovw %k1, %ecx ## encoding: [0xc5,0xf8,0x93,0xc9]
   5409 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   5410 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5411 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5412 ; X86-NEXT:    retl ## encoding: [0xc3]
   5413 ;
   5414 ; X64-LABEL: test_mask_ucmp_q_512:
   5415 ; X64:       ## %bb.0:
   5416 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5417 ; X64-NEXT:    vpcmpeqq %zmm1, %zmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x29,0xc1]
   5418 ; X64-NEXT:    vpcmpltuq %zmm1, %zmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd1,0x01]
   5419 ; X64-NEXT:    vpcmpleuq %zmm1, %zmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xd9,0x02]
   5420 ; X64-NEXT:    vpcmpneqq %zmm1, %zmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1f,0xe1,0x04]
   5421 ; X64-NEXT:    vpcmpnltuq %zmm1, %zmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xe9,0x05]
   5422 ; X64-NEXT:    vpcmpnleuq %zmm1, %zmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1e,0xc9,0x06]
   5423 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5424 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xef,0xc0]
   5425 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5426 ; X64-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
   5427 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5428 ; X64-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
   5429 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5430 ; X64-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
   5431 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5432 ; X64-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
   5433 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5434 ; X64-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
   5435 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5436 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 ## encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   5437 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5438 ; X64-NEXT:    retq ## encoding: [0xc3]
   5439   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 0, i8 %mask)
   5440   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
   5441   %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 1, i8 %mask)
   5442   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
   5443   %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 2, i8 %mask)
   5444   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
   5445   %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 3, i8 %mask)
   5446   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
   5447   %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 4, i8 %mask)
   5448   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
   5449   %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 5, i8 %mask)
   5450   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
   5451   %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 6, i8 %mask)
   5452   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
   5453   %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64> %a0, <8 x i64> %a1, i32 7, i8 %mask)
   5454   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   5455   ret <8 x i8> %vec7
   5456 }
   5457 
   5458 declare i8 @llvm.x86.avx512.mask.ucmp.q.512(<8 x i64>, <8 x i64>, i32, i8) nounwind readnone
   5459 
   5460 declare <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float>, <16 x float>, i16)
   5461 
   5462 define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512(<4 x float> %x0, <16 x float> %x2, i16 %mask) {
   5463 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
   5464 ; X86:       ## %bb.0:
   5465 ; X86-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
   5466 ; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
   5467 ; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
   5468 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5469 ; X86-NEXT:    vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
   5470 ; X86-NEXT:    vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
   5471 ; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
   5472 ; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
   5473 ; X86-NEXT:    retl ## encoding: [0xc3]
   5474 ;
   5475 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512:
   5476 ; X64:       ## %bb.0:
   5477 ; X64-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
   5478 ; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x18,0xc0,0x01]
   5479 ; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xc0,0x01]
   5480 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5481 ; X64-NEXT:    vmovaps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x28,0xc8]
   5482 ; X64-NEXT:    vmovaps %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x28,0xd0]
   5483 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
   5484 ; X64-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
   5485 ; X64-NEXT:    retq ## encoding: [0xc3]
   5486 
   5487   %res1 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 -1)
   5488   %res2 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
   5489   %res3 = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> zeroinitializer, i16 %mask)
   5490   %res4 = fadd <16 x float> %res1, %res2
   5491   %res5 = fadd <16 x float> %res3, %res4
   5492   ret <16 x float> %res5
   5493 }
   5494 
   5495 define <16 x float>@test_int_x86_avx512_mask_broadcastf32x4_512_load(<4 x float>* %x0ptr, <16 x float> %x2, i16 %mask) {
   5496 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
   5497 ; X86:       ## %bb.0:
   5498 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   5499 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   5500 ; X86-NEXT:    vbroadcastf32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x00]
   5501 ; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
   5502 ; X86-NEXT:    retl ## encoding: [0xc3]
   5503 ;
   5504 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_512_load:
   5505 ; X64:       ## %bb.0:
   5506 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5507 ; X64-NEXT:    vbroadcastf32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1a,0x07]
   5508 ; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
   5509 ; X64-NEXT:    retq ## encoding: [0xc3]
   5510   %x0 = load <4 x float>, <4 x float>* %x0ptr
   5511   %res = call <16 x float> @llvm.x86.avx512.mask.broadcastf32x4.512(<4 x float> %x0, <16 x float> %x2, i16 %mask)
   5512   ret <16 x float> %res
   5513 }
   5514 
   5515 declare <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double>, <8 x double>, i8)
   5516 
   5517 define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512(<4 x double> %x0, <8 x double> %x2, i8 %mask) {
   5518 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
   5519 ; X86:       ## %bb.0:
   5520 ; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
   5521 ; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
   5522 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5523 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   5524 ; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
   5525 ; X86-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
   5526 ; X86-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
   5527 ; X86-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
   5528 ; X86-NEXT:    retl ## encoding: [0xc3]
   5529 ;
   5530 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512:
   5531 ; X64:       ## %bb.0:
   5532 ; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
   5533 ; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x1a,0xd0,0x01]
   5534 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5535 ; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x1a,0xc8,0x01]
   5536 ; X64-NEXT:    vaddpd %zmm1, %zmm2, %zmm1 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc9]
   5537 ; X64-NEXT:    vinsertf64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x1a,0xc0,0x01]
   5538 ; X64-NEXT:    vaddpd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc1]
   5539 ; X64-NEXT:    retq ## encoding: [0xc3]
   5540 
   5541   %res1 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 -1)
   5542   %res2 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
   5543   %res3 = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> zeroinitializer, i8 %mask)
   5544   %res4 = fadd <8 x double> %res1, %res2
   5545   %res5 = fadd <8 x double> %res3, %res4
   5546   ret <8 x double> %res5
   5547 }
   5548 
   5549 define <8 x double>@test_int_x86_avx512_mask_broadcastf64x4_512_load(<4 x double>* %x0ptr, <8 x double> %x2, i8 %mask) {
   5550 ; X86-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
   5551 ; X86:       ## %bb.0:
   5552 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   5553 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   5554 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   5555 ; X86-NEXT:    vbroadcastf64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x00]
   5556 ; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
   5557 ; X86-NEXT:    retl ## encoding: [0xc3]
   5558 ;
   5559 ; X64-LABEL: test_int_x86_avx512_mask_broadcastf64x4_512_load:
   5560 ; X64:       ## %bb.0:
   5561 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5562 ; X64-NEXT:    vbroadcastf64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1b,0x07]
   5563 ; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
   5564 ; X64-NEXT:    retq ## encoding: [0xc3]
   5565 
   5566   %x0 = load <4 x double>, <4 x double>* %x0ptr
   5567   %res = call <8 x double> @llvm.x86.avx512.mask.broadcastf64x4.512(<4 x double> %x0, <8 x double> %x2, i8 %mask)
   5568   ret <8 x double> %res
   5569 }
   5570 
   5571 declare <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32>, <16 x i32>, i16)
   5572 
   5573 define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask) {
   5574 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
   5575 ; X86:       ## %bb.0:
   5576 ; X86-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
   5577 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
   5578 ; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
   5579 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5580 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
   5581 ; X86-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
   5582 ; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
   5583 ; X86-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
   5584 ; X86-NEXT:    retl ## encoding: [0xc3]
   5585 ;
   5586 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512:
   5587 ; X64:       ## %bb.0:
   5588 ; X64-NEXT:    ## kill: def $xmm0 killed $xmm0 def $ymm0
   5589 ; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0 ## encoding: [0xc4,0xe3,0x7d,0x38,0xc0,0x01]
   5590 ; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xc0,0x01]
   5591 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5592 ; X64-NEXT:    vmovdqa32 %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0x6f,0xc8]
   5593 ; X64-NEXT:    vmovdqa32 %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x6f,0xd0]
   5594 ; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm1 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xca]
   5595 ; X64-NEXT:    vpaddd %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc1]
   5596 ; X64-NEXT:    retq ## encoding: [0xc3]
   5597 
   5598   %res1 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 -1)
   5599   %res2 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
   5600   %res3 = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> zeroinitializer, i16 %mask)
   5601   %res4 = add <16 x i32> %res1, %res2
   5602   %res5 = add <16 x i32> %res3, %res4
   5603   ret <16 x i32> %res5
   5604 }
   5605 
   5606 define <16 x i32>@test_int_x86_avx512_mask_broadcasti32x4_512_load(<4 x i32>* %x0ptr, <16 x i32> %x2, i16 %mask) {
   5607 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
   5608 ; X86:       ## %bb.0:
   5609 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   5610 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   5611 ; X86-NEXT:    vbroadcasti32x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x00]
   5612 ; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
   5613 ; X86-NEXT:    retl ## encoding: [0xc3]
   5614 ;
   5615 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_512_load:
   5616 ; X64:       ## %bb.0:
   5617 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5618 ; X64-NEXT:    vbroadcasti32x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x5a,0x07]
   5619 ; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
   5620 ; X64-NEXT:    retq ## encoding: [0xc3]
   5621 
   5622   %x0 = load <4 x i32>, <4 x i32>* %x0ptr
   5623   %res = call <16 x i32> @llvm.x86.avx512.mask.broadcasti32x4.512(<4 x i32> %x0, <16 x i32> %x2, i16 %mask)
   5624   ret <16 x i32> %res
   5625 }
   5626 
   5627 declare <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64>, <8 x i64>, i8)
   5628 
   5629 define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask) {
   5630 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
   5631 ; X86:       ## %bb.0:
   5632 ; X86-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
   5633 ; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
   5634 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5635 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   5636 ; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
   5637 ; X86-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
   5638 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   5639 ; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   5640 ; X86-NEXT:    retl ## encoding: [0xc3]
   5641 ;
   5642 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512:
   5643 ; X64:       ## %bb.0:
   5644 ; X64-NEXT:    ## kill: def $ymm0 killed $ymm0 def $zmm0
   5645 ; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm2 ## encoding: [0x62,0xf3,0xfd,0x48,0x3a,0xd0,0x01]
   5646 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5647 ; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x49,0x3a,0xc8,0x01]
   5648 ; X64-NEXT:    vinserti64x4 $1, %ymm0, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xc9,0x3a,0xc0,0x01]
   5649 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   5650 ; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   5651 ; X64-NEXT:    retq ## encoding: [0xc3]
   5652 
   5653   %res1 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 -1)
   5654   %res2 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
   5655   %res3 = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> zeroinitializer, i8 %mask)
   5656   %res4 = add <8 x i64> %res1, %res2
   5657   %res5 = add <8 x i64> %res3, %res4
   5658   ret <8 x i64> %res5
   5659 }
   5660 
   5661 define <8 x i64>@test_int_x86_avx512_mask_broadcasti64x4_512_load(<4 x i64>* %x0ptr, <8 x i64> %x2, i8 %mask) {
   5662 ; X86-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
   5663 ; X86:       ## %bb.0:
   5664 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   5665 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   5666 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   5667 ; X86-NEXT:    vbroadcasti64x4 (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x00]
   5668 ; X86-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
   5669 ; X86-NEXT:    retl ## encoding: [0xc3]
   5670 ;
   5671 ; X64-LABEL: test_int_x86_avx512_mask_broadcasti64x4_512_load:
   5672 ; X64:       ## %bb.0:
   5673 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5674 ; X64-NEXT:    vbroadcasti64x4 (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x5b,0x07]
   5675 ; X64-NEXT:    ## zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
   5676 ; X64-NEXT:    retq ## encoding: [0xc3]
   5677 
   5678   %x0 = load <4 x i64>, <4 x i64>* %x0ptr
   5679   %res = call <8 x i64> @llvm.x86.avx512.mask.broadcasti64x4.512(<4 x i64> %x0, <8 x i64> %x2, i8 %mask)
   5680   ret <8 x i64> %res
   5681 }
   5682 
   5683 declare <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32>, <16 x i32>, i16)
   5684 
   5685 define <16 x i32>@test_int_x86_avx512_mask_pabs_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
   5686 ; X86-LABEL: test_int_x86_avx512_mask_pabs_d_512:
   5687 ; X86:       ## %bb.0:
   5688 ; X86-NEXT:    vpabsd %zmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xd0]
   5689 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5690 ; X86-NEXT:    vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
   5691 ; X86-NEXT:    vpaddd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc2]
   5692 ; X86-NEXT:    retl ## encoding: [0xc3]
   5693 ;
   5694 ; X64-LABEL: test_int_x86_avx512_mask_pabs_d_512:
   5695 ; X64:       ## %bb.0:
   5696 ; X64-NEXT:    vpabsd %zmm0, %zmm2 ## encoding: [0x62,0xf2,0x7d,0x48,0x1e,0xd0]
   5697 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5698 ; X64-NEXT:    vpabsd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x1e,0xc8]
   5699 ; X64-NEXT:    vpaddd %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc2]
   5700 ; X64-NEXT:    retq ## encoding: [0xc3]
   5701   %res = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
   5702   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pabs.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
   5703   %res2 = add <16 x i32> %res, %res1
   5704   ret <16 x i32> %res2
   5705 }
   5706 
   5707 declare <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64>, <8 x i64>, i8)
   5708 
   5709 define <8 x i64>@test_int_x86_avx512_mask_pabs_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
   5710 ; X86-LABEL: test_int_x86_avx512_mask_pabs_q_512:
   5711 ; X86:       ## %bb.0:
   5712 ; X86-NEXT:    vpabsq %zmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xd0]
   5713 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5714 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   5715 ; X86-NEXT:    vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
   5716 ; X86-NEXT:    vpaddq %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc2]
   5717 ; X86-NEXT:    retl ## encoding: [0xc3]
   5718 ;
   5719 ; X64-LABEL: test_int_x86_avx512_mask_pabs_q_512:
   5720 ; X64:       ## %bb.0:
   5721 ; X64-NEXT:    vpabsq %zmm0, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x1f,0xd0]
   5722 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5723 ; X64-NEXT:    vpabsq %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x1f,0xc8]
   5724 ; X64-NEXT:    vpaddq %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc2]
   5725 ; X64-NEXT:    retq ## encoding: [0xc3]
   5726   %res = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
   5727   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pabs.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
   5728   %res2 = add <8 x i64> %res, %res1
   5729   ret <8 x i64> %res2
   5730 }
   5731 
   5732 define i8 @test_vptestmq(<8 x i64> %a0, <8 x i64> %a1, i8 %m) {
   5733 ; X86-LABEL: test_vptestmq:
   5734 ; X86:       ## %bb.0:
   5735 ; X86-NEXT:    vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
   5736 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5737 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   5738 ; X86-NEXT:    andb %cl, %al ## encoding: [0x20,0xc8]
   5739 ; X86-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
   5740 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5741 ; X86-NEXT:    retl ## encoding: [0xc3]
   5742 ;
   5743 ; X64-LABEL: test_vptestmq:
   5744 ; X64:       ## %bb.0:
   5745 ; X64-NEXT:    vptestmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x48,0x27,0xc1]
   5746 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5747 ; X64-NEXT:    andb %al, %dil ## encoding: [0x40,0x20,0xc7]
   5748 ; X64-NEXT:    addb %dil, %al ## encoding: [0x40,0x00,0xf8]
   5749 ; X64-NEXT:    ## kill: def $al killed $al killed $eax
   5750 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5751 ; X64-NEXT:    retq ## encoding: [0xc3]
   5752   %res = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 -1)
   5753   %res1 = call i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64> %a0, <8 x i64> %a1, i8 %m)
   5754   %res2 = add i8 %res1, %res
   5755   ret i8 %res2
   5756 }
   5757 declare i8 @llvm.x86.avx512.ptestm.q.512(<8 x i64>, <8 x i64>, i8)
   5758 
   5759 define i16 @test_vptestmd(<16 x i32> %a0, <16 x i32> %a1, i16 %m) {
   5760 ; X86-LABEL: test_vptestmd:
   5761 ; X86:       ## %bb.0:
   5762 ; X86-NEXT:    vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
   5763 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5764 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5765 ; X86-NEXT:    andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
   5766 ; X86-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
   5767 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   5768 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5769 ; X86-NEXT:    retl ## encoding: [0xc3]
   5770 ;
   5771 ; X64-LABEL: test_vptestmd:
   5772 ; X64:       ## %bb.0:
   5773 ; X64-NEXT:    vptestmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x48,0x27,0xc1]
   5774 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5775 ; X64-NEXT:    andl %eax, %edi ## encoding: [0x21,0xc7]
   5776 ; X64-NEXT:    addl %edi, %eax ## encoding: [0x01,0xf8]
   5777 ; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
   5778 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5779 ; X64-NEXT:    retq ## encoding: [0xc3]
   5780   %res = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 -1)
   5781   %res1 = call i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32> %a0, <16 x i32> %a1, i16 %m)
   5782   %res2 = add i16 %res1, %res
   5783   ret i16 %res2
   5784 }
   5785 declare i16 @llvm.x86.avx512.ptestm.d.512(<16 x i32>, <16 x i32>, i16)
   5786 
   5787 declare i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32>, <16 x i32>, i16 %x2)
   5788 
   5789 define i16@test_int_x86_avx512_ptestnm_d_512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2) {
   5790 ; X86-LABEL: test_int_x86_avx512_ptestnm_d_512:
   5791 ; X86:       ## %bb.0:
   5792 ; X86-NEXT:    vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
   5793 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5794 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5795 ; X86-NEXT:    andw %cx, %ax ## encoding: [0x66,0x21,0xc8]
   5796 ; X86-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
   5797 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   5798 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5799 ; X86-NEXT:    retl ## encoding: [0xc3]
   5800 ;
   5801 ; X64-LABEL: test_int_x86_avx512_ptestnm_d_512:
   5802 ; X64:       ## %bb.0:
   5803 ; X64-NEXT:    vptestnmd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x48,0x27,0xc1]
   5804 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5805 ; X64-NEXT:    andl %eax, %edi ## encoding: [0x21,0xc7]
   5806 ; X64-NEXT:    addl %edi, %eax ## encoding: [0x01,0xf8]
   5807 ; X64-NEXT:    ## kill: def $ax killed $ax killed $eax
   5808 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5809 ; X64-NEXT:    retq ## encoding: [0xc3]
   5810   %res = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 %x2)
   5811   %res1 = call i16 @llvm.x86.avx512.ptestnm.d.512(<16 x i32> %x0, <16 x i32> %x1, i16 -1)
   5812   %res2 = add i16 %res, %res1
   5813   ret i16 %res2
   5814 }
   5815 
   5816 declare i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64>, <8 x i64>, i8 %x2)
   5817 
   5818 define i8@test_int_x86_avx512_ptestnm_q_512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2) {
   5819 ; X86-LABEL: test_int_x86_avx512_ptestnm_q_512:
   5820 ; X86:       ## %bb.0:
   5821 ; X86-NEXT:    vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
   5822 ; X86-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5823 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   5824 ; X86-NEXT:    andb %cl, %al ## encoding: [0x20,0xc8]
   5825 ; X86-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
   5826 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5827 ; X86-NEXT:    retl ## encoding: [0xc3]
   5828 ;
   5829 ; X64-LABEL: test_int_x86_avx512_ptestnm_q_512:
   5830 ; X64:       ## %bb.0:
   5831 ; X64-NEXT:    vptestnmq %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x48,0x27,0xc1]
   5832 ; X64-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5833 ; X64-NEXT:    andb %al, %dil ## encoding: [0x40,0x20,0xc7]
   5834 ; X64-NEXT:    addb %dil, %al ## encoding: [0x40,0x00,0xf8]
   5835 ; X64-NEXT:    ## kill: def $al killed $al killed $eax
   5836 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5837 ; X64-NEXT:    retq ## encoding: [0xc3]
   5838   %res = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 %x2)
   5839   %res1 = call i8 @llvm.x86.avx512.ptestnm.q.512(<8 x i64> %x0, <8 x i64> %x1, i8 -1)
   5840   %res2 = add i8 %res, %res1
   5841   ret i8 %res2
   5842 }
   5843 
   5844 declare i16 @llvm.x86.avx512.kand.w(i16, i16) nounwind readnone
   5845 define i16 @test_kand(i16 %a0, i16 %a1) {
   5846 ; X86-LABEL: test_kand:
   5847 ; X86:       ## %bb.0:
   5848 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5849 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x08]
   5850 ; X86-NEXT:    andl $8, %eax ## encoding: [0x83,0xe0,0x08]
   5851 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   5852 ; X86-NEXT:    retl ## encoding: [0xc3]
   5853 ;
   5854 ; X64-LABEL: test_kand:
   5855 ; X64:       ## %bb.0:
   5856 ; X64-NEXT:    andl %esi, %edi ## encoding: [0x21,0xf7]
   5857 ; X64-NEXT:    andl $8, %edi ## encoding: [0x83,0xe7,0x08]
   5858 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
   5859 ; X64-NEXT:    retq ## encoding: [0xc3]
   5860   %t1 = call i16 @llvm.x86.avx512.kand.w(i16 %a0, i16 8)
   5861   %t2 = call i16 @llvm.x86.avx512.kand.w(i16 %t1, i16 %a1)
   5862   ret i16 %t2
   5863 }
   5864 
   5865 declare i16 @llvm.x86.avx512.kandn.w(i16, i16) nounwind readnone
   5866 define i16 @test_kandn(i16 %a0, i16 %a1) {
   5867 ; X86-LABEL: test_kandn:
   5868 ; X86:       ## %bb.0:
   5869 ; X86-NEXT:    movl $65527, %eax ## encoding: [0xb8,0xf7,0xff,0x00,0x00]
   5870 ; X86-NEXT:    ## imm = 0xFFF7
   5871 ; X86-NEXT:    orl {{[0-9]+}}(%esp), %eax ## encoding: [0x0b,0x44,0x24,0x04]
   5872 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x23,0x44,0x24,0x08]
   5873 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   5874 ; X86-NEXT:    retl ## encoding: [0xc3]
   5875 ;
   5876 ; X64-LABEL: test_kandn:
   5877 ; X64:       ## %bb.0:
   5878 ; X64-NEXT:    orl $-9, %edi ## encoding: [0x83,0xcf,0xf7]
   5879 ; X64-NEXT:    andl %esi, %edi ## encoding: [0x21,0xf7]
   5880 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
   5881 ; X64-NEXT:    retq ## encoding: [0xc3]
   5882   %t1 = call i16 @llvm.x86.avx512.kandn.w(i16 %a0, i16 8)
   5883   %t2 = call i16 @llvm.x86.avx512.kandn.w(i16 %t1, i16 %a1)
   5884   ret i16 %t2
   5885 }
   5886 
   5887 declare i16 @llvm.x86.avx512.knot.w(i16) nounwind readnone
   5888 define i16 @test_knot(i16 %a0) {
   5889 ; X86-LABEL: test_knot:
   5890 ; X86:       ## %bb.0:
   5891 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   5892 ; X86-NEXT:    notl %eax ## encoding: [0xf7,0xd0]
   5893 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   5894 ; X86-NEXT:    retl ## encoding: [0xc3]
   5895 ;
   5896 ; X64-LABEL: test_knot:
   5897 ; X64:       ## %bb.0:
   5898 ; X64-NEXT:    notl %edi ## encoding: [0xf7,0xd7]
   5899 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
   5900 ; X64-NEXT:    retq ## encoding: [0xc3]
   5901   %res = call i16 @llvm.x86.avx512.knot.w(i16 %a0)
   5902   ret i16 %res
   5903 }
   5904 
   5905 declare i16 @llvm.x86.avx512.kor.w(i16, i16) nounwind readnone
   5906 define i16 @test_kor(i16 %a0, i16 %a1) {
   5907 ; X86-LABEL: test_kor:
   5908 ; X86:       ## %bb.0:
   5909 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5910 ; X86-NEXT:    orw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x0b,0x44,0x24,0x08]
   5911 ; X86-NEXT:    orl $8, %eax ## encoding: [0x83,0xc8,0x08]
   5912 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   5913 ; X86-NEXT:    retl ## encoding: [0xc3]
   5914 ;
   5915 ; X64-LABEL: test_kor:
   5916 ; X64:       ## %bb.0:
   5917 ; X64-NEXT:    orl %esi, %edi ## encoding: [0x09,0xf7]
   5918 ; X64-NEXT:    orl $8, %edi ## encoding: [0x83,0xcf,0x08]
   5919 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
   5920 ; X64-NEXT:    retq ## encoding: [0xc3]
   5921   %t1 = call i16 @llvm.x86.avx512.kor.w(i16 %a0, i16 8)
   5922   %t2 = call i16 @llvm.x86.avx512.kor.w(i16 %t1, i16 %a1)
   5923   ret i16 %t2
   5924 }
   5925 
   5926 declare i16 @llvm.x86.avx512.kxnor.w(i16, i16) nounwind readnone
   5927 ; TODO: the two kxnor instructions here are a no-op and should be eliminated,
   5928 ; probably by FoldConstantArithmetic in SelectionDAG (see the note after this function).
   5929 define i16 @test_kxnor(i16 %a0, i16 %a1) {
   5930 ; X86-LABEL: test_kxnor:
   5931 ; X86:       ## %bb.0:
   5932 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5933 ; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08]
   5934 ; X86-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
   5935 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   5936 ; X86-NEXT:    retl ## encoding: [0xc3]
   5937 ;
   5938 ; X64-LABEL: test_kxnor:
   5939 ; X64:       ## %bb.0:
   5940 ; X64-NEXT:    xorl %esi, %edi ## encoding: [0x31,0xf7]
   5941 ; X64-NEXT:    xorl $8, %edi ## encoding: [0x83,0xf7,0x08]
   5942 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
   5943 ; X64-NEXT:    retq ## encoding: [0xc3]
   5944   %t1 = call i16 @llvm.x86.avx512.kxnor.w(i16 %a0, i16 8)
   5945   %t2 = call i16 @llvm.x86.avx512.kxnor.w(i16 %t1, i16 %a1)
   5946   ret i16 %t2
   5947 }
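; Note (added for illustration; not part of the original test or its autogenerated
; checks): kxnor(a, b) = not(xor(a, b)), so for the chained pair above
;   kxnor(kxnor(%a0, 8), %a1) = not(not(xor(%a0, 8)) xor %a1) = xor(xor(%a0, 8), %a1)
; because the two complements cancel. That matches the plain xor sequence the X86/X64
; checks show; an equivalent hand-written IR form of the folded computation would be:
;   %x  = xor i16 %a0, 8
;   %t2 = xor i16 %x, %a1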
   5948 
   5949 declare i16 @llvm.x86.avx512.kxor.w(i16, i16) nounwind readnone
   5950 define i16 @test_kxor(i16 %a0, i16 %a1) {
   5951 ; X86-LABEL: test_kxor:
   5952 ; X86:       ## %bb.0:
   5953 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5954 ; X86-NEXT:    xorw {{[0-9]+}}(%esp), %ax ## encoding: [0x66,0x33,0x44,0x24,0x08]
   5955 ; X86-NEXT:    xorl $8, %eax ## encoding: [0x83,0xf0,0x08]
   5956 ; X86-NEXT:    ## kill: def $ax killed $ax killed $eax
   5957 ; X86-NEXT:    retl ## encoding: [0xc3]
   5958 ;
   5959 ; X64-LABEL: test_kxor:
   5960 ; X64:       ## %bb.0:
   5961 ; X64-NEXT:    xorl %esi, %edi ## encoding: [0x31,0xf7]
   5962 ; X64-NEXT:    xorl $8, %edi ## encoding: [0x83,0xf7,0x08]
   5963 ; X64-NEXT:    movl %edi, %eax ## encoding: [0x89,0xf8]
   5964 ; X64-NEXT:    retq ## encoding: [0xc3]
   5965   %t1 = call i16 @llvm.x86.avx512.kxor.w(i16 %a0, i16 8)
   5966   %t2 = call i16 @llvm.x86.avx512.kxor.w(i16 %t1, i16 %a1)
   5967   ret i16 %t2
   5968 }
   5969 
   5970 declare i32 @llvm.x86.avx512.kortestz.w(i16, i16) nounwind readnone
   5971 define i32 @test_kortestz(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
   5972 ; CHECK-LABEL: test_kortestz:
   5973 ; CHECK:       ## %bb.0: ## %entry
   5974 ; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04]
   5975 ; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04]
   5976 ; CHECK-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
   5977 ; CHECK-NEXT:    kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1]
   5978 ; CHECK-NEXT:    sete %al ## encoding: [0x0f,0x94,0xc0]
   5979 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   5980 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   5981 entry:
   5982   %0 = bitcast <8 x i64> %A to <16 x i32>
   5983   %1 = bitcast <8 x i64> %B to <16 x i32>
   5984   %2 = icmp ne <16 x i32> %0, %1
   5985   %3 = bitcast <8 x i64> %C to <16 x i32>
   5986   %4 = bitcast <8 x i64> %D to <16 x i32>
   5987   %5 = icmp ne <16 x i32> %3, %4
   5988   %6 = bitcast <16 x i1> %2 to i16
   5989   %7 = bitcast <16 x i1> %5 to i16
   5990   %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
   5991   ret i32 %res
   5992 }
   5993 
   5994 declare i32 @llvm.x86.avx512.kortestc.w(i16, i16) nounwind readnone
   5995 define i32 @test_kortestc(<8 x i64> %A, <8 x i64> %B, <8 x i64> %C, <8 x i64> %D) {
   5996 ; CHECK-LABEL: test_kortestc:
   5997 ; CHECK:       ## %bb.0: ## %entry
   5998 ; CHECK-NEXT:    vpcmpneqd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x48,0x1f,0xc1,0x04]
   5999 ; CHECK-NEXT:    vpcmpneqd %zmm3, %zmm2, %k1 ## encoding: [0x62,0xf3,0x6d,0x48,0x1f,0xcb,0x04]
   6000 ; CHECK-NEXT:    xorl %eax, %eax ## encoding: [0x31,0xc0]
   6001 ; CHECK-NEXT:    kortestw %k1, %k0 ## encoding: [0xc5,0xf8,0x98,0xc1]
   6002 ; CHECK-NEXT:    sete %al ## encoding: [0x0f,0x94,0xc0]
   6003 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   6004 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6005 entry:
   6006   %0 = bitcast <8 x i64> %A to <16 x i32>
   6007   %1 = bitcast <8 x i64> %B to <16 x i32>
   6008   %2 = icmp ne <16 x i32> %0, %1
   6009   %3 = bitcast <8 x i64> %C to <16 x i32>
   6010   %4 = bitcast <8 x i64> %D to <16 x i32>
   6011   %5 = icmp ne <16 x i32> %3, %4
   6012   %6 = bitcast <16 x i1> %2 to i16
   6013   %7 = bitcast <16 x i1> %5 to i16
   6014   %res = call i32 @llvm.x86.avx512.kortestz.w(i16 %6, i16 %7)
   6015   ret i32 %res
   6016 }
   6017 
   6018 define i16 @test_cmpps(<16 x float> %a, <16 x float> %b) {
   6019 ; CHECK-LABEL: test_cmpps:
   6020 ; CHECK:       ## %bb.0:
   6021 ; CHECK-NEXT:    vcmpleps {sae}, %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0x7c,0x18,0xc2,0xc1,0x02]
   6022 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   6023 ; CHECK-NEXT:    ## kill: def $ax killed $ax killed $eax
   6024 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   6025 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6026   %res = call i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> %a, <16 x float> %b, i32 2, i16 -1, i32 8)
   6027   ret i16 %res
   6028 }
   6029 declare i16 @llvm.x86.avx512.mask.cmp.ps.512(<16 x float> , <16 x float> , i32, i16, i32)
   6030 
   6031 define i8 @test_cmppd(<8 x double> %a, <8 x double> %b) {
   6032 ; CHECK-LABEL: test_cmppd:
   6033 ; CHECK:       ## %bb.0:
   6034 ; CHECK-NEXT:    vcmpneqpd %zmm1, %zmm0, %k0 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xc1,0x04]
   6035 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   6036 ; CHECK-NEXT:    ## kill: def $al killed $al killed $eax
   6037 ; CHECK-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   6038 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6039   %res = call i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> %a, <8 x double> %b, i32 4, i8 -1, i32 4)
   6040   ret i8 %res
   6041 }
   6042 declare i8 @llvm.x86.avx512.mask.cmp.pd.512(<8 x double> , <8 x double> , i32, i8, i32)
   6043 
   6044 define <8 x i64> @test_mul_epi32_rr(<16 x i32> %a, <16 x i32> %b) {
   6045 ; CHECK-LABEL: test_mul_epi32_rr:
   6046 ; CHECK:       ## %bb.0:
   6047 ; CHECK-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
   6048 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6049   %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
   6050   ret < 8 x i64> %res
   6051 }
   6052 
   6053 define <8 x i64> @test_mul_epi32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
   6054 ; X86-LABEL: test_mul_epi32_rrk:
   6055 ; X86:       ## %bb.0:
   6056 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6057 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6058 ; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
   6059 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   6060 ; X86-NEXT:    retl ## encoding: [0xc3]
   6061 ;
   6062 ; X64-LABEL: test_mul_epi32_rrk:
   6063 ; X64:       ## %bb.0:
   6064 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6065 ; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xd1]
   6066 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   6067 ; X64-NEXT:    retq ## encoding: [0xc3]
   6068   %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
   6069   %mask.cast = bitcast i8 %mask to <8 x i1>
   6070   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
   6071   ret < 8 x i64> %res
   6072 }
   6073 
   6074 define <8 x i64> @test_mul_epi32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
   6075 ; X86-LABEL: test_mul_epi32_rrkz:
   6076 ; X86:       ## %bb.0:
   6077 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6078 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6079 ; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
   6080 ; X86-NEXT:    retl ## encoding: [0xc3]
   6081 ;
   6082 ; X64-LABEL: test_mul_epi32_rrkz:
   6083 ; X64:       ## %bb.0:
   6084 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6085 ; X64-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
   6086 ; X64-NEXT:    retq ## encoding: [0xc3]
   6087   %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
   6088   %mask.cast = bitcast i8 %mask to <8 x i1>
   6089   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
   6090   ret < 8 x i64> %res
   6091 }
   6092 
   6093 define <8 x i64> @test_mul_epi32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
   6094 ; X86-LABEL: test_mul_epi32_rm:
   6095 ; X86:       ## %bb.0:
   6096 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6097 ; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x00]
   6098 ; X86-NEXT:    retl ## encoding: [0xc3]
   6099 ;
   6100 ; X64-LABEL: test_mul_epi32_rm:
   6101 ; X64:       ## %bb.0:
   6102 ; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0x07]
   6103 ; X64-NEXT:    retq ## encoding: [0xc3]
   6104   %b = load <16 x i32>, <16 x i32>* %ptr_b
   6105   %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
   6106   ret < 8 x i64> %res
   6107 }
   6108 
   6109 define <8 x i64> @test_mul_epi32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   6110 ; X86-LABEL: test_mul_epi32_rmk:
   6111 ; X86:       ## %bb.0:
   6112 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6113 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   6114 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   6115 ; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x08]
   6116 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   6117 ; X86-NEXT:    retl ## encoding: [0xc3]
   6118 ;
   6119 ; X64-LABEL: test_mul_epi32_rmk:
   6120 ; X64:       ## %bb.0:
   6121 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6122 ; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0x0f]
   6123 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   6124 ; X64-NEXT:    retq ## encoding: [0xc3]
   6125   %b = load <16 x i32>, <16 x i32>* %ptr_b
   6126   %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
   6127   %mask.cast = bitcast i8 %mask to <8 x i1>
   6128   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
   6129   ret < 8 x i64> %res
   6130 }
   6131 
   6132 define <8 x i64> @test_mul_epi32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
   6133 ; X86-LABEL: test_mul_epi32_rmkz:
   6134 ; X86:       ## %bb.0:
   6135 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6136 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   6137 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   6138 ; X86-NEXT:    vpmuldq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x00]
   6139 ; X86-NEXT:    retl ## encoding: [0xc3]
   6140 ;
   6141 ; X64-LABEL: test_mul_epi32_rmkz:
   6142 ; X64:       ## %bb.0:
   6143 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6144 ; X64-NEXT:    vpmuldq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0x07]
   6145 ; X64-NEXT:    retq ## encoding: [0xc3]
   6146   %b = load <16 x i32>, <16 x i32>* %ptr_b
   6147   %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
   6148   %mask.cast = bitcast i8 %mask to <8 x i1>
   6149   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
   6150   ret < 8 x i64> %res
   6151 }
   6152 
   6153 define <8 x i64> @test_mul_epi32_rmb(<16 x i32> %a, i64* %ptr_b) {
   6154 ; X86-LABEL: test_mul_epi32_rmb:
   6155 ; X86:       ## %bb.0:
   6156 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6157 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   6158 ; X86-NEXT:    ## xmm1 = mem[0],zero
   6159 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   6160 ; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x28,0xc1]
   6161 ; X86-NEXT:    retl ## encoding: [0xc3]
   6162 ;
   6163 ; X64-LABEL: test_mul_epi32_rmb:
   6164 ; X64:       ## %bb.0:
   6165 ; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf2,0xfd,0x58,0x28,0x07]
   6166 ; X64-NEXT:    retq ## encoding: [0xc3]
   6167   %q = load i64, i64* %ptr_b
   6168   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   6169   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   6170   %b = bitcast <8 x i64> %b64 to <16 x i32>
   6171   %res = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
   6172   ret < 8 x i64> %res
   6173 }
   6174 
   6175 define <8 x i64> @test_mul_epi32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   6176 ; X86-LABEL: test_mul_epi32_rmbk:
   6177 ; X86:       ## %bb.0:
   6178 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6179 ; X86-NEXT:    vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
   6180 ; X86-NEXT:    ## xmm2 = mem[0],zero
   6181 ; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
   6182 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   6183 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6184 ; X86-NEXT:    vpmuldq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x28,0xca]
   6185 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   6186 ; X86-NEXT:    retl ## encoding: [0xc3]
   6187 ;
   6188 ; X64-LABEL: test_mul_epi32_rmbk:
   6189 ; X64:       ## %bb.0:
   6190 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6191 ; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x59,0x28,0x0f]
   6192 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   6193 ; X64-NEXT:    retq ## encoding: [0xc3]
   6194   %q = load i64, i64* %ptr_b
   6195   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   6196   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   6197   %b = bitcast <8 x i64> %b64 to <16 x i32>
   6198   %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
   6199   %mask.cast = bitcast i8 %mask to <8 x i1>
   6200   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
   6201   ret < 8 x i64> %res
   6202 }
   6203 
   6204 define <8 x i64> @test_mul_epi32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
   6205 ; X86-LABEL: test_mul_epi32_rmbkz:
   6206 ; X86:       ## %bb.0:
   6207 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6208 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   6209 ; X86-NEXT:    ## xmm1 = mem[0],zero
   6210 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   6211 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   6212 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6213 ; X86-NEXT:    vpmuldq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x28,0xc1]
   6214 ; X86-NEXT:    retl ## encoding: [0xc3]
   6215 ;
   6216 ; X64-LABEL: test_mul_epi32_rmbkz:
   6217 ; X64:       ## %bb.0:
   6218 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6219 ; X64-NEXT:    vpmuldq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x28,0x07]
   6220 ; X64-NEXT:    retq ## encoding: [0xc3]
   6221   %q = load i64, i64* %ptr_b
   6222   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   6223   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   6224   %b = bitcast <8 x i64> %b64 to <16 x i32>
   6225   %mul = call <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32> %a, <16 x i32> %b)
   6226   %mask.cast = bitcast i8 %mask to <8 x i1>
   6227   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
   6228   ret < 8 x i64> %res
   6229 }
   6230 
   6231 declare <8 x i64> @llvm.x86.avx512.pmul.dq.512(<16 x i32>, <16 x i32>)
   6232 
   6233 define <8 x i64> @test_mul_epu32_rr(<16 x i32> %a, <16 x i32> %b) {
   6234 ; CHECK-LABEL: test_mul_epu32_rr:
   6235 ; CHECK:       ## %bb.0:
   6236 ; CHECK-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
   6237 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6238   %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
   6239   ret < 8 x i64> %res
   6240 }
   6241 
   6242 define <8 x i64> @test_mul_epu32_rrk(<16 x i32> %a, <16 x i32> %b, <8 x i64> %passThru, i8 %mask) {
   6243 ; X86-LABEL: test_mul_epu32_rrk:
   6244 ; X86:       ## %bb.0:
   6245 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6246 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6247 ; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
   6248 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   6249 ; X86-NEXT:    retl ## encoding: [0xc3]
   6250 ;
   6251 ; X64-LABEL: test_mul_epu32_rrk:
   6252 ; X64:       ## %bb.0:
   6253 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6254 ; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xd1]
   6255 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   6256 ; X64-NEXT:    retq ## encoding: [0xc3]
   6257   %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
   6258   %mask.cast = bitcast i8 %mask to <8 x i1>
   6259   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
   6260   ret < 8 x i64> %res
   6261 }
   6262 
   6263 define <8 x i64> @test_mul_epu32_rrkz(<16 x i32> %a, <16 x i32> %b, i8 %mask) {
   6264 ; X86-LABEL: test_mul_epu32_rrkz:
   6265 ; X86:       ## %bb.0:
   6266 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6267 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6268 ; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
   6269 ; X86-NEXT:    retl ## encoding: [0xc3]
   6270 ;
   6271 ; X64-LABEL: test_mul_epu32_rrkz:
   6272 ; X64:       ## %bb.0:
   6273 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6274 ; X64-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
   6275 ; X64-NEXT:    retq ## encoding: [0xc3]
   6276   %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
   6277   %mask.cast = bitcast i8 %mask to <8 x i1>
   6278   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
   6279   ret < 8 x i64> %res
   6280 }
   6281 
   6282 define <8 x i64> @test_mul_epu32_rm(<16 x i32> %a, <16 x i32>* %ptr_b) {
   6283 ; X86-LABEL: test_mul_epu32_rm:
   6284 ; X86:       ## %bb.0:
   6285 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6286 ; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x00]
   6287 ; X86-NEXT:    retl ## encoding: [0xc3]
   6288 ;
   6289 ; X64-LABEL: test_mul_epu32_rm:
   6290 ; X64:       ## %bb.0:
   6291 ; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0x07]
   6292 ; X64-NEXT:    retq ## encoding: [0xc3]
   6293   %b = load <16 x i32>, <16 x i32>* %ptr_b
   6294   %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
   6295   ret < 8 x i64> %res
   6296 }
   6297 
   6298 define <8 x i64> @test_mul_epu32_rmk(<16 x i32> %a, <16 x i32>* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   6299 ; X86-LABEL: test_mul_epu32_rmk:
   6300 ; X86:       ## %bb.0:
   6301 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6302 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   6303 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   6304 ; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x08]
   6305 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   6306 ; X86-NEXT:    retl ## encoding: [0xc3]
   6307 ;
   6308 ; X64-LABEL: test_mul_epu32_rmk:
   6309 ; X64:       ## %bb.0:
   6310 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6311 ; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0x0f]
   6312 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   6313 ; X64-NEXT:    retq ## encoding: [0xc3]
   6314   %b = load <16 x i32>, <16 x i32>* %ptr_b
   6315   %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
   6316   %mask.cast = bitcast i8 %mask to <8 x i1>
   6317   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
   6318   ret < 8 x i64> %res
   6319 }
   6320 
   6321 define <8 x i64> @test_mul_epu32_rmkz(<16 x i32> %a, <16 x i32>* %ptr_b, i8 %mask) {
   6322 ; X86-LABEL: test_mul_epu32_rmkz:
   6323 ; X86:       ## %bb.0:
   6324 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6325 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   6326 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   6327 ; X86-NEXT:    vpmuludq (%eax), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x00]
   6328 ; X86-NEXT:    retl ## encoding: [0xc3]
   6329 ;
   6330 ; X64-LABEL: test_mul_epu32_rmkz:
   6331 ; X64:       ## %bb.0:
   6332 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6333 ; X64-NEXT:    vpmuludq (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0x07]
   6334 ; X64-NEXT:    retq ## encoding: [0xc3]
   6335   %b = load <16 x i32>, <16 x i32>* %ptr_b
   6336   %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
   6337   %mask.cast = bitcast i8 %mask to <8 x i1>
   6338   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
   6339   ret < 8 x i64> %res
   6340 }
   6341 
   6342 define <8 x i64> @test_mul_epu32_rmb(<16 x i32> %a, i64* %ptr_b) {
   6343 ; X86-LABEL: test_mul_epu32_rmb:
   6344 ; X86:       ## %bb.0:
   6345 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6346 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   6347 ; X86-NEXT:    ## xmm1 = mem[0],zero
   6348 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   6349 ; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xf4,0xc1]
   6350 ; X86-NEXT:    retl ## encoding: [0xc3]
   6351 ;
   6352 ; X64-LABEL: test_mul_epu32_rmb:
   6353 ; X64:       ## %bb.0:
   6354 ; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x58,0xf4,0x07]
   6355 ; X64-NEXT:    retq ## encoding: [0xc3]
   6356   %q = load i64, i64* %ptr_b
   6357   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   6358   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   6359   %b = bitcast <8 x i64> %b64 to <16 x i32>
   6360   %res = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
   6361   ret < 8 x i64> %res
   6362 }
   6363 
   6364 define <8 x i64> @test_mul_epu32_rmbk(<16 x i32> %a, i64* %ptr_b, <8 x i64> %passThru, i8 %mask) {
   6365 ; X86-LABEL: test_mul_epu32_rmbk:
   6366 ; X86:       ## %bb.0:
   6367 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6368 ; X86-NEXT:    vmovq (%eax), %xmm2 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
   6369 ; X86-NEXT:    ## xmm2 = mem[0],zero
   6370 ; X86-NEXT:    vpbroadcastq %xmm2, %zmm2 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xd2]
   6371 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   6372 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6373 ; X86-NEXT:    vpmuludq %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0xf4,0xca]
   6374 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   6375 ; X86-NEXT:    retl ## encoding: [0xc3]
   6376 ;
   6377 ; X64-LABEL: test_mul_epu32_rmbk:
   6378 ; X64:       ## %bb.0:
   6379 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6380 ; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x59,0xf4,0x0f]
   6381 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   6382 ; X64-NEXT:    retq ## encoding: [0xc3]
   6383   %q = load i64, i64* %ptr_b
   6384   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   6385   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   6386   %b = bitcast <8 x i64> %b64 to <16 x i32>
   6387   %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
   6388   %mask.cast = bitcast i8 %mask to <8 x i1>
   6389   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> %passThru
   6390   ret < 8 x i64> %res
   6391 }
   6392 
   6393 define <8 x i64> @test_mul_epu32_rmbkz(<16 x i32> %a, i64* %ptr_b, i8 %mask) {
   6394 ; X86-LABEL: test_mul_epu32_rmbkz:
   6395 ; X86:       ## %bb.0:
   6396 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6397 ; X86-NEXT:    vmovq (%eax), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   6398 ; X86-NEXT:    ## xmm1 = mem[0],zero
   6399 ; X86-NEXT:    vpbroadcastq %xmm1, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x59,0xc9]
   6400 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   6401 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6402 ; X86-NEXT:    vpmuludq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0xf4,0xc1]
   6403 ; X86-NEXT:    retl ## encoding: [0xc3]
   6404 ;
   6405 ; X64-LABEL: test_mul_epu32_rmbkz:
   6406 ; X64:       ## %bb.0:
   6407 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6408 ; X64-NEXT:    vpmuludq (%rdi){1to8}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0xf4,0x07]
   6409 ; X64-NEXT:    retq ## encoding: [0xc3]
   6410   %q = load i64, i64* %ptr_b
   6411   %vecinit.i = insertelement <8 x i64> undef, i64 %q, i32 0
   6412   %b64 = shufflevector <8 x i64> %vecinit.i, <8 x i64> undef, <8 x i32> zeroinitializer
   6413   %b = bitcast <8 x i64> %b64 to <16 x i32>
   6414   %mul = call <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32> %a, <16 x i32> %b)
   6415   %mask.cast = bitcast i8 %mask to <8 x i1>
   6416   %res = select <8 x i1> %mask.cast, <8 x i64> %mul, <8 x i64> zeroinitializer
   6417   ret < 8 x i64> %res
   6418 }
   6419 
   6420 declare <8 x i64> @llvm.x86.avx512.pmulu.dq.512(<16 x i32>, <16 x i32>)
   6421 
   6422 define <2 x double> @test_x86_avx512_mm_cvtu32_sd(<2 x double> %a, i32 %b)
   6423 ; X86-LABEL: test_x86_avx512_mm_cvtu32_sd:
   6424 ; X86:       ## %bb.0:
   6425 ; X86-NEXT:    vcvtusi2sdl {{[0-9]+}}(%esp), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0x44,0x24,0x01]
   6426 ; X86-NEXT:    retl ## encoding: [0xc3]
   6427 ;
   6428 ; X64-LABEL: test_x86_avx512_mm_cvtu32_sd:
   6429 ; X64:       ## %bb.0:
   6430 ; X64-NEXT:    vcvtusi2sdl %edi, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7f,0x08,0x7b,0xc7]
   6431 ; X64-NEXT:    retq ## encoding: [0xc3]
   6432 {
   6433   %res = call <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double> %a, i32 %b) ; <<2 x double>> [#uses=1]
   6434   ret <2 x double> %res
   6435 }
   6436 declare <2 x double> @llvm.x86.avx512.cvtusi2sd(<2 x double>, i32) nounwind readnone
   6437 
   6438 define <16 x float> @test_x86_vbroadcast_ss_512(i8* %a0) {
   6439 ; X86-LABEL: test_x86_vbroadcast_ss_512:
   6440 ; X86:       ## %bb.0:
   6441 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6442 ; X86-NEXT:    vbroadcastss (%eax), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x00]
   6443 ; X86-NEXT:    retl ## encoding: [0xc3]
   6444 ;
   6445 ; X64-LABEL: test_x86_vbroadcast_ss_512:
   6446 ; X64:       ## %bb.0:
   6447 ; X64-NEXT:    vbroadcastss (%rdi), %zmm0 ## encoding: [0x62,0xf2,0x7d,0x48,0x18,0x07]
   6448 ; X64-NEXT:    retq ## encoding: [0xc3]
   6449   %res = call <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8* %a0) ; <<16 x float>> [#uses=1]
   6450   ret <16 x float> %res
   6451 }
   6452 declare <16 x float> @llvm.x86.avx512.vbroadcast.ss.512(i8*) nounwind readonly
   6453 
   6454 define <8 x double> @test_x86_vbroadcast_sd_512(i8* %a0) {
   6455 ; X86-LABEL: test_x86_vbroadcast_sd_512:
   6456 ; X86:       ## %bb.0:
   6457 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6458 ; X86-NEXT:    vbroadcastsd (%eax), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x00]
   6459 ; X86-NEXT:    retl ## encoding: [0xc3]
   6460 ;
   6461 ; X64-LABEL: test_x86_vbroadcast_sd_512:
   6462 ; X64:       ## %bb.0:
   6463 ; X64-NEXT:    vbroadcastsd (%rdi), %zmm0 ## encoding: [0x62,0xf2,0xfd,0x48,0x19,0x07]
   6464 ; X64-NEXT:    retq ## encoding: [0xc3]
   6465   %res = call <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8* %a0) ; <<8 x double>> [#uses=1]
   6466   ret <8 x double> %res
   6467 }
   6468 declare <8 x double> @llvm.x86.avx512.vbroadcast.sd.512(i8*) nounwind readonly
   6469 
   6470 declare <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double>, <8 x i64>, <8 x double>, i8)
   6471 
   6472 define <8 x double>@test_int_x86_avx512_mask_permvar_df_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
   6473 ; X86-LABEL: test_int_x86_avx512_mask_permvar_df_512:
   6474 ; X86:       ## %bb.0:
   6475 ; X86-NEXT:    vpermpd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xd8]
   6476 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6477 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6478 ; X86-NEXT:    vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0]
   6479 ; X86-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0]
   6480 ; X86-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
   6481 ; X86-NEXT:    vaddpd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3]
   6482 ; X86-NEXT:    retl ## encoding: [0xc3]
   6483 ;
   6484 ; X64-LABEL: test_int_x86_avx512_mask_permvar_df_512:
   6485 ; X64:       ## %bb.0:
   6486 ; X64-NEXT:    vpermpd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x16,0xd8]
   6487 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6488 ; X64-NEXT:    vpermpd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x16,0xd0]
   6489 ; X64-NEXT:    vpermpd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x16,0xc0]
   6490 ; X64-NEXT:    vaddpd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc0]
   6491 ; X64-NEXT:    vaddpd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x58,0xc3]
   6492 ; X64-NEXT:    retq ## encoding: [0xc3]
   6493   %res = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
   6494   %res1 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> zeroinitializer, i8 %x3)
   6495   %res2 = call <8 x double> @llvm.x86.avx512.mask.permvar.df.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
   6496   %res3 = fadd <8 x double> %res, %res1
   6497   %res4 = fadd <8 x double> %res3, %res2
   6498   ret <8 x double> %res4
   6499 }
   6500 
   6501 declare <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   6502 
   6503 define <8 x i64>@test_int_x86_avx512_mask_permvar_di_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   6504 ; X86-LABEL: test_int_x86_avx512_mask_permvar_di_512:
   6505 ; X86:       ## %bb.0:
   6506 ; X86-NEXT:    vpermq %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x36,0xd8]
   6507 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6508 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6509 ; X86-NEXT:    vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0]
   6510 ; X86-NEXT:    vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0]
   6511 ; X86-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   6512 ; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   6513 ; X86-NEXT:    retl ## encoding: [0xc3]
   6514 ;
   6515 ; X64-LABEL: test_int_x86_avx512_mask_permvar_di_512:
   6516 ; X64:       ## %bb.0:
   6517 ; X64-NEXT:    vpermq %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x36,0xd8]
   6518 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6519 ; X64-NEXT:    vpermq %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x36,0xd0]
   6520 ; X64-NEXT:    vpermq %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xc9,0x36,0xc0]
   6521 ; X64-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   6522 ; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   6523 ; X64-NEXT:    retq ## encoding: [0xc3]
   6524   %res = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   6525   %res1 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
   6526   %res2 = call <8 x i64> @llvm.x86.avx512.mask.permvar.di.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   6527   %res3 = add <8 x i64> %res, %res1
   6528   %res4 = add <8 x i64> %res3, %res2
   6529   ret <8 x i64> %res4
   6530 }
   6531 
   6532 declare <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float>, <16 x i32>, <16 x float>, i16)
   6533 
   6534 define <16 x float>@test_int_x86_avx512_mask_permvar_sf_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
   6535 ; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
   6536 ; X86:       ## %bb.0:
   6537 ; X86-NEXT:    vpermps %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xd8]
   6538 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6539 ; X86-NEXT:    vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0]
   6540 ; X86-NEXT:    vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0]
   6541 ; X86-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
   6542 ; X86-NEXT:    vaddps %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc3]
   6543 ; X86-NEXT:    retl ## encoding: [0xc3]
   6544 ;
   6545 ; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_512:
   6546 ; X64:       ## %bb.0:
   6547 ; X64-NEXT:    vpermps %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x16,0xd8]
   6548 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6549 ; X64-NEXT:    vpermps %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x16,0xd0]
   6550 ; X64-NEXT:    vpermps %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x16,0xc0]
   6551 ; X64-NEXT:    vaddps %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6c,0x48,0x58,0xc0]
   6552 ; X64-NEXT:    vaddps %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc3]
   6553 ; X64-NEXT:    retq ## encoding: [0xc3]
   6554   %res = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
   6555   %res1 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> zeroinitializer, i16 %x3)
   6556   %res2 = call <16 x float> @llvm.x86.avx512.mask.permvar.sf.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
   6557   %res3 = fadd <16 x float> %res, %res1
   6558   %res4 = fadd <16 x float> %res3, %res2
   6559   ret <16 x float> %res4
   6560 }
   6561 
   6562 declare <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   6563 
   6564 define <16 x i32>@test_int_x86_avx512_mask_permvar_si_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   6565 ; X86-LABEL: test_int_x86_avx512_mask_permvar_si_512:
   6566 ; X86:       ## %bb.0:
   6567 ; X86-NEXT:    vpermd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x36,0xd8]
   6568 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6569 ; X86-NEXT:    vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x36,0xd0]
   6570 ; X86-NEXT:    vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0]
   6571 ; X86-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   6572 ; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   6573 ; X86-NEXT:    retl ## encoding: [0xc3]
   6574 ;
   6575 ; X64-LABEL: test_int_x86_avx512_mask_permvar_si_512:
   6576 ; X64:       ## %bb.0:
   6577 ; X64-NEXT:    vpermd %zmm0, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x36,0xd8]
   6578 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6579 ; X64-NEXT:    vpermd %zmm0, %zmm1, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x36,0xd0]
   6580 ; X64-NEXT:    vpermd %zmm0, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xc9,0x36,0xc0]
   6581 ; X64-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   6582 ; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   6583 ; X64-NEXT:    retq ## encoding: [0xc3]
   6584   %res = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   6585   %res1 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
   6586   %res2 = call <16 x i32> @llvm.x86.avx512.mask.permvar.si.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   6587   %res3 = add <16 x i32> %res, %res1
   6588   %res4 = add <16 x i32> %res3, %res2
   6589   ret <16 x i32> %res4
   6590 }
   6591 
   6592 declare <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
   6593 
   6594 define <16 x i32>@test_int_x86_avx512_mask_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
   6595 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
   6596 ; X86:       ## %bb.0:
   6597 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6598 ; X86-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21]
   6599 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6600 ; X86-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21]
   6601 ; X86-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   6602 ; X86-NEXT:    retl ## encoding: [0xc3]
   6603 ;
   6604 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_512:
   6605 ; X64:       ## %bb.0:
   6606 ; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6607 ; X64-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21]
   6608 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6609 ; X64-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0x75,0x49,0x25,0xc2,0x21]
   6610 ; X64-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   6611 ; X64-NEXT:    retq ## encoding: [0xc3]
   6612   %res = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
   6613   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
   6614   %res2 = add <16 x i32> %res, %res1
   6615   ret <16 x i32> %res2
   6616 }
   6617 
   6618 declare <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i32, i16)
   6619 
   6620 define <16 x i32>@test_int_x86_avx512_maskz_pternlog_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x4) {
   6621 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
   6622 ; X86:       ## %bb.0:
   6623 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6624 ; X86-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21]
   6625 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6626 ; X86-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21]
   6627 ; X86-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   6628 ; X86-NEXT:    retl ## encoding: [0xc3]
   6629 ;
   6630 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_512:
   6631 ; X64:       ## %bb.0:
   6632 ; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6633 ; X64-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0x75,0x48,0x25,0xda,0x21]
   6634 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6635 ; X64-NEXT:    vpternlogd $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x75,0xc9,0x25,0xc2,0x21]
   6636 ; X64-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   6637 ; X64-NEXT:    retq ## encoding: [0xc3]
   6638   %res = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 %x4)
   6639   %res1 = call <16 x i32> @llvm.x86.avx512.maskz.pternlog.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i32 33, i16 -1)
   6640   %res2 = add <16 x i32> %res, %res1
   6641   ret <16 x i32> %res2
   6642 }
   6643 
   6644 declare <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
   6645 
   6646 define <8 x i64>@test_int_x86_avx512_mask_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
   6647 ; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
   6648 ; X86:       ## %bb.0:
   6649 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6650 ; X86-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21]
   6651 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6652 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6653 ; X86-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21]
   6654 ; X86-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   6655 ; X86-NEXT:    retl ## encoding: [0xc3]
   6656 ;
   6657 ; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_512:
   6658 ; X64:       ## %bb.0:
   6659 ; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6660 ; X64-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21]
   6661 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6662 ; X64-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf3,0xf5,0x49,0x25,0xc2,0x21]
   6663 ; X64-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   6664 ; X64-NEXT:    retq ## encoding: [0xc3]
   6665   %res = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
   6666   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
   6667   %res2 = add <8 x i64> %res, %res1
   6668   ret <8 x i64> %res2
   6669 }
   6670 
   6671 declare <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i32, i8)
   6672 
   6673 define <8 x i64>@test_int_x86_avx512_maskz_pternlog_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x4) {
   6674 ; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
   6675 ; X86:       ## %bb.0:
   6676 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6677 ; X86-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21]
   6678 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6679 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6680 ; X86-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21]
   6681 ; X86-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   6682 ; X86-NEXT:    retl ## encoding: [0xc3]
   6683 ;
   6684 ; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_512:
   6685 ; X64:       ## %bb.0:
   6686 ; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6687 ; X64-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf3,0xf5,0x48,0x25,0xda,0x21]
   6688 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6689 ; X64-NEXT:    vpternlogq $33, %zmm2, %zmm1, %zmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xf5,0xc9,0x25,0xc2,0x21]
   6690 ; X64-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   6691 ; X64-NEXT:    retq ## encoding: [0xc3]
   6692   %res = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 %x4)
   6693   %res1 = call <8 x i64> @llvm.x86.avx512.maskz.pternlog.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i32 33, i8 -1)
   6694   %res2 = add <8 x i64> %res, %res1
   6695   ret <8 x i64> %res2
   6696 }
   6697 
   6698 declare <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   6699 
   6700 define <16 x i32>@test_int_x86_avx512_mask_vpermi2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, <16 x i32> %x4, i16 %x3) {
   6701 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
   6702 ; X86:       ## %bb.0:
   6703 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6704 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   6705 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   6706 ; X86-NEXT:    vpermi2d (%eax), %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x18]
   6707 ; X86-NEXT:    vpermt2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7e,0xc2]
   6708 ; X86-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
   6709 ; X86-NEXT:    retl ## encoding: [0xc3]
   6710 ;
   6711 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_512:
   6712 ; X64:       ## %bb.0:
   6713 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6714 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   6715 ; X64-NEXT:    vpermi2d (%rdi), %zmm0, %zmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x76,0x1f]
   6716 ; X64-NEXT:    vpermt2d %zmm2, %zmm1, %zmm0 ## encoding: [0x62,0xf2,0x75,0x48,0x7e,0xc2]
   6717 ; X64-NEXT:    vpaddd %zmm0, %zmm3, %zmm0 ## encoding: [0x62,0xf1,0x65,0x48,0xfe,0xc0]
   6718 ; X64-NEXT:    retq ## encoding: [0xc3]
   6719   %x2 = load <16 x i32>, <16 x i32>* %x2p
   6720   %res = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   6721   %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermi2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x4, i16 -1)
   6722   %res2 = add <16 x i32> %res, %res1
   6723   ret <16 x i32> %res2
   6724 }
   6725 
   6726 declare <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double>, <8 x i64>, <8 x double>, i8)
   6727 
   6728 define <8 x double>@test_int_x86_avx512_mask_vpermi2var_pd_512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3) {
   6729 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
   6730 ; X86:       ## %bb.0:
   6731 ; X86-NEXT:    vmovapd %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd8]
   6732 ; X86-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7f,0xda]
   6733 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6734 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6735 ; X86-NEXT:    vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca]
   6736 ; X86-NEXT:    vaddpd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc3]
   6737 ; X86-NEXT:    retl ## encoding: [0xc3]
   6738 ;
   6739 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_512:
   6740 ; X64:       ## %bb.0:
   6741 ; X64-NEXT:    vmovapd %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd8]
   6742 ; X64-NEXT:    vpermt2pd %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7f,0xda]
   6743 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6744 ; X64-NEXT:    vpermi2pd %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x77,0xca]
   6745 ; X64-NEXT:    vaddpd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0x58,0xc3]
   6746 ; X64-NEXT:    retq ## encoding: [0xc3]
   6747   %res = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 %x3)
   6748   %res1 = call <8 x double> @llvm.x86.avx512.mask.vpermi2var.pd.512(<8 x double> %x0, <8 x i64> %x1, <8 x double> %x2, i8 -1)
   6749   %res2 = fadd <8 x double> %res, %res1
   6750   ret <8 x double> %res2
   6751 }
   6752 
   6753 declare <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float>, <16 x i32>, <16 x float>, i16)
   6754 
   6755 define <16 x float>@test_int_x86_avx512_mask_vpermi2var_ps_512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3) {
   6756 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
   6757 ; X86:       ## %bb.0:
   6758 ; X86-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   6759 ; X86-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x7f,0xda]
   6760 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6761 ; X86-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca]
   6762 ; X86-NEXT:    vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
   6763 ; X86-NEXT:    retl ## encoding: [0xc3]
   6764 ;
   6765 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_512:
   6766 ; X64:       ## %bb.0:
   6767 ; X64-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   6768 ; X64-NEXT:    vpermt2ps %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0x75,0x48,0x7f,0xda]
   6769 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6770 ; X64-NEXT:    vpermi2ps %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x77,0xca]
   6771 ; X64-NEXT:    vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
   6772 ; X64-NEXT:    retq ## encoding: [0xc3]
   6773   %res = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 %x3)
   6774   %res1 = call <16 x float> @llvm.x86.avx512.mask.vpermi2var.ps.512(<16 x float> %x0, <16 x i32> %x1, <16 x float> %x2, i16 -1)
   6775   %res2 = fadd <16 x float> %res, %res1
   6776   ret <16 x float> %res2
   6777 }
   6778 
   6779 declare <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   6780 
   6781 define <8 x i64>@test_int_x86_avx512_mask_vpermi2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   6782 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
   6783 ; X86:       ## %bb.0:
   6784 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6785 ; X86-NEXT:    vpermt2q %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7e,0xda]
   6786 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6787 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6788 ; X86-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca]
   6789 ; X86-NEXT:    vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
   6790 ; X86-NEXT:    retl ## encoding: [0xc3]
   6791 ;
   6792 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_512:
   6793 ; X64:       ## %bb.0:
   6794 ; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   6795 ; X64-NEXT:    vpermt2q %zmm2, %zmm1, %zmm3 ## encoding: [0x62,0xf2,0xf5,0x48,0x7e,0xda]
   6796 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6797 ; X64-NEXT:    vpermi2q %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x76,0xca]
   6798 ; X64-NEXT:    vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
   6799 ; X64-NEXT:    retq ## encoding: [0xc3]
   6800   %res = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   6801   %res1 = call <8 x i64> @llvm.x86.avx512.mask.vpermi2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   6802   %res2 = add <8 x i64> %res, %res1
   6803   ret <8 x i64> %res2
   6804 }
   6805 
   6806 declare <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   6807 
   6808 define <16 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32>* %x2p, i16 %x3) {
   6809 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
   6810 ; X86:       ## %bb.0:
   6811 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6812 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   6813 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd1]
   6814 ; X86-NEXT:    vpermt2d (%eax), %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7e,0x10]
   6815 ; X86-NEXT:    vpermt2d %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xc9]
   6816 ; X86-NEXT:    vpaddd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc1]
   6817 ; X86-NEXT:    retl ## encoding: [0xc3]
   6818 ;
   6819 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_512:
   6820 ; X64:       ## %bb.0:
   6821 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6822 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd1]
   6823 ; X64-NEXT:    vpermt2d (%rdi), %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7e,0x17]
   6824 ; X64-NEXT:    vpermt2d %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xc9]
   6825 ; X64-NEXT:    vpaddd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc1]
   6826 ; X64-NEXT:    retq ## encoding: [0xc3]
   6827   %x2 = load <16 x i32>, <16 x i32>* %x2p
   6828   %res = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   6829   %res1 = call <16 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x1, i16 -1)
   6830   %res2 = add <16 x i32> %res, %res1
   6831   ret <16 x i32> %res2
   6832 }
   6833 
   6834 declare <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64>, <8 x double>, <8 x double>, i8)
   6835 
   6836 define <8 x double>@test_int_x86_avx512_maskz_vpermt2var_pd_512(<8 x i64> %x0, <8 x double> %x1, double* %x2ptr, i8 %x3) {
   6837 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
   6838 ; X86:       ## %bb.0:
   6839 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   6840 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   6841 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   6842 ; X86-NEXT:    vmovapd %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd1]
   6843 ; X86-NEXT:    vpermt2pd (%eax){1to8}, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x7f,0x10]
   6844 ; X86-NEXT:    vpermt2pd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7f,0xc9]
   6845 ; X86-NEXT:    vaddpd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc1]
   6846 ; X86-NEXT:    retl ## encoding: [0xc3]
   6847 ;
   6848 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_pd_512:
   6849 ; X64:       ## %bb.0:
   6850 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6851 ; X64-NEXT:    vmovapd %zmm1, %zmm2 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xd1]
   6852 ; X64-NEXT:    vpermt2pd (%rdi){1to8}, %zmm0, %zmm2 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xd9,0x7f,0x17]
   6853 ; X64-NEXT:    vpermt2pd %zmm1, %zmm0, %zmm1 ## encoding: [0x62,0xf2,0xfd,0x48,0x7f,0xc9]
   6854 ; X64-NEXT:    vaddpd %zmm1, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0x58,0xc1]
   6855 ; X64-NEXT:    retq ## encoding: [0xc3]
   6856   %x2s = load double, double* %x2ptr
   6857   %x2ins = insertelement <8 x double> undef, double %x2s, i32 0
   6858   %x2 = shufflevector <8 x double> %x2ins, <8 x double> undef, <8 x i32> zeroinitializer
   6859   %res = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x2, i8 %x3)
   6860   %res1 = call <8 x double> @llvm.x86.avx512.maskz.vpermt2var.pd.512(<8 x i64> %x0, <8 x double> %x1, <8 x double> %x1, i8 -1)
   6861   %res2 = fadd <8 x double> %res, %res1
   6862   ret <8 x double> %res2
   6863 }
   6864 
   6865 declare <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32>, <16 x float>, <16 x float>, i16)
   6866 
   6867 define <16 x float>@test_int_x86_avx512_maskz_vpermt2var_ps_512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3) {
   6868 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
   6869 ; X86:       ## %bb.0:
   6870 ; X86-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
   6871 ; X86-NEXT:    vpermt2ps %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7f,0xda]
   6872 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6873 ; X86-NEXT:    vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7f,0xca]
   6874 ; X86-NEXT:    vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
   6875 ; X86-NEXT:    retl ## encoding: [0xc3]
   6876 ;
   6877 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_ps_512:
   6878 ; X64:       ## %bb.0:
   6879 ; X64-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
   6880 ; X64-NEXT:    vpermt2ps %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7f,0xda]
   6881 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6882 ; X64-NEXT:    vpermt2ps %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x7f,0xca]
   6883 ; X64-NEXT:    vaddps %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x74,0x48,0x58,0xc3]
   6884 ; X64-NEXT:    retq ## encoding: [0xc3]
   6885   %res = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 %x3)
   6886   %res1 = call <16 x float> @llvm.x86.avx512.maskz.vpermt2var.ps.512(<16 x i32> %x0, <16 x float> %x1, <16 x float> %x2, i16 -1)
   6887   %res2 = fadd <16 x float> %res, %res1
   6888   ret <16 x float> %res2
   6889 }
   6890 
   6891 
   6892 declare <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   6893 
   6894 define <8 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   6895 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
   6896 ; X86:       ## %bb.0:
   6897 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   6898 ; X86-NEXT:    vpermt2q %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x7e,0xda]
   6899 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6900 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   6901 ; X86-NEXT:    vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7e,0xca]
   6902 ; X86-NEXT:    vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
   6903 ; X86-NEXT:    retl ## encoding: [0xc3]
   6904 ;
   6905 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_512:
   6906 ; X64:       ## %bb.0:
   6907 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   6908 ; X64-NEXT:    vpermt2q %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x7e,0xda]
   6909 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6910 ; X64-NEXT:    vpermt2q %zmm2, %zmm0, %zmm1 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x7e,0xca]
   6911 ; X64-NEXT:    vpaddq %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc3]
   6912 ; X64-NEXT:    retq ## encoding: [0xc3]
   6913   %res = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   6914   %res1 = call <8 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   6915   %res2 = add <8 x i64> %res, %res1
   6916   ret <8 x i64> %res2
   6917 }
   6918 
   6919 declare <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   6920 
   6921 define <16 x i32>@test_int_x86_avx512_mask_vpermt2var_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   6922 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
   6923 ; X86:       ## %bb.0:
   6924 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   6925 ; X86-NEXT:    vpermt2d %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xda]
   6926 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6927 ; X86-NEXT:    vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
   6928 ; X86-NEXT:    vpaddd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc3]
   6929 ; X86-NEXT:    retl ## encoding: [0xc3]
   6930 ;
   6931 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_512:
   6932 ; X64:       ## %bb.0:
   6933 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 ## encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   6934 ; X64-NEXT:    vpermt2d %zmm2, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x7e,0xda]
   6935 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   6936 ; X64-NEXT:    vpermt2d %zmm2, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x7e,0xca]
   6937 ; X64-NEXT:    vpaddd %zmm3, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc3]
   6938 ; X64-NEXT:    retq ## encoding: [0xc3]
   6939   %res = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   6940   %res1 = call <16 x i32> @llvm.x86.avx512.mask.vpermt2var.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   6941   %res2 = add <16 x i32> %res, %res1
   6942   ret <16 x i32> %res2
   6943 }
   6944 
   6945 declare <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
   6946 declare <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
   6947 declare <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double>, <8 x double>, <8 x double>, i8, i32)
   6948 
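; The trailing i32 argument of these rounding intrinsics selects the static
; rounding mode, as the checks below show: 0 = {rn-sae} (to nearest),
; 1 = {rd-sae} (toward -inf), 2 = {ru-sae} (toward +inf), 3 = {rz-sae}
; (toward zero), 4 = current rounding mode (no embedded rounding).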
   6949 define <16 x float> @test_vsubps_rn(<16 x float> %a0, <16 x float> %a1) {
   6950 ; CHECK-LABEL: test_vsubps_rn:
   6951 ; CHECK:       ## %bb.0:
   6952 ; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
   6953 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6954   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
   6955                     <16 x float> zeroinitializer, i16 -1, i32 0)
   6956   ret <16 x float> %res
   6957 }
   6958 
   6959 define <16 x float> @test_vsubps_rd(<16 x float> %a0, <16 x float> %a1) {
   6960 ; CHECK-LABEL: test_vsubps_rd:
   6961 ; CHECK:       ## %bb.0:
   6962 ; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
   6963 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6964   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
   6965                     <16 x float> zeroinitializer, i16 -1, i32 1)
   6966   ret <16 x float> %res
   6967 }
   6968 
   6969 define <16 x float> @test_vsubps_ru(<16 x float> %a0, <16 x float> %a1) {
   6970 ; CHECK-LABEL: test_vsubps_ru:
   6971 ; CHECK:       ## %bb.0:
   6972 ; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
   6973 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6974   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
   6975                     <16 x float> zeroinitializer, i16 -1, i32 2)
   6976   ret <16 x float> %res
   6977 }
   6978 
   6979 define <16 x float> @test_vsubps_rz(<16 x float> %a0, <16 x float> %a1) {
   6980 ; CHECK-LABEL: test_vsubps_rz:
   6981 ; CHECK:       ## %bb.0:
   6982 ; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
   6983 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6984   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1,
   6985                     <16 x float> zeroinitializer, i16 -1, i32 3)
   6986   ret <16 x float> %res
   6987 }
   6988 
   6989 define <16 x float> @test_vmulps_rn(<16 x float> %a0, <16 x float> %a1) {
   6990 ; CHECK-LABEL: test_vmulps_rn:
   6991 ; CHECK:       ## %bb.0:
   6992 ; CHECK-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x59,0xc1]
   6993 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   6994   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   6995                     <16 x float> zeroinitializer, i16 -1, i32 0)
   6996   ret <16 x float> %res
   6997 }
   6998 
   6999 define <16 x float> @test_vmulps_rd(<16 x float> %a0, <16 x float> %a1) {
   7000 ; CHECK-LABEL: test_vmulps_rd:
   7001 ; CHECK:       ## %bb.0:
   7002 ; CHECK-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x59,0xc1]
   7003 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7004   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7005                     <16 x float> zeroinitializer, i16 -1, i32 1)
   7006   ret <16 x float> %res
   7007 }
   7008 
   7009 define <16 x float> @test_vmulps_ru(<16 x float> %a0, <16 x float> %a1) {
   7010 ; CHECK-LABEL: test_vmulps_ru:
   7011 ; CHECK:       ## %bb.0:
   7012 ; CHECK-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x59,0xc1]
   7013 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7014   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7015                     <16 x float> zeroinitializer, i16 -1, i32 2)
   7016   ret <16 x float> %res
   7017 }
   7018 
   7019 define <16 x float> @test_vmulps_rz(<16 x float> %a0, <16 x float> %a1) {
   7020 ; CHECK-LABEL: test_vmulps_rz:
   7021 ; CHECK:       ## %bb.0:
   7022 ; CHECK-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x59,0xc1]
   7023 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7024   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7025                     <16 x float> zeroinitializer, i16 -1, i32 3)
   7026   ret <16 x float> %res
   7027 }
   7028 
7029 ;; mask float: zeroing-masked vmulps, one test per static rounding mode
   7030 define <16 x float> @test_vmulps_mask_rn(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7031 ; X86-LABEL: test_vmulps_mask_rn:
   7032 ; X86:       ## %bb.0:
   7033 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7034 ; X86-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
   7035 ; X86-NEXT:    retl ## encoding: [0xc3]
   7036 ;
   7037 ; X64-LABEL: test_vmulps_mask_rn:
   7038 ; X64:       ## %bb.0:
   7039 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7040 ; X64-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x59,0xc1]
   7041 ; X64-NEXT:    retq ## encoding: [0xc3]
   7042   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7043                     <16 x float> zeroinitializer, i16 %mask, i32 0)
   7044   ret <16 x float> %res
   7045 }
   7046 
   7047 define <16 x float> @test_vmulps_mask_rd(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7048 ; X86-LABEL: test_vmulps_mask_rd:
   7049 ; X86:       ## %bb.0:
   7050 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7051 ; X86-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
   7052 ; X86-NEXT:    retl ## encoding: [0xc3]
   7053 ;
   7054 ; X64-LABEL: test_vmulps_mask_rd:
   7055 ; X64:       ## %bb.0:
   7056 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7057 ; X64-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x59,0xc1]
   7058 ; X64-NEXT:    retq ## encoding: [0xc3]
   7059   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7060                     <16 x float> zeroinitializer, i16 %mask, i32 1)
   7061   ret <16 x float> %res
   7062 }
   7063 
   7064 define <16 x float> @test_vmulps_mask_ru(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7065 ; X86-LABEL: test_vmulps_mask_ru:
   7066 ; X86:       ## %bb.0:
   7067 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7068 ; X86-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
   7069 ; X86-NEXT:    retl ## encoding: [0xc3]
   7070 ;
   7071 ; X64-LABEL: test_vmulps_mask_ru:
   7072 ; X64:       ## %bb.0:
   7073 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7074 ; X64-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x59,0xc1]
   7075 ; X64-NEXT:    retq ## encoding: [0xc3]
   7076   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7077                     <16 x float> zeroinitializer, i16 %mask, i32 2)
   7078   ret <16 x float> %res
   7079 }
   7080 
   7081 define <16 x float> @test_vmulps_mask_rz(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7082 ; X86-LABEL: test_vmulps_mask_rz:
   7083 ; X86:       ## %bb.0:
   7084 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7085 ; X86-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
   7086 ; X86-NEXT:    retl ## encoding: [0xc3]
   7087 ;
   7088 ; X64-LABEL: test_vmulps_mask_rz:
   7089 ; X64:       ## %bb.0:
   7090 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7091 ; X64-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x59,0xc1]
   7092 ; X64-NEXT:    retq ## encoding: [0xc3]
   7093   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7094                     <16 x float> zeroinitializer, i16 %mask, i32 3)
   7095   ret <16 x float> %res
   7096 }
   7097 
   7098 ;; With Passthru value
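; With a passthru operand the masking merges: masked-off lanes keep the
; %passthru value, so the multiply targets %zmm2 under {%k1} and the result
; is copied back to %zmm0.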
   7099 define <16 x float> @test_vmulps_mask_passthru_rn(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
   7100 ; X86-LABEL: test_vmulps_mask_passthru_rn:
   7101 ; X86:       ## %bb.0:
   7102 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7103 ; X86-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
   7104 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7105 ; X86-NEXT:    retl ## encoding: [0xc3]
   7106 ;
   7107 ; X64-LABEL: test_vmulps_mask_passthru_rn:
   7108 ; X64:       ## %bb.0:
   7109 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7110 ; X64-NEXT:    vmulps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x59,0xd1]
   7111 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7112 ; X64-NEXT:    retq ## encoding: [0xc3]
   7113   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7114                     <16 x float> %passthru, i16 %mask, i32 0)
   7115   ret <16 x float> %res
   7116 }
   7117 
   7118 define <16 x float> @test_vmulps_mask_passthru_rd(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
   7119 ; X86-LABEL: test_vmulps_mask_passthru_rd:
   7120 ; X86:       ## %bb.0:
   7121 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7122 ; X86-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
   7123 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7124 ; X86-NEXT:    retl ## encoding: [0xc3]
   7125 ;
   7126 ; X64-LABEL: test_vmulps_mask_passthru_rd:
   7127 ; X64:       ## %bb.0:
   7128 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7129 ; X64-NEXT:    vmulps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x59,0xd1]
   7130 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7131 ; X64-NEXT:    retq ## encoding: [0xc3]
   7132   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7133                     <16 x float> %passthru, i16 %mask, i32 1)
   7134   ret <16 x float> %res
   7135 }
   7136 
   7137 define <16 x float> @test_vmulps_mask_passthru_ru(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
   7138 ; X86-LABEL: test_vmulps_mask_passthru_ru:
   7139 ; X86:       ## %bb.0:
   7140 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7141 ; X86-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
   7142 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7143 ; X86-NEXT:    retl ## encoding: [0xc3]
   7144 ;
   7145 ; X64-LABEL: test_vmulps_mask_passthru_ru:
   7146 ; X64:       ## %bb.0:
   7147 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7148 ; X64-NEXT:    vmulps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x59,0xd1]
   7149 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7150 ; X64-NEXT:    retq ## encoding: [0xc3]
   7151   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7152                     <16 x float> %passthru, i16 %mask, i32 2)
   7153   ret <16 x float> %res
   7154 }
   7155 
   7156 define <16 x float> @test_vmulps_mask_passthru_rz(<16 x float> %a0, <16 x float> %a1, <16 x float> %passthru, i16 %mask) {
   7157 ; X86-LABEL: test_vmulps_mask_passthru_rz:
   7158 ; X86:       ## %bb.0:
   7159 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7160 ; X86-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
   7161 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7162 ; X86-NEXT:    retl ## encoding: [0xc3]
   7163 ;
   7164 ; X64-LABEL: test_vmulps_mask_passthru_rz:
   7165 ; X64:       ## %bb.0:
   7166 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7167 ; X64-NEXT:    vmulps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x59,0xd1]
   7168 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7169 ; X64-NEXT:    retq ## encoding: [0xc3]
   7170   %res = call <16 x float> @llvm.x86.avx512.mask.mul.ps.512(<16 x float> %a0, <16 x float> %a1,
   7171                     <16 x float> %passthru, i16 %mask, i32 3)
   7172   ret <16 x float> %res
   7173 }
   7174 
7175 ;; mask double: zeroing-masked vmulpd, one test per static rounding mode
   7176 define <8 x double> @test_vmulpd_mask_rn(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
   7177 ; X86-LABEL: test_vmulpd_mask_rn:
   7178 ; X86:       ## %bb.0:
   7179 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   7180 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   7181 ; X86-NEXT:    vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
   7182 ; X86-NEXT:    retl ## encoding: [0xc3]
   7183 ;
   7184 ; X64-LABEL: test_vmulpd_mask_rn:
   7185 ; X64:       ## %bb.0:
   7186 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7187 ; X64-NEXT:    vmulpd {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0x59,0xc1]
   7188 ; X64-NEXT:    retq ## encoding: [0xc3]
   7189   %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
   7190                     <8 x double> zeroinitializer, i8 %mask, i32 0)
   7191   ret <8 x double> %res
   7192 }
   7193 
   7194 define <8 x double> @test_vmulpd_mask_rd(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
   7195 ; X86-LABEL: test_vmulpd_mask_rd:
   7196 ; X86:       ## %bb.0:
   7197 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   7198 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   7199 ; X86-NEXT:    vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
   7200 ; X86-NEXT:    retl ## encoding: [0xc3]
   7201 ;
   7202 ; X64-LABEL: test_vmulpd_mask_rd:
   7203 ; X64:       ## %bb.0:
   7204 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7205 ; X64-NEXT:    vmulpd {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0x59,0xc1]
   7206 ; X64-NEXT:    retq ## encoding: [0xc3]
   7207   %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
   7208                     <8 x double> zeroinitializer, i8 %mask, i32 1)
   7209   ret <8 x double> %res
   7210 }
   7211 
   7212 define <8 x double> @test_vmulpd_mask_ru(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
   7213 ; X86-LABEL: test_vmulpd_mask_ru:
   7214 ; X86:       ## %bb.0:
   7215 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   7216 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   7217 ; X86-NEXT:    vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
   7218 ; X86-NEXT:    retl ## encoding: [0xc3]
   7219 ;
   7220 ; X64-LABEL: test_vmulpd_mask_ru:
   7221 ; X64:       ## %bb.0:
   7222 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7223 ; X64-NEXT:    vmulpd {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xd9,0x59,0xc1]
   7224 ; X64-NEXT:    retq ## encoding: [0xc3]
   7225   %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
   7226                     <8 x double> zeroinitializer, i8 %mask, i32 2)
   7227   ret <8 x double> %res
   7228 }
   7229 
   7230 define <8 x double> @test_vmulpd_mask_rz(<8 x double> %a0, <8 x double> %a1, i8 %mask) {
   7231 ; X86-LABEL: test_vmulpd_mask_rz:
   7232 ; X86:       ## %bb.0:
   7233 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   7234 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   7235 ; X86-NEXT:    vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
   7236 ; X86-NEXT:    retl ## encoding: [0xc3]
   7237 ;
   7238 ; X64-LABEL: test_vmulpd_mask_rz:
   7239 ; X64:       ## %bb.0:
   7240 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7241 ; X64-NEXT:    vmulpd {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x59,0xc1]
   7242 ; X64-NEXT:    retq ## encoding: [0xc3]
   7243   %res = call <8 x double> @llvm.x86.avx512.mask.mul.pd.512(<8 x double> %a0, <8 x double> %a1,
   7244                     <8 x double> zeroinitializer, i8 %mask, i32 3)
   7245   ret <8 x double> %res
   7246 }
   7247 
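; The vaddps tests below (and the vsubps/vdivps groups that follow) cover the
; same three variants: zeroing-masked (maskz), merge-masked with a %src
; passthru, and unmasked via an all-ones mask, each across rounding modes 0-4.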
   7248 define <16 x float> @test_mm512_maskz_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7249 ; X86-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
   7250 ; X86:       ## %bb.0:
   7251 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7252 ; X86-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
   7253 ; X86-NEXT:    retl ## encoding: [0xc3]
   7254 ;
   7255 ; X64-LABEL: test_mm512_maskz_add_round_ps_rn_sae:
   7256 ; X64:       ## %bb.0:
   7257 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7258 ; X64-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x58,0xc1]
   7259 ; X64-NEXT:    retq ## encoding: [0xc3]
   7260   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
   7261   ret <16 x float> %res
   7262 }
   7263 define <16 x float> @test_mm512_maskz_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7264 ; X86-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
   7265 ; X86:       ## %bb.0:
   7266 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7267 ; X86-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
   7268 ; X86-NEXT:    retl ## encoding: [0xc3]
   7269 ;
   7270 ; X64-LABEL: test_mm512_maskz_add_round_ps_rd_sae:
   7271 ; X64:       ## %bb.0:
   7272 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7273 ; X64-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x58,0xc1]
   7274 ; X64-NEXT:    retq ## encoding: [0xc3]
   7275   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
   7276   ret <16 x float> %res
   7277 }
   7278 define <16 x float> @test_mm512_maskz_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7279 ; X86-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
   7280 ; X86:       ## %bb.0:
   7281 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7282 ; X86-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
   7283 ; X86-NEXT:    retl ## encoding: [0xc3]
   7284 ;
   7285 ; X64-LABEL: test_mm512_maskz_add_round_ps_ru_sae:
   7286 ; X64:       ## %bb.0:
   7287 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7288 ; X64-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x58,0xc1]
   7289 ; X64-NEXT:    retq ## encoding: [0xc3]
   7290   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
   7291   ret <16 x float> %res
   7292 }
   7293 
   7294 define <16 x float> @test_mm512_maskz_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7295 ; X86-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
   7296 ; X86:       ## %bb.0:
   7297 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7298 ; X86-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
   7299 ; X86-NEXT:    retl ## encoding: [0xc3]
   7300 ;
   7301 ; X64-LABEL: test_mm512_maskz_add_round_ps_rz_sae:
   7302 ; X64:       ## %bb.0:
   7303 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7304 ; X64-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x58,0xc1]
   7305 ; X64-NEXT:    retq ## encoding: [0xc3]
   7306   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
   7307   ret <16 x float> %res
   7308 }
   7309 
   7310 
   7311 define <16 x float> @test_mm512_maskz_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7312 ; X86-LABEL: test_mm512_maskz_add_round_ps_current:
   7313 ; X86:       ## %bb.0:
   7314 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7315 ; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
   7316 ; X86-NEXT:    retl ## encoding: [0xc3]
   7317 ;
   7318 ; X64-LABEL: test_mm512_maskz_add_round_ps_current:
   7319 ; X64:       ## %bb.0:
   7320 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7321 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x58,0xc1]
   7322 ; X64-NEXT:    retq ## encoding: [0xc3]
   7323   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
   7324   ret <16 x float> %res
   7325 }
   7326 
   7327 define <16 x float> @test_mm512_mask_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7328 ; X86-LABEL: test_mm512_mask_add_round_ps_rn_sae:
   7329 ; X86:       ## %bb.0:
   7330 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7331 ; X86-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
   7332 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7333 ; X86-NEXT:    retl ## encoding: [0xc3]
   7334 ;
   7335 ; X64-LABEL: test_mm512_mask_add_round_ps_rn_sae:
   7336 ; X64:       ## %bb.0:
   7337 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7338 ; X64-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x58,0xd1]
   7339 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7340 ; X64-NEXT:    retq ## encoding: [0xc3]
   7341   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
   7342   ret <16 x float> %res
   7343 }
   7344 define <16 x float> @test_mm512_mask_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7345 ; X86-LABEL: test_mm512_mask_add_round_ps_rd_sae:
   7346 ; X86:       ## %bb.0:
   7347 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7348 ; X86-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
   7349 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7350 ; X86-NEXT:    retl ## encoding: [0xc3]
   7351 ;
   7352 ; X64-LABEL: test_mm512_mask_add_round_ps_rd_sae:
   7353 ; X64:       ## %bb.0:
   7354 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7355 ; X64-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x58,0xd1]
   7356 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7357 ; X64-NEXT:    retq ## encoding: [0xc3]
   7358   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
   7359   ret <16 x float> %res
   7360 }
   7361 define <16 x float> @test_mm512_mask_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7362 ; X86-LABEL: test_mm512_mask_add_round_ps_ru_sae:
   7363 ; X86:       ## %bb.0:
   7364 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7365 ; X86-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
   7366 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7367 ; X86-NEXT:    retl ## encoding: [0xc3]
   7368 ;
   7369 ; X64-LABEL: test_mm512_mask_add_round_ps_ru_sae:
   7370 ; X64:       ## %bb.0:
   7371 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7372 ; X64-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x58,0xd1]
   7373 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7374 ; X64-NEXT:    retq ## encoding: [0xc3]
   7375   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
   7376   ret <16 x float> %res
   7377 }
   7378 
   7379 define <16 x float> @test_mm512_mask_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7380 ; X86-LABEL: test_mm512_mask_add_round_ps_rz_sae:
   7381 ; X86:       ## %bb.0:
   7382 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7383 ; X86-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
   7384 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7385 ; X86-NEXT:    retl ## encoding: [0xc3]
   7386 ;
   7387 ; X64-LABEL: test_mm512_mask_add_round_ps_rz_sae:
   7388 ; X64:       ## %bb.0:
   7389 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7390 ; X64-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x58,0xd1]
   7391 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7392 ; X64-NEXT:    retq ## encoding: [0xc3]
   7393   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
   7394   ret <16 x float> %res
   7395 }
   7396 
   7397 
   7398 define <16 x float> @test_mm512_mask_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7399 ; X86-LABEL: test_mm512_mask_add_round_ps_current:
   7400 ; X86:       ## %bb.0:
   7401 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7402 ; X86-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
   7403 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7404 ; X86-NEXT:    retl ## encoding: [0xc3]
   7405 ;
   7406 ; X64-LABEL: test_mm512_mask_add_round_ps_current:
   7407 ; X64:       ## %bb.0:
   7408 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7409 ; X64-NEXT:    vaddps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x58,0xd1]
   7410 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7411 ; X64-NEXT:    retq ## encoding: [0xc3]
   7412   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
   7413   ret <16 x float> %res
   7414 }
   7415 
   7416 
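; An all-ones mask (i16 -1) selects every lane, so no kmovw or {%k1} is
; expected and the plain vaddps forms are emitted.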
   7417 define <16 x float> @test_mm512_add_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7418 ; CHECK-LABEL: test_mm512_add_round_ps_rn_sae:
   7419 ; CHECK:       ## %bb.0:
   7420 ; CHECK-NEXT:    vaddps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x58,0xc1]
   7421 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7422   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
   7423   ret <16 x float> %res
   7424 }
   7425 define <16 x float> @test_mm512_add_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7426 ; CHECK-LABEL: test_mm512_add_round_ps_rd_sae:
   7427 ; CHECK:       ## %bb.0:
   7428 ; CHECK-NEXT:    vaddps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x58,0xc1]
   7429 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7430   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
   7431   ret <16 x float> %res
   7432 }
   7433 define <16 x float> @test_mm512_add_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7434 ; CHECK-LABEL: test_mm512_add_round_ps_ru_sae:
   7435 ; CHECK:       ## %bb.0:
   7436 ; CHECK-NEXT:    vaddps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x58,0xc1]
   7437 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7438   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
   7439   ret <16 x float> %res
   7440 }
   7441 
   7442 define <16 x float> @test_mm512_add_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7443 ; CHECK-LABEL: test_mm512_add_round_ps_rz_sae:
   7444 ; CHECK:       ## %bb.0:
   7445 ; CHECK-NEXT:    vaddps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x58,0xc1]
   7446 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7447   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
   7448   ret <16 x float> %res
   7449 }
   7450 
   7451 define <16 x float> @test_mm512_add_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7452 ; CHECK-LABEL: test_mm512_add_round_ps_current:
   7453 ; CHECK:       ## %bb.0:
   7454 ; CHECK-NEXT:    vaddps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x58,0xc1]
   7455 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7456   %res = call <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
   7457   ret <16 x float> %res
   7458 }
   7459 declare <16 x float> @llvm.x86.avx512.mask.add.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
   7460 
   7461 define <16 x float> @test_mm512_mask_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7462 ; X86-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
   7463 ; X86:       ## %bb.0:
   7464 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7465 ; X86-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
   7466 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7467 ; X86-NEXT:    retl ## encoding: [0xc3]
   7468 ;
   7469 ; X64-LABEL: test_mm512_mask_sub_round_ps_rn_sae:
   7470 ; X64:       ## %bb.0:
   7471 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7472 ; X64-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5c,0xd1]
   7473 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7474 ; X64-NEXT:    retq ## encoding: [0xc3]
   7475   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
   7476   ret <16 x float> %res
   7477 }
   7478 define <16 x float> @test_mm512_mask_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7479 ; X86-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
   7480 ; X86:       ## %bb.0:
   7481 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7482 ; X86-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
   7483 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7484 ; X86-NEXT:    retl ## encoding: [0xc3]
   7485 ;
   7486 ; X64-LABEL: test_mm512_mask_sub_round_ps_rd_sae:
   7487 ; X64:       ## %bb.0:
   7488 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7489 ; X64-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5c,0xd1]
   7490 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7491 ; X64-NEXT:    retq ## encoding: [0xc3]
   7492   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
   7493   ret <16 x float> %res
   7494 }
   7495 define <16 x float> @test_mm512_mask_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7496 ; X86-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
   7497 ; X86:       ## %bb.0:
   7498 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7499 ; X86-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
   7500 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7501 ; X86-NEXT:    retl ## encoding: [0xc3]
   7502 ;
   7503 ; X64-LABEL: test_mm512_mask_sub_round_ps_ru_sae:
   7504 ; X64:       ## %bb.0:
   7505 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7506 ; X64-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5c,0xd1]
   7507 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7508 ; X64-NEXT:    retq ## encoding: [0xc3]
   7509   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
   7510   ret <16 x float> %res
   7511 }
   7512 
   7513 define <16 x float> @test_mm512_mask_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7514 ; X86-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
   7515 ; X86:       ## %bb.0:
   7516 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7517 ; X86-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
   7518 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7519 ; X86-NEXT:    retl ## encoding: [0xc3]
   7520 ;
   7521 ; X64-LABEL: test_mm512_mask_sub_round_ps_rz_sae:
   7522 ; X64:       ## %bb.0:
   7523 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7524 ; X64-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5c,0xd1]
   7525 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7526 ; X64-NEXT:    retq ## encoding: [0xc3]
   7527   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
   7528   ret <16 x float> %res
   7529 }
   7530 
   7531 
   7532 define <16 x float> @test_mm512_mask_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7533 ; X86-LABEL: test_mm512_mask_sub_round_ps_current:
   7534 ; X86:       ## %bb.0:
   7535 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7536 ; X86-NEXT:    vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
   7537 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7538 ; X86-NEXT:    retl ## encoding: [0xc3]
   7539 ;
   7540 ; X64-LABEL: test_mm512_mask_sub_round_ps_current:
   7541 ; X64:       ## %bb.0:
   7542 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7543 ; X64-NEXT:    vsubps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5c,0xd1]
   7544 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7545 ; X64-NEXT:    retq ## encoding: [0xc3]
   7546   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
   7547   ret <16 x float> %res
   7548 }
   7549 
   7550 define <16 x float> @test_mm512_sub_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7551 ; CHECK-LABEL: test_mm512_sub_round_ps_rn_sae:
   7552 ; CHECK:       ## %bb.0:
   7553 ; CHECK-NEXT:    vsubps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5c,0xc1]
   7554 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7555   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
   7556   ret <16 x float> %res
   7557 }
   7558 define <16 x float> @test_mm512_sub_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7559 ; CHECK-LABEL: test_mm512_sub_round_ps_rd_sae:
   7560 ; CHECK:       ## %bb.0:
   7561 ; CHECK-NEXT:    vsubps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5c,0xc1]
   7562 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7563   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
   7564   ret <16 x float> %res
   7565 }
   7566 define <16 x float> @test_mm512_sub_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7567 ; CHECK-LABEL: test_mm512_sub_round_ps_ru_sae:
   7568 ; CHECK:       ## %bb.0:
   7569 ; CHECK-NEXT:    vsubps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5c,0xc1]
   7570 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7571   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
   7572   ret <16 x float> %res
   7573 }
   7574 
   7575 define <16 x float> @test_mm512_sub_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7576 ; CHECK-LABEL: test_mm512_sub_round_ps_rz_sae:
   7577 ; CHECK:       ## %bb.0:
   7578 ; CHECK-NEXT:    vsubps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5c,0xc1]
   7579 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7580   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
   7581   ret <16 x float> %res
   7582 }
   7583 
   7584 define <16 x float> @test_mm512_sub_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7585 ; CHECK-LABEL: test_mm512_sub_round_ps_current:
   7586 ; CHECK:       ## %bb.0:
   7587 ; CHECK-NEXT:    vsubps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5c,0xc1]
   7588 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7589   %res = call <16 x float> @llvm.x86.avx512.mask.sub.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
   7590   ret <16 x float> %res
   7591 }
   7592 
   7593 define <16 x float> @test_mm512_maskz_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7594 ; X86-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
   7595 ; X86:       ## %bb.0:
   7596 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7597 ; X86-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
   7598 ; X86-NEXT:    retl ## encoding: [0xc3]
   7599 ;
   7600 ; X64-LABEL: test_mm512_maskz_div_round_ps_rn_sae:
   7601 ; X64:       ## %bb.0:
   7602 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7603 ; X64-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5e,0xc1]
   7604 ; X64-NEXT:    retq ## encoding: [0xc3]
   7605   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 0)
   7606   ret <16 x float> %res
   7607 }
   7608 define <16 x float> @test_mm512_maskz_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7609 ; X86-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
   7610 ; X86:       ## %bb.0:
   7611 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7612 ; X86-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
   7613 ; X86-NEXT:    retl ## encoding: [0xc3]
   7614 ;
   7615 ; X64-LABEL: test_mm512_maskz_div_round_ps_rd_sae:
   7616 ; X64:       ## %bb.0:
   7617 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7618 ; X64-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xb9,0x5e,0xc1]
   7619 ; X64-NEXT:    retq ## encoding: [0xc3]
   7620   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 1)
   7621   ret <16 x float> %res
   7622 }
   7623 define <16 x float> @test_mm512_maskz_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7624 ; X86-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
   7625 ; X86:       ## %bb.0:
   7626 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7627 ; X86-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
   7628 ; X86-NEXT:    retl ## encoding: [0xc3]
   7629 ;
   7630 ; X64-LABEL: test_mm512_maskz_div_round_ps_ru_sae:
   7631 ; X64:       ## %bb.0:
   7632 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7633 ; X64-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xd9,0x5e,0xc1]
   7634 ; X64-NEXT:    retq ## encoding: [0xc3]
   7635   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 2)
   7636   ret <16 x float> %res
   7637 }
   7638 
   7639 define <16 x float> @test_mm512_maskz_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7640 ; X86-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
   7641 ; X86:       ## %bb.0:
   7642 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7643 ; X86-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
   7644 ; X86-NEXT:    retl ## encoding: [0xc3]
   7645 ;
   7646 ; X64-LABEL: test_mm512_maskz_div_round_ps_rz_sae:
   7647 ; X64:       ## %bb.0:
   7648 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7649 ; X64-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x5e,0xc1]
   7650 ; X64-NEXT:    retq ## encoding: [0xc3]
   7651   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 3)
   7652   ret <16 x float> %res
   7653 }
   7654 
   7655 
   7656 define <16 x float> @test_mm512_maskz_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7657 ; X86-LABEL: test_mm512_maskz_div_round_ps_current:
   7658 ; X86:       ## %bb.0:
   7659 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7660 ; X86-NEXT:    vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
   7661 ; X86-NEXT:    retl ## encoding: [0xc3]
   7662 ;
   7663 ; X64-LABEL: test_mm512_maskz_div_round_ps_current:
   7664 ; X64:       ## %bb.0:
   7665 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7666 ; X64-NEXT:    vdivps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5e,0xc1]
   7667 ; X64-NEXT:    retq ## encoding: [0xc3]
   7668   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
   7669   ret <16 x float> %res
   7670 }
   7671 
   7672 define <16 x float> @test_mm512_mask_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7673 ; X86-LABEL: test_mm512_mask_div_round_ps_rn_sae:
   7674 ; X86:       ## %bb.0:
   7675 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7676 ; X86-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
   7677 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7678 ; X86-NEXT:    retl ## encoding: [0xc3]
   7679 ;
   7680 ; X64-LABEL: test_mm512_mask_div_round_ps_rn_sae:
   7681 ; X64:       ## %bb.0:
   7682 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7683 ; X64-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5e,0xd1]
   7684 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7685 ; X64-NEXT:    retq ## encoding: [0xc3]
   7686   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 0)
   7687   ret <16 x float> %res
   7688 }
   7689 define <16 x float> @test_mm512_mask_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7690 ; X86-LABEL: test_mm512_mask_div_round_ps_rd_sae:
   7691 ; X86:       ## %bb.0:
   7692 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7693 ; X86-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
   7694 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7695 ; X86-NEXT:    retl ## encoding: [0xc3]
   7696 ;
   7697 ; X64-LABEL: test_mm512_mask_div_round_ps_rd_sae:
   7698 ; X64:       ## %bb.0:
   7699 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7700 ; X64-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x39,0x5e,0xd1]
   7701 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7702 ; X64-NEXT:    retq ## encoding: [0xc3]
   7703   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 1)
   7704   ret <16 x float> %res
   7705 }
   7706 define <16 x float> @test_mm512_mask_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7707 ; X86-LABEL: test_mm512_mask_div_round_ps_ru_sae:
   7708 ; X86:       ## %bb.0:
   7709 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7710 ; X86-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
   7711 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7712 ; X86-NEXT:    retl ## encoding: [0xc3]
   7713 ;
   7714 ; X64-LABEL: test_mm512_mask_div_round_ps_ru_sae:
   7715 ; X64:       ## %bb.0:
   7716 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7717 ; X64-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x59,0x5e,0xd1]
   7718 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7719 ; X64-NEXT:    retq ## encoding: [0xc3]
   7720   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 2)
   7721   ret <16 x float> %res
   7722 }
   7723 
   7724 define <16 x float> @test_mm512_mask_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7725 ; X86-LABEL: test_mm512_mask_div_round_ps_rz_sae:
   7726 ; X86:       ## %bb.0:
   7727 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7728 ; X86-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
   7729 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7730 ; X86-NEXT:    retl ## encoding: [0xc3]
   7731 ;
   7732 ; X64-LABEL: test_mm512_mask_div_round_ps_rz_sae:
   7733 ; X64:       ## %bb.0:
   7734 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7735 ; X64-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x5e,0xd1]
   7736 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7737 ; X64-NEXT:    retq ## encoding: [0xc3]
   7738   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 3)
   7739   ret <16 x float> %res
   7740 }
   7741 
   7742 
   7743 define <16 x float> @test_mm512_mask_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   7744 ; X86-LABEL: test_mm512_mask_div_round_ps_current:
   7745 ; X86:       ## %bb.0:
   7746 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   7747 ; X86-NEXT:    vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
   7748 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7749 ; X86-NEXT:    retl ## encoding: [0xc3]
   7750 ;
   7751 ; X64-LABEL: test_mm512_mask_div_round_ps_current:
   7752 ; X64:       ## %bb.0:
   7753 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   7754 ; X64-NEXT:    vdivps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5e,0xd1]
   7755 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   7756 ; X64-NEXT:    retq ## encoding: [0xc3]
   7757   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
   7758   ret <16 x float> %res
   7759 }
   7760 
   7761 
   7762 define <16 x float> @test_mm512_div_round_ps_rn_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7763 ; CHECK-LABEL: test_mm512_div_round_ps_rn_sae:
   7764 ; CHECK:       ## %bb.0:
   7765 ; CHECK-NEXT:    vdivps {rn-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5e,0xc1]
   7766 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7767   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 0)
   7768   ret <16 x float> %res
   7769 }
   7770 define <16 x float> @test_mm512_div_round_ps_rd_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7771 ; CHECK-LABEL: test_mm512_div_round_ps_rd_sae:
   7772 ; CHECK:       ## %bb.0:
   7773 ; CHECK-NEXT:    vdivps {rd-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x38,0x5e,0xc1]
   7774 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7775   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 1)
   7776   ret <16 x float> %res
   7777 }
   7778 define <16 x float> @test_mm512_div_round_ps_ru_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7779 ; CHECK-LABEL: test_mm512_div_round_ps_ru_sae:
   7780 ; CHECK:       ## %bb.0:
   7781 ; CHECK-NEXT:    vdivps {ru-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x58,0x5e,0xc1]
   7782 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7783   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 2)
   7784   ret <16 x float> %res
   7785 }
   7786 
   7787 define <16 x float> @test_mm512_div_round_ps_rz_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7788 ; CHECK-LABEL: test_mm512_div_round_ps_rz_sae:
   7789 ; CHECK:       ## %bb.0:
   7790 ; CHECK-NEXT:    vdivps {rz-sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x5e,0xc1]
   7791 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7792   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 3)
   7793   ret <16 x float> %res
   7794 }
   7795 
   7796 define <16 x float> @test_mm512_div_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   7797 ; CHECK-LABEL: test_mm512_div_round_ps_current:
   7798 ; CHECK:       ## %bb.0:
   7799 ; CHECK-NEXT:    vdivps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5e,0xc1]
   7800 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   7801   %res = call <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
   7802   ret <16 x float> %res
   7803 }
   7804 declare <16 x float> @llvm.x86.avx512.mask.div.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
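        ; Note (annotation, not autogenerated): the trailing i32 operand of these intrinsics selects the
        ; embedded rounding control exercised above: 0 = {rn-sae}, 1 = {rd-sae}, 2 = {ru-sae},
        ; 3 = {rz-sae}, 4 = current (dynamic) rounding, matching the asm emitted in the checks.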
   7805 
   7806 define void @test_mask_compress_store_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
   7807 ; X86-LABEL: test_mask_compress_store_pd_512:
   7808 ; X86:       ## %bb.0:
   7809 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7810 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   7811 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   7812 ; X86-NEXT:    vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
   7813 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7814 ; X86-NEXT:    retl ## encoding: [0xc3]
   7815 ;
   7816 ; X64-LABEL: test_mask_compress_store_pd_512:
   7817 ; X64:       ## %bb.0:
   7818 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   7819 ; X64-NEXT:    vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
   7820 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7821 ; X64-NEXT:    retq ## encoding: [0xc3]
   7822   call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
   7823   ret void
   7824 }
   7825 
   7826 declare void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
   7827 
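        ; Note (annotation, not autogenerated): the unmasked (-1) variants below still materialize an
        ; all-ones k-register with kxnorw, so the compress/expand operation is emitted in masked form,
        ; as the checks show.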
   7828 define void @test_compress_store_pd_512(i8* %addr, <8 x double> %data) {
   7829 ; X86-LABEL: test_compress_store_pd_512:
   7830 ; X86:       ## %bb.0:
   7831 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7832 ; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   7833 ; X86-NEXT:    vcompresspd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x00]
   7834 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7835 ; X86-NEXT:    retl ## encoding: [0xc3]
   7836 ;
   7837 ; X64-LABEL: test_compress_store_pd_512:
   7838 ; X64:       ## %bb.0:
   7839 ; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   7840 ; X64-NEXT:    vcompresspd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8a,0x07]
   7841 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7842 ; X64-NEXT:    retq ## encoding: [0xc3]
   7843   call void @llvm.x86.avx512.mask.compress.store.pd.512(i8* %addr, <8 x double> %data, i8 -1)
   7844   ret void
   7845 }
   7846 
   7847 define void @test_mask_compress_store_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
   7848 ; X86-LABEL: test_mask_compress_store_ps_512:
   7849 ; X86:       ## %bb.0:
   7850 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7851 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   7852 ; X86-NEXT:    vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
   7853 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7854 ; X86-NEXT:    retl ## encoding: [0xc3]
   7855 ;
   7856 ; X64-LABEL: test_mask_compress_store_ps_512:
   7857 ; X64:       ## %bb.0:
   7858 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   7859 ; X64-NEXT:    vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
   7860 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7861 ; X64-NEXT:    retq ## encoding: [0xc3]
   7862   call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
   7863   ret void
   7864 }
   7865 
   7866 declare void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
   7867 
   7868 define void @test_compress_store_ps_512(i8* %addr, <16 x float> %data) {
   7869 ; X86-LABEL: test_compress_store_ps_512:
   7870 ; X86:       ## %bb.0:
   7871 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7872 ; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   7873 ; X86-NEXT:    vcompressps %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x00]
   7874 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7875 ; X86-NEXT:    retl ## encoding: [0xc3]
   7876 ;
   7877 ; X64-LABEL: test_compress_store_ps_512:
   7878 ; X64:       ## %bb.0:
   7879 ; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   7880 ; X64-NEXT:    vcompressps %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8a,0x07]
   7881 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7882 ; X64-NEXT:    retq ## encoding: [0xc3]
   7883   call void @llvm.x86.avx512.mask.compress.store.ps.512(i8* %addr, <16 x float> %data, i16 -1)
   7884   ret void
   7885 }
   7886 
   7887 define void @test_mask_compress_store_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
   7888 ; X86-LABEL: test_mask_compress_store_q_512:
   7889 ; X86:       ## %bb.0:
   7890 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7891 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   7892 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   7893 ; X86-NEXT:    vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
   7894 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7895 ; X86-NEXT:    retl ## encoding: [0xc3]
   7896 ;
   7897 ; X64-LABEL: test_mask_compress_store_q_512:
   7898 ; X64:       ## %bb.0:
   7899 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   7900 ; X64-NEXT:    vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
   7901 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7902 ; X64-NEXT:    retq ## encoding: [0xc3]
   7903   call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
   7904   ret void
   7905 }
   7906 
   7907 declare void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
   7908 
   7909 define void @test_compress_store_q_512(i8* %addr, <8 x i64> %data) {
   7910 ; X86-LABEL: test_compress_store_q_512:
   7911 ; X86:       ## %bb.0:
   7912 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7913 ; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   7914 ; X86-NEXT:    vpcompressq %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x00]
   7915 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7916 ; X86-NEXT:    retl ## encoding: [0xc3]
   7917 ;
   7918 ; X64-LABEL: test_compress_store_q_512:
   7919 ; X64:       ## %bb.0:
   7920 ; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   7921 ; X64-NEXT:    vpcompressq %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x8b,0x07]
   7922 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7923 ; X64-NEXT:    retq ## encoding: [0xc3]
   7924   call void @llvm.x86.avx512.mask.compress.store.q.512(i8* %addr, <8 x i64> %data, i8 -1)
   7925   ret void
   7926 }
   7927 
   7928 define void @test_mask_compress_store_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
   7929 ; X86-LABEL: test_mask_compress_store_d_512:
   7930 ; X86:       ## %bb.0:
   7931 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7932 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   7933 ; X86-NEXT:    vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
   7934 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7935 ; X86-NEXT:    retl ## encoding: [0xc3]
   7936 ;
   7937 ; X64-LABEL: test_mask_compress_store_d_512:
   7938 ; X64:       ## %bb.0:
   7939 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   7940 ; X64-NEXT:    vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
   7941 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7942 ; X64-NEXT:    retq ## encoding: [0xc3]
   7943   call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
   7944   ret void
   7945 }
   7946 
   7947 declare void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
   7948 
   7949 define void @test_compress_store_d_512(i8* %addr, <16 x i32> %data) {
   7950 ; X86-LABEL: test_compress_store_d_512:
   7951 ; X86:       ## %bb.0:
   7952 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7953 ; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   7954 ; X86-NEXT:    vpcompressd %zmm0, (%eax) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x00]
   7955 ; X86-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7956 ; X86-NEXT:    retl ## encoding: [0xc3]
   7957 ;
   7958 ; X64-LABEL: test_compress_store_d_512:
   7959 ; X64:       ## %bb.0:
   7960 ; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   7961 ; X64-NEXT:    vpcompressd %zmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x8b,0x07]
   7962 ; X64-NEXT:    vzeroupper ## encoding: [0xc5,0xf8,0x77]
   7963 ; X64-NEXT:    retq ## encoding: [0xc3]
   7964   call void @llvm.x86.avx512.mask.compress.store.d.512(i8* %addr, <16 x i32> %data, i16 -1)
   7965   ret void
   7966 }
   7967 
   7968 define <8 x double> @test_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
   7969 ; X86-LABEL: test_mask_expand_load_pd_512:
   7970 ; X86:       ## %bb.0:
   7971 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7972 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   7973 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   7974 ; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
   7975 ; X86-NEXT:    retl ## encoding: [0xc3]
   7976 ;
   7977 ; X64-LABEL: test_mask_expand_load_pd_512:
   7978 ; X64:       ## %bb.0:
   7979 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   7980 ; X64-NEXT:    vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
   7981 ; X64-NEXT:    retq ## encoding: [0xc3]
   7982   %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
   7983   ret <8 x double> %res
   7984 }
   7985 
   7986 define <8 x double> @test_maskz_expand_load_pd_512(i8* %addr, i8 %mask) {
   7987 ; X86-LABEL: test_maskz_expand_load_pd_512:
   7988 ; X86:       ## %bb.0:
   7989 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   7990 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   7991 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   7992 ; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x00]
   7993 ; X86-NEXT:    retl ## encoding: [0xc3]
   7994 ;
   7995 ; X64-LABEL: test_maskz_expand_load_pd_512:
   7996 ; X64:       ## %bb.0:
   7997 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   7998 ; X64-NEXT:    vexpandpd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x88,0x07]
   7999 ; X64-NEXT:    retq ## encoding: [0xc3]
   8000   %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> zeroinitializer, i8 %mask)
   8001   ret <8 x double> %res
   8002 }
   8003 
   8004 declare <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 %mask)
   8005 
   8006 define <8 x double> @test_expand_load_pd_512(i8* %addr, <8 x double> %data) {
   8007 ; X86-LABEL: test_expand_load_pd_512:
   8008 ; X86:       ## %bb.0:
   8009 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8010 ; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   8011 ; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
   8012 ; X86-NEXT:    retl ## encoding: [0xc3]
   8013 ;
   8014 ; X64-LABEL: test_expand_load_pd_512:
   8015 ; X64:       ## %bb.0:
   8016 ; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   8017 ; X64-NEXT:    vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
   8018 ; X64-NEXT:    retq ## encoding: [0xc3]
   8019   %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 -1)
   8020   ret <8 x double> %res
   8021 }
   8022 
   8023 ; Make sure we don't crash if 0 is passed as the mask.

   8024 define <8 x double> @test_zero_mask_expand_load_pd_512(i8* %addr, <8 x double> %data, i8 %mask) {
   8025 ; X86-LABEL: test_zero_mask_expand_load_pd_512:
   8026 ; X86:       ## %bb.0:
   8027 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8028 ; X86-NEXT:    kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8]
   8029 ; X86-NEXT:    vexpandpd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x00]
   8030 ; X86-NEXT:    retl ## encoding: [0xc3]
   8031 ;
   8032 ; X64-LABEL: test_zero_mask_expand_load_pd_512:
   8033 ; X64:       ## %bb.0:
   8034 ; X64-NEXT:    kxorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x47,0xc8]
   8035 ; X64-NEXT:    vexpandpd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x88,0x07]
   8036 ; X64-NEXT:    retq ## encoding: [0xc3]
   8037   %res = call <8 x double> @llvm.x86.avx512.mask.expand.load.pd.512(i8* %addr, <8 x double> %data, i8 0)
   8038   ret <8 x double> %res
   8039 }
   8040 
   8041 define <16 x float> @test_mask_expand_load_ps_512(i8* %addr, <16 x float> %data, i16 %mask) {
   8042 ; X86-LABEL: test_mask_expand_load_ps_512:
   8043 ; X86:       ## %bb.0:
   8044 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8045 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   8046 ; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
   8047 ; X86-NEXT:    retl ## encoding: [0xc3]
   8048 ;
   8049 ; X64-LABEL: test_mask_expand_load_ps_512:
   8050 ; X64:       ## %bb.0:
   8051 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8052 ; X64-NEXT:    vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
   8053 ; X64-NEXT:    retq ## encoding: [0xc3]
   8054   %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
   8055   ret <16 x float> %res
   8056 }
   8057 
   8058 define <16 x float> @test_maskz_expand_load_ps_512(i8* %addr, i16 %mask) {
   8059 ; X86-LABEL: test_maskz_expand_load_ps_512:
   8060 ; X86:       ## %bb.0:
   8061 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8062 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   8063 ; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x00]
   8064 ; X86-NEXT:    retl ## encoding: [0xc3]
   8065 ;
   8066 ; X64-LABEL: test_maskz_expand_load_ps_512:
   8067 ; X64:       ## %bb.0:
   8068 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8069 ; X64-NEXT:    vexpandps (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x88,0x07]
   8070 ; X64-NEXT:    retq ## encoding: [0xc3]
   8071   %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> zeroinitializer, i16 %mask)
   8072   ret <16 x float> %res
   8073 }
   8074 
   8075 declare <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 %mask)
   8076 
   8077 define <16 x float> @test_expand_load_ps_512(i8* %addr, <16 x float> %data) {
   8078 ; X86-LABEL: test_expand_load_ps_512:
   8079 ; X86:       ## %bb.0:
   8080 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8081 ; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   8082 ; X86-NEXT:    vexpandps (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x00]
   8083 ; X86-NEXT:    retl ## encoding: [0xc3]
   8084 ;
   8085 ; X64-LABEL: test_expand_load_ps_512:
   8086 ; X64:       ## %bb.0:
   8087 ; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   8088 ; X64-NEXT:    vexpandps (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x88,0x07]
   8089 ; X64-NEXT:    retq ## encoding: [0xc3]
   8090   %res = call <16 x float> @llvm.x86.avx512.mask.expand.load.ps.512(i8* %addr, <16 x float> %data, i16 -1)
   8091   ret <16 x float> %res
   8092 }
   8093 
   8094 define <8 x i64> @test_mask_expand_load_q_512(i8* %addr, <8 x i64> %data, i8 %mask) {
   8095 ; X86-LABEL: test_mask_expand_load_q_512:
   8096 ; X86:       ## %bb.0:
   8097 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8098 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   8099 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   8100 ; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
   8101 ; X86-NEXT:    retl ## encoding: [0xc3]
   8102 ;
   8103 ; X64-LABEL: test_mask_expand_load_q_512:
   8104 ; X64:       ## %bb.0:
   8105 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8106 ; X64-NEXT:    vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
   8107 ; X64-NEXT:    retq ## encoding: [0xc3]
   8108   %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
   8109   ret <8 x i64> %res
   8110 }
   8111 
   8112 define <8 x i64> @test_maskz_expand_load_q_512(i8* %addr, i8 %mask) {
   8113 ; X86-LABEL: test_maskz_expand_load_q_512:
   8114 ; X86:       ## %bb.0:
   8115 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8116 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx ## encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   8117 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   8118 ; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x00]
   8119 ; X86-NEXT:    retl ## encoding: [0xc3]
   8120 ;
   8121 ; X64-LABEL: test_maskz_expand_load_q_512:
   8122 ; X64:       ## %bb.0:
   8123 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8124 ; X64-NEXT:    vpexpandq (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x89,0x07]
   8125 ; X64-NEXT:    retq ## encoding: [0xc3]
   8126   %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> zeroinitializer, i8 %mask)
   8127   ret <8 x i64> %res
   8128 }
   8129 
   8130 declare <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 %mask)
   8131 
   8132 define <8 x i64> @test_expand_load_q_512(i8* %addr, <8 x i64> %data) {
   8133 ; X86-LABEL: test_expand_load_q_512:
   8134 ; X86:       ## %bb.0:
   8135 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8136 ; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   8137 ; X86-NEXT:    vpexpandq (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x00]
   8138 ; X86-NEXT:    retl ## encoding: [0xc3]
   8139 ;
   8140 ; X64-LABEL: test_expand_load_q_512:
   8141 ; X64:       ## %bb.0:
   8142 ; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   8143 ; X64-NEXT:    vpexpandq (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x89,0x07]
   8144 ; X64-NEXT:    retq ## encoding: [0xc3]
   8145   %res = call <8 x i64> @llvm.x86.avx512.mask.expand.load.q.512(i8* %addr, <8 x i64> %data, i8 -1)
   8146   ret <8 x i64> %res
   8147 }
   8148 
   8149 define <16 x i32> @test_mask_expand_load_d_512(i8* %addr, <16 x i32> %data, i16 %mask) {
   8150 ; X86-LABEL: test_mask_expand_load_d_512:
   8151 ; X86:       ## %bb.0:
   8152 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8153 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   8154 ; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
   8155 ; X86-NEXT:    retl ## encoding: [0xc3]
   8156 ;
   8157 ; X64-LABEL: test_mask_expand_load_d_512:
   8158 ; X64:       ## %bb.0:
   8159 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8160 ; X64-NEXT:    vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
   8161 ; X64-NEXT:    retq ## encoding: [0xc3]
   8162   %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
   8163   ret <16 x i32> %res
   8164 }
   8165 
   8166 define <16 x i32> @test_maskz_expand_load_d_512(i8* %addr, i16 %mask) {
   8167 ; X86-LABEL: test_maskz_expand_load_d_512:
   8168 ; X86:       ## %bb.0:
   8169 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8170 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   8171 ; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x00]
   8172 ; X86-NEXT:    retl ## encoding: [0xc3]
   8173 ;
   8174 ; X64-LABEL: test_maskz_expand_load_d_512:
   8175 ; X64:       ## %bb.0:
   8176 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8177 ; X64-NEXT:    vpexpandd (%rdi), %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x89,0x07]
   8178 ; X64-NEXT:    retq ## encoding: [0xc3]
   8179   %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> zeroinitializer, i16 %mask)
   8180   ret <16 x i32> %res
   8181 }
   8182 
   8183 declare <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 %mask)
   8184 
   8185 define <16 x i32> @test_expand_load_d_512(i8* %addr, <16 x i32> %data) {
   8186 ; X86-LABEL: test_expand_load_d_512:
   8187 ; X86:       ## %bb.0:
   8188 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   8189 ; X86-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   8190 ; X86-NEXT:    vpexpandd (%eax), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x00]
   8191 ; X86-NEXT:    retl ## encoding: [0xc3]
   8192 ;
   8193 ; X64-LABEL: test_expand_load_d_512:
   8194 ; X64:       ## %bb.0:
   8195 ; X64-NEXT:    kxnorw %k0, %k0, %k1 ## encoding: [0xc5,0xfc,0x46,0xc8]
   8196 ; X64-NEXT:    vpexpandd (%rdi), %zmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x89,0x07]
   8197 ; X64-NEXT:    retq ## encoding: [0xc3]
   8198   %res = call <16 x i32> @llvm.x86.avx512.mask.expand.load.d.512(i8* %addr, <16 x i32> %data, i16 -1)
   8199   ret <16 x i32> %res
   8200 }
   8201 
   8202 define <16 x float> @test_mm512_maskz_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   8203 ; X86-LABEL: test_mm512_maskz_min_round_ps_sae:
   8204 ; X86:       ## %bb.0:
   8205 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8206 ; X86-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
   8207 ; X86-NEXT:    retl ## encoding: [0xc3]
   8208 ;
   8209 ; X64-LABEL: test_mm512_maskz_min_round_ps_sae:
   8210 ; X64:       ## %bb.0:
   8211 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8212 ; X64-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5d,0xc1]
   8213 ; X64-NEXT:    retq ## encoding: [0xc3]
   8214   %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
   8215   ret <16 x float> %res
   8216 }
   8217 
   8218 define <16 x float> @test_mm512_maskz_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   8219 ; X86-LABEL: test_mm512_maskz_min_round_ps_current:
   8220 ; X86:       ## %bb.0:
   8221 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8222 ; X86-NEXT:    vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
   8223 ; X86-NEXT:    retl ## encoding: [0xc3]
   8224 ;
   8225 ; X64-LABEL: test_mm512_maskz_min_round_ps_current:
   8226 ; X64:       ## %bb.0:
   8227 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8228 ; X64-NEXT:    vminps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5d,0xc1]
   8229 ; X64-NEXT:    retq ## encoding: [0xc3]
   8230   %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
   8231   ret <16 x float> %res
   8232 }
   8233 
   8234 define <16 x float> @test_mm512_mask_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   8235 ; X86-LABEL: test_mm512_mask_min_round_ps_sae:
   8236 ; X86:       ## %bb.0:
   8237 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8238 ; X86-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
   8239 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   8240 ; X86-NEXT:    retl ## encoding: [0xc3]
   8241 ;
   8242 ; X64-LABEL: test_mm512_mask_min_round_ps_sae:
   8243 ; X64:       ## %bb.0:
   8244 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8245 ; X64-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5d,0xd1]
   8246 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   8247 ; X64-NEXT:    retq ## encoding: [0xc3]
   8248   %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
   8249   ret <16 x float> %res
   8250 }
   8251 
   8252 define <16 x float> @test_mm512_mask_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   8253 ; X86-LABEL: test_mm512_mask_min_round_ps_current:
   8254 ; X86:       ## %bb.0:
   8255 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8256 ; X86-NEXT:    vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
   8257 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   8258 ; X86-NEXT:    retl ## encoding: [0xc3]
   8259 ;
   8260 ; X64-LABEL: test_mm512_mask_min_round_ps_current:
   8261 ; X64:       ## %bb.0:
   8262 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8263 ; X64-NEXT:    vminps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5d,0xd1]
   8264 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   8265 ; X64-NEXT:    retq ## encoding: [0xc3]
   8266   %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
   8267   ret <16 x float> %res
   8268 }
   8269 
   8270 define <16 x float> @test_mm512_min_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   8271 ; CHECK-LABEL: test_mm512_min_round_ps_sae:
   8272 ; CHECK:       ## %bb.0:
   8273 ; CHECK-NEXT:    vminps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5d,0xc1]
   8274 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   8275   %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
   8276   ret <16 x float> %res
   8277 }
   8278 
   8279 define <16 x float> @test_mm512_min_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   8280 ; CHECK-LABEL: test_mm512_min_round_ps_current:
   8281 ; CHECK:       ## %bb.0:
   8282 ; CHECK-NEXT:    vminps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5d,0xc1]
   8283 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   8284   %res = call <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
   8285   ret <16 x float> %res
   8286 }
   8287 declare <16 x float> @llvm.x86.avx512.mask.min.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
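        ; Note (annotation, not autogenerated): min/max do not round, so the rounding operand only
        ; selects exception behavior here: 8 = {sae} (exceptions suppressed), 4 = default (no static
        ; rounding), as the checks above and below show.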
   8288 
   8289 define <16 x float> @test_mm512_maskz_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   8290 ; X86-LABEL: test_mm512_maskz_max_round_ps_sae:
   8291 ; X86:       ## %bb.0:
   8292 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8293 ; X86-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
   8294 ; X86-NEXT:    retl ## encoding: [0xc3]
   8295 ;
   8296 ; X64-LABEL: test_mm512_maskz_max_round_ps_sae:
   8297 ; X64:       ## %bb.0:
   8298 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8299 ; X64-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x99,0x5f,0xc1]
   8300 ; X64-NEXT:    retq ## encoding: [0xc3]
   8301   %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 8)
   8302   ret <16 x float> %res
   8303 }
   8304 
   8305 define <16 x float> @test_mm512_maskz_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   8306 ; X86-LABEL: test_mm512_maskz_max_round_ps_current:
   8307 ; X86:       ## %bb.0:
   8308 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8309 ; X86-NEXT:    vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
   8310 ; X86-NEXT:    retl ## encoding: [0xc3]
   8311 ;
   8312 ; X64-LABEL: test_mm512_maskz_max_round_ps_current:
   8313 ; X64:       ## %bb.0:
   8314 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8315 ; X64-NEXT:    vmaxps %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x5f,0xc1]
   8316 ; X64-NEXT:    retq ## encoding: [0xc3]
   8317   %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 %mask, i32 4)
   8318   ret <16 x float> %res
   8319 }
   8320 
   8321 define <16 x float> @test_mm512_mask_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   8322 ; X86-LABEL: test_mm512_mask_max_round_ps_sae:
   8323 ; X86:       ## %bb.0:
   8324 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8325 ; X86-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
   8326 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   8327 ; X86-NEXT:    retl ## encoding: [0xc3]
   8328 ;
   8329 ; X64-LABEL: test_mm512_mask_max_round_ps_sae:
   8330 ; X64:       ## %bb.0:
   8331 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8332 ; X64-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x19,0x5f,0xd1]
   8333 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   8334 ; X64-NEXT:    retq ## encoding: [0xc3]
   8335   %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 8)
   8336   ret <16 x float> %res
   8337 }
   8338 
   8339 define <16 x float> @test_mm512_mask_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask) {
   8340 ; X86-LABEL: test_mm512_mask_max_round_ps_current:
   8341 ; X86:       ## %bb.0:
   8342 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8343 ; X86-NEXT:    vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
   8344 ; X86-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   8345 ; X86-NEXT:    retl ## encoding: [0xc3]
   8346 ;
   8347 ; X64-LABEL: test_mm512_mask_max_round_ps_current:
   8348 ; X64:       ## %bb.0:
   8349 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8350 ; X64-NEXT:    vmaxps %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x5f,0xd1]
   8351 ; X64-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   8352 ; X64-NEXT:    retq ## encoding: [0xc3]
   8353   %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float> %src, i16 %mask, i32 4)
   8354   ret <16 x float> %res
   8355 }
   8356 
   8357 define <16 x float> @test_mm512_max_round_ps_sae(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   8358 ; CHECK-LABEL: test_mm512_max_round_ps_sae:
   8359 ; CHECK:       ## %bb.0:
   8360 ; CHECK-NEXT:    vmaxps {sae}, %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x18,0x5f,0xc1]
   8361 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   8362   %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 8)
   8363   ret <16 x float> %res
   8364 }
   8365 
   8366 define <16 x float> @test_mm512_max_round_ps_current(<16 x float> %a0, <16 x float> %a1, i16 %mask) {
   8367 ; CHECK-LABEL: test_mm512_max_round_ps_current:
   8368 ; CHECK:       ## %bb.0:
   8369 ; CHECK-NEXT:    vmaxps %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x5f,0xc1]
   8370 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   8371   %res = call <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float> %a0, <16 x float> %a1, <16 x float>zeroinitializer, i16 -1, i32 4)
   8372   ret <16 x float> %res
   8373 }
   8374 declare <16 x float> @llvm.x86.avx512.mask.max.ps.512(<16 x float>, <16 x float>, <16 x float>, i16, i32)
   8375 
   8376 define <8 x double> @test_sqrt_pd_512(<8 x double> %a0) {
   8377 ; CHECK-LABEL: test_sqrt_pd_512:
   8378 ; CHECK:       ## %bb.0:
   8379 ; CHECK-NEXT:    vsqrtpd %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x51,0xc0]
   8380 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   8381   %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0,  <8 x double> undef, i8 -1, i32 4)
   8382   ret <8 x double> %res
   8383 }
   8384 define <8 x double> @test_mask_sqrt_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
   8385 ; X86-LABEL: test_mask_sqrt_pd_512:
   8386 ; X86:       ## %bb.0:
   8387 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   8388 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8389 ; X86-NEXT:    vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
   8390 ; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
   8391 ; X86-NEXT:    retl ## encoding: [0xc3]
   8392 ;
   8393 ; X64-LABEL: test_mask_sqrt_pd_512:
   8394 ; X64:       ## %bb.0:
   8395 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8396 ; X64-NEXT:    vsqrtpd %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x49,0x51,0xc8]
   8397 ; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
   8398 ; X64-NEXT:    retq ## encoding: [0xc3]
   8399   %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0,  <8 x double> %passthru, i8 %mask, i32 4)
   8400   ret <8 x double> %res
   8401 }
   8402 define <8 x double> @test_maskz_sqrt_pd_512(<8 x double> %a0, i8 %mask) {
   8403 ; X86-LABEL: test_maskz_sqrt_pd_512:
   8404 ; X86:       ## %bb.0:
   8405 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   8406 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8407 ; X86-NEXT:    vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
   8408 ; X86-NEXT:    retl ## encoding: [0xc3]
   8409 ;
   8410 ; X64-LABEL: test_maskz_sqrt_pd_512:
   8411 ; X64:       ## %bb.0:
   8412 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8413 ; X64-NEXT:    vsqrtpd %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x51,0xc0]
   8414 ; X64-NEXT:    retq ## encoding: [0xc3]
   8415   %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0,  <8 x double> zeroinitializer, i8 %mask, i32 4)
   8416   ret <8 x double> %res
   8417 }
   8418 define <8 x double> @test_sqrt_round_pd_512(<8 x double> %a0) {
   8419 ; CHECK-LABEL: test_sqrt_round_pd_512:
   8420 ; CHECK:       ## %bb.0:
   8421 ; CHECK-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x78,0x51,0xc0]
   8422 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   8423   %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0,  <8 x double> undef, i8 -1, i32 11)
   8424   ret <8 x double> %res
   8425 }
   8426 define <8 x double> @test_mask_sqrt_round_pd_512(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
   8427 ; X86-LABEL: test_mask_sqrt_round_pd_512:
   8428 ; X86:       ## %bb.0:
   8429 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   8430 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8431 ; X86-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
   8432 ; X86-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
   8433 ; X86-NEXT:    retl ## encoding: [0xc3]
   8434 ;
   8435 ; X64-LABEL: test_mask_sqrt_round_pd_512:
   8436 ; X64:       ## %bb.0:
   8437 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8438 ; X64-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x79,0x51,0xc8]
   8439 ; X64-NEXT:    vmovapd %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0x28,0xc1]
   8440 ; X64-NEXT:    retq ## encoding: [0xc3]
   8441   %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0,  <8 x double> %passthru, i8 %mask, i32 11)
   8442   ret <8 x double> %res
   8443 }
   8444 define <8 x double> @test_maskz_sqrt_round_pd_512(<8 x double> %a0, i8 %mask) {
   8445 ; X86-LABEL: test_maskz_sqrt_round_pd_512:
   8446 ; X86:       ## %bb.0:
   8447 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   8448 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8449 ; X86-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
   8450 ; X86-NEXT:    retl ## encoding: [0xc3]
   8451 ;
   8452 ; X64-LABEL: test_maskz_sqrt_round_pd_512:
   8453 ; X64:       ## %bb.0:
   8454 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8455 ; X64-NEXT:    vsqrtpd {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xf9,0x51,0xc0]
   8456 ; X64-NEXT:    retq ## encoding: [0xc3]
   8457   %res = call <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double> %a0,  <8 x double> zeroinitializer, i8 %mask, i32 11)
   8458   ret <8 x double> %res
   8459 }
   8460 declare <8 x double> @llvm.x86.avx512.mask.sqrt.pd.512(<8 x double>, <8 x double>, i8, i32) nounwind readnone
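        ; Note (annotation, not autogenerated): rounding operand 11 (8 | 3) requests {rz-sae},
        ; i.e. round toward zero with exceptions suppressed, matching the vsqrtpd/vsqrtps checks.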
   8461 
   8462 define <16 x float> @test_sqrt_ps_512(<16 x float> %a0) {
   8463 ; CHECK-LABEL: test_sqrt_ps_512:
   8464 ; CHECK:       ## %bb.0:
   8465 ; CHECK-NEXT:    vsqrtps %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x51,0xc0]
   8466 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   8467   %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> undef, i16 -1, i32 4)
   8468   ret <16 x float> %res
   8469 }
   8470 define <16 x float> @test_mask_sqrt_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
   8471 ; X86-LABEL: test_mask_sqrt_ps_512:
   8472 ; X86:       ## %bb.0:
   8473 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8474 ; X86-NEXT:    vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
   8475 ; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   8476 ; X86-NEXT:    retl ## encoding: [0xc3]
   8477 ;
   8478 ; X64-LABEL: test_mask_sqrt_ps_512:
   8479 ; X64:       ## %bb.0:
   8480 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8481 ; X64-NEXT:    vsqrtps %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x49,0x51,0xc8]
   8482 ; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   8483 ; X64-NEXT:    retq ## encoding: [0xc3]
   8484   %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 4)
   8485   ret <16 x float> %res
   8486 }
   8487 define <16 x float> @test_maskz_sqrt_ps_512(<16 x float> %a0, i16 %mask) {
   8488 ; X86-LABEL: test_maskz_sqrt_ps_512:
   8489 ; X86:       ## %bb.0:
   8490 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8491 ; X86-NEXT:    vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
   8492 ; X86-NEXT:    retl ## encoding: [0xc3]
   8493 ;
   8494 ; X64-LABEL: test_maskz_sqrt_ps_512:
   8495 ; X64:       ## %bb.0:
   8496 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8497 ; X64-NEXT:    vsqrtps %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xc9,0x51,0xc0]
   8498 ; X64-NEXT:    retq ## encoding: [0xc3]
   8499   %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 4)
   8500   ret <16 x float> %res
   8501 }
   8502 define <16 x float> @test_sqrt_round_ps_512(<16 x float> %a0) {
   8503 ; CHECK-LABEL: test_sqrt_round_ps_512:
   8504 ; CHECK:       ## %bb.0:
   8505 ; CHECK-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x78,0x51,0xc0]
   8506 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   8507   %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 -1, i32 11)
   8508   ret <16 x float> %res
   8509 }
   8510 define <16 x float> @test_mask_sqrt_round_ps_512(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
   8511 ; X86-LABEL: test_mask_sqrt_round_ps_512:
   8512 ; X86:       ## %bb.0:
   8513 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8514 ; X86-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
   8515 ; X86-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   8516 ; X86-NEXT:    retl ## encoding: [0xc3]
   8517 ;
   8518 ; X64-LABEL: test_mask_sqrt_round_ps_512:
   8519 ; X64:       ## %bb.0:
   8520 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8521 ; X64-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7c,0x79,0x51,0xc8]
   8522 ; X64-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   8523 ; X64-NEXT:    retq ## encoding: [0xc3]
   8524   %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> %passthru, i16 %mask, i32 11)
   8525   ret <16 x float> %res
   8526 }
   8527 define <16 x float> @test_maskz_sqrt_round_ps_512(<16 x float> %a0, i16 %mask) {
   8528 ; X86-LABEL: test_maskz_sqrt_round_ps_512:
   8529 ; X86:       ## %bb.0:
   8530 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8531 ; X86-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
   8532 ; X86-NEXT:    retl ## encoding: [0xc3]
   8533 ;
   8534 ; X64-LABEL: test_maskz_sqrt_round_ps_512:
   8535 ; X64:       ## %bb.0:
   8536 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8537 ; X64-NEXT:    vsqrtps {rz-sae}, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xf9,0x51,0xc0]
   8538 ; X64-NEXT:    retq ## encoding: [0xc3]
   8539   %res = call <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float> %a0, <16 x float> zeroinitializer, i16 %mask, i32 11)
   8540   ret <16 x float> %res
   8541 }
   8542 declare <16 x float> @llvm.x86.avx512.mask.sqrt.ps.512(<16 x float>, <16 x float>, i16, i32) nounwind readnone
   8543 
   8544 declare <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   8545 
   8546 define <16 x i32>@test_int_x86_avx512_mask_prolv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   8547 ; X86-LABEL: test_int_x86_avx512_mask_prolv_d_512:
   8548 ; X86:       ## %bb.0:
   8549 ; X86-NEXT:    vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9]
   8550 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8551 ; X86-NEXT:    vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
   8552 ; X86-NEXT:    vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
   8553 ; X86-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   8554 ; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   8555 ; X86-NEXT:    retl ## encoding: [0xc3]
   8556 ;
   8557 ; X64-LABEL: test_int_x86_avx512_mask_prolv_d_512:
   8558 ; X64:       ## %bb.0:
   8559 ; X64-NEXT:    vprolvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x15,0xd9]
   8560 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8561 ; X64-NEXT:    vprolvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x15,0xd1]
   8562 ; X64-NEXT:    vprolvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x15,0xc1]
   8563 ; X64-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   8564 ; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   8565 ; X64-NEXT:    retq ## encoding: [0xc3]
   8566   %res = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   8567   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
   8568   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prolv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   8569   %res3 = add <16 x i32> %res, %res1
   8570   %res4 = add <16 x i32> %res3, %res2
   8571   ret <16 x i32> %res4
   8572 }
   8573 
   8574 declare <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   8575 
   8576 define <8 x i64>@test_int_x86_avx512_mask_prolv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   8577 ; X86-LABEL: test_int_x86_avx512_mask_prolv_q_512:
   8578 ; X86:       ## %bb.0:
   8579 ; X86-NEXT:    vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9]
   8580 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   8581 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8582 ; X86-NEXT:    vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
   8583 ; X86-NEXT:    vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
   8584 ; X86-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   8585 ; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   8586 ; X86-NEXT:    retl ## encoding: [0xc3]
   8587 ;
   8588 ; X64-LABEL: test_int_x86_avx512_mask_prolv_q_512:
   8589 ; X64:       ## %bb.0:
   8590 ; X64-NEXT:    vprolvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x15,0xd9]
   8591 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8592 ; X64-NEXT:    vprolvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x15,0xd1]
   8593 ; X64-NEXT:    vprolvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x15,0xc1]
   8594 ; X64-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   8595 ; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   8596 ; X64-NEXT:    retq ## encoding: [0xc3]
   8597   %res = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   8598   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
   8599   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prolv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   8600   %res3 = add <8 x i64> %res, %res1
   8601   %res4 = add <8 x i64> %res3, %res2
   8602   ret <8 x i64> %res4
   8603 }
   8604 
   8605 declare <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32>, <16 x i32>, <16 x i32>, i16)
   8606 
   8607 define <16 x i32>@test_int_x86_avx512_mask_prorv_d_512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3) {
   8608 ; X86-LABEL: test_int_x86_avx512_mask_prorv_d_512:
   8609 ; X86:       ## %bb.0:
   8610 ; X86-NEXT:    vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9]
   8611 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   8612 ; X86-NEXT:    vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
   8613 ; X86-NEXT:    vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
   8614 ; X86-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   8615 ; X86-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   8616 ; X86-NEXT:    retl ## encoding: [0xc3]
   8617 ;
   8618 ; X64-LABEL: test_int_x86_avx512_mask_prorv_d_512:
   8619 ; X64:       ## %bb.0:
   8620 ; X64-NEXT:    vprorvd %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0x7d,0x48,0x14,0xd9]
   8621 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8622 ; X64-NEXT:    vprorvd %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x49,0x14,0xd1]
   8623 ; X64-NEXT:    vprorvd %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xc9,0x14,0xc1]
   8624 ; X64-NEXT:    vpaddd %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc3]
   8625 ; X64-NEXT:    vpaddd %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc0]
   8626 ; X64-NEXT:    retq ## encoding: [0xc3]
   8627   %res = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 %x3)
   8628   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> zeroinitializer, i16 %x3)
   8629   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prorv.d.512(<16 x i32> %x0, <16 x i32> %x1, <16 x i32> %x2, i16 -1)
   8630   %res3 = add <16 x i32> %res, %res1
   8631   %res4 = add <16 x i32> %res3, %res2
   8632   ret <16 x i32> %res4
   8633 }
   8634 
   8635 declare <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64>, <8 x i64>, <8 x i64>, i8)
   8636 
   8637 define <8 x i64>@test_int_x86_avx512_mask_prorv_q_512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3) {
   8638 ; X86-LABEL: test_int_x86_avx512_mask_prorv_q_512:
   8639 ; X86:       ## %bb.0:
   8640 ; X86-NEXT:    vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9]
   8641 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x04]
   8642 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8643 ; X86-NEXT:    vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
   8644 ; X86-NEXT:    vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
   8645 ; X86-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   8646 ; X86-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   8647 ; X86-NEXT:    retl ## encoding: [0xc3]
   8648 ;
   8649 ; X64-LABEL: test_int_x86_avx512_mask_prorv_q_512:
   8650 ; X64:       ## %bb.0:
   8651 ; X64-NEXT:    vprorvq %zmm1, %zmm0, %zmm3 ## encoding: [0x62,0xf2,0xfd,0x48,0x14,0xd9]
   8652 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8653 ; X64-NEXT:    vprorvq %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x49,0x14,0xd1]
   8654 ; X64-NEXT:    vprorvq %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xc9,0x14,0xc1]
   8655 ; X64-NEXT:    vpaddq %zmm3, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc3]
   8656 ; X64-NEXT:    vpaddq %zmm0, %zmm2, %zmm0 ## encoding: [0x62,0xf1,0xed,0x48,0xd4,0xc0]
   8657 ; X64-NEXT:    retq ## encoding: [0xc3]
   8658   %res = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 %x3)
   8659   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> zeroinitializer, i8 %x3)
   8660   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prorv.q.512(<8 x i64> %x0, <8 x i64> %x1, <8 x i64> %x2, i8 -1)
   8661   %res3 = add <8 x i64> %res, %res1
   8662   %res4 = add <8 x i64> %res3, %res2
   8663   ret <8 x i64> %res4
   8664 }
   8665 
   8666 declare <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32>, i32, <16 x i32>, i16)
   8667 
   8668 define <16 x i32>@test_int_x86_avx512_mask_prol_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
   8669 ; X86-LABEL: test_int_x86_avx512_mask_prol_d_512:
   8670 ; X86:       ## %bb.0:
   8671 ; X86-NEXT:    vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03]
   8672 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   8673 ; X86-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
   8674 ; X86-NEXT:    vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03]
   8675 ; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   8676 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   8677 ; X86-NEXT:    retl ## encoding: [0xc3]
   8678 ;
   8679 ; X64-LABEL: test_int_x86_avx512_mask_prol_d_512:
   8680 ; X64:       ## %bb.0:
   8681 ; X64-NEXT:    vprold $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc8,0x03]
   8682 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8683 ; X64-NEXT:    vprold $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc8,0x03]
   8684 ; X64-NEXT:    vprold $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc8,0x03]
   8685 ; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   8686 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   8687 ; X64-NEXT:    retq ## encoding: [0xc3]
   8688   %res = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
   8689   %res1 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
   8690   %res2 = call <16 x i32> @llvm.x86.avx512.mask.prol.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
   8691   %res3 = add <16 x i32> %res, %res1
   8692   %res4 = add <16 x i32> %res3, %res2
   8693   ret <16 x i32> %res4
   8694 }
   8695 
   8696 declare <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64>, i32, <8 x i64>, i8)
   8697 
   8698 define <8 x i64>@test_int_x86_avx512_mask_prol_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
   8699 ; X86-LABEL: test_int_x86_avx512_mask_prol_q_512:
   8700 ; X86:       ## %bb.0:
   8701 ; X86-NEXT:    vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03]
   8702 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   8703 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8704 ; X86-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
   8705 ; X86-NEXT:    vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03]
   8706 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   8707 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   8708 ; X86-NEXT:    retl ## encoding: [0xc3]
   8709 ;
   8710 ; X64-LABEL: test_int_x86_avx512_mask_prol_q_512:
   8711 ; X64:       ## %bb.0:
   8712 ; X64-NEXT:    vprolq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc8,0x03]
   8713 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8714 ; X64-NEXT:    vprolq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc8,0x03]
   8715 ; X64-NEXT:    vprolq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc8,0x03]
   8716 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   8717 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   8718 ; X64-NEXT:    retq ## encoding: [0xc3]
   8719   %res = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
   8720   %res1 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
   8721   %res2 = call <8 x i64> @llvm.x86.avx512.mask.prol.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
   8722   %res3 = add <8 x i64> %res, %res1
   8723   %res4 = add <8 x i64> %res3, %res2
   8724   ret <8 x i64> %res4
   8725 }
   8726 
   8727 declare <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32>, i32, <16 x i32>, i16)
   8728 
   8729 define <16 x i32>@test_int_x86_avx512_mask_pror_d_512(<16 x i32> %x0, i32 %x1, <16 x i32> %x2, i16 %x3) {
   8730 ; X86-LABEL: test_int_x86_avx512_mask_pror_d_512:
   8731 ; X86:       ## %bb.0:
   8732 ; X86-NEXT:    vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03]
   8733 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 ## encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   8734 ; X86-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
   8735 ; X86-NEXT:    vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03]
   8736 ; X86-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   8737 ; X86-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   8738 ; X86-NEXT:    retl ## encoding: [0xc3]
   8739 ;
   8740 ; X64-LABEL: test_int_x86_avx512_mask_pror_d_512:
   8741 ; X64:       ## %bb.0:
   8742 ; X64-NEXT:    vprord $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0x6d,0x48,0x72,0xc0,0x03]
   8743 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8744 ; X64-NEXT:    vprord $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x49,0x72,0xc0,0x03]
   8745 ; X64-NEXT:    vprord $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0x72,0xc0,0x03]
   8746 ; X64-NEXT:    vpaddd %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfe,0xc2]
   8747 ; X64-NEXT:    vpaddd %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x75,0x48,0xfe,0xc0]
   8748 ; X64-NEXT:    retq ## encoding: [0xc3]
   8749   %res = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 %x3)
   8750   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> zeroinitializer, i16 %x3)
   8751   %res2 = call <16 x i32> @llvm.x86.avx512.mask.pror.d.512(<16 x i32> %x0, i32 3, <16 x i32> %x2, i16 -1)
   8752   %res3 = add <16 x i32> %res, %res1
   8753   %res4 = add <16 x i32> %res3, %res2
   8754   ret <16 x i32> %res4
   8755 }
   8756 
   8757 declare <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64>, i32, <8 x i64>, i8)
   8758 
   8759 define <8 x i64>@test_int_x86_avx512_mask_pror_q_512(<8 x i64> %x0, i32 %x1, <8 x i64> %x2, i8 %x3) {
   8760 ; X86-LABEL: test_int_x86_avx512_mask_pror_q_512:
   8761 ; X86:       ## %bb.0:
   8762 ; X86-NEXT:    vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03]
   8763 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax ## encoding: [0x0f,0xb6,0x44,0x24,0x08]
   8764 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8765 ; X86-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
   8766 ; X86-NEXT:    vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03]
   8767 ; X86-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   8768 ; X86-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   8769 ; X86-NEXT:    retl ## encoding: [0xc3]
   8770 ;
   8771 ; X64-LABEL: test_int_x86_avx512_mask_pror_q_512:
   8772 ; X64:       ## %bb.0:
   8773 ; X64-NEXT:    vprorq $3, %zmm0, %zmm2 ## encoding: [0x62,0xf1,0xed,0x48,0x72,0xc0,0x03]
   8774 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   8775 ; X64-NEXT:    vprorq $3, %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0xf5,0x49,0x72,0xc0,0x03]
   8776 ; X64-NEXT:    vprorq $3, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xc9,0x72,0xc0,0x03]
   8777 ; X64-NEXT:    vpaddq %zmm2, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0xfd,0x48,0xd4,0xc2]
   8778 ; X64-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 ## encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
   8779 ; X64-NEXT:    retq ## encoding: [0xc3]
   8780   %res = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 %x3)
   8781   %res1 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> zeroinitializer, i8 %x3)
   8782   %res2 = call <8 x i64> @llvm.x86.avx512.mask.pror.q.512(<8 x i64> %x0, i32 3, <8 x i64> %x2, i8 -1)
   8783   %res3 = add <8 x i64> %res, %res1
   8784   %res4 = add <8 x i64> %res3, %res2
   8785   ret <8 x i64> %res4
   8786 }
   8787 
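; The scalar FMA tests below exercise the legacy masked vfmadd.sd/ss intrinsics,
; which carry an explicit rounding operand: i32 4 selects the current rounding
; mode, while i32 3 yields the {rz-sae} (round-toward-zero, suppress-exceptions)
; forms seen in the checks. The mask/maskz/mask3 prefixes differ only in which
; operand supplies the passthrough value. A hedged C-level sketch of the
; merge-masked, statically rounded call, assuming standard <immintrin.h> names:
;   __m128d r = _mm_mask_fmadd_round_sd(a, k, b, c,
;                                       _MM_FROUND_TO_ZERO | _MM_FROUND_NO_EXC);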
   8788 declare <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
   8789 
   8790 define <2 x double>@test_int_x86_avx512_mask_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
   8791 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
   8792 ; X86:       ## %bb.0:
   8793 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   8794 ; X86-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
   8795 ; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xda]
   8796 ; X86-NEXT:    ## xmm3 = (xmm1 * xmm3) + xmm2
   8797 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8798 ; X86-NEXT:    vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
   8799 ; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xe2]
   8800 ; X86-NEXT:    ## xmm4 = (xmm1 * xmm4) + xmm2
   8801 ; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
   8802 ; X86-NEXT:    vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
   8803 ; X86-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
   8804 ; X86-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa9,0xc2]
   8805 ; X86-NEXT:    vaddpd %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc0]
   8806 ; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   8807 ; X86-NEXT:    retl ## encoding: [0xc3]
   8808 ;
   8809 ; X64-LABEL: test_int_x86_avx512_mask_vfmadd_sd:
   8810 ; X64:       ## %bb.0:
   8811 ; X64-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
   8812 ; X64-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa9,0xda]
   8813 ; X64-NEXT:    ## xmm3 = (xmm1 * xmm3) + xmm2
   8814 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8815 ; X64-NEXT:    vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
   8816 ; X64-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa9,0xe2]
   8817 ; X64-NEXT:    ## xmm4 = (xmm1 * xmm4) + xmm2
   8818 ; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
   8819 ; X64-NEXT:    vmovapd %xmm0, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe0]
   8820 ; X64-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0xf5,0x78,0xa9,0xe2]
   8821 ; X64-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x79,0xa9,0xc2]
   8822 ; X64-NEXT:    vaddpd %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc0]
   8823 ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   8824 ; X64-NEXT:    retq ## encoding: [0xc3]
   8825   %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
   8826   %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
   8827   %res2 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
   8828   %res3 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
   8829   %res4 = fadd <2 x double> %res, %res1
   8830   %res5 = fadd <2 x double> %res2, %res3
   8831   %res6 = fadd <2 x double> %res4, %res5
   8832   ret <2 x double> %res6
   8833 }
   8834 
   8835 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
   8836 
   8837 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
   8838 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
   8839 ; X86:       ## %bb.0:
   8840 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   8841 ; X86-NEXT:    vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
   8842 ; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xda]
   8843 ; X86-NEXT:    ## xmm3 = (xmm1 * xmm3) + xmm2
   8844 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8845 ; X86-NEXT:    vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
   8846 ; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xe2]
   8847 ; X86-NEXT:    ## xmm4 = (xmm1 * xmm4) + xmm2
   8848 ; X86-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
   8849 ; X86-NEXT:    vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
   8850 ; X86-NEXT:    vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
   8851 ; X86-NEXT:    vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa9,0xc2]
   8852 ; X86-NEXT:    vaddps %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc0]
   8853 ; X86-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
   8854 ; X86-NEXT:    retl ## encoding: [0xc3]
   8855 ;
   8856 ; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss:
   8857 ; X64:       ## %bb.0:
   8858 ; X64-NEXT:    vmovaps %xmm0, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xd8]
   8859 ; X64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa9,0xda]
   8860 ; X64-NEXT:    ## xmm3 = (xmm1 * xmm3) + xmm2
   8861 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8862 ; X64-NEXT:    vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
   8863 ; X64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa9,0xe2]
   8864 ; X64-NEXT:    ## xmm4 = (xmm1 * xmm4) + xmm2
   8865 ; X64-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
   8866 ; X64-NEXT:    vmovaps %xmm0, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe0]
   8867 ; X64-NEXT:    vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm4 ## encoding: [0x62,0xf2,0x75,0x78,0xa9,0xe2]
   8868 ; X64-NEXT:    vfmadd213ss {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x79,0xa9,0xc2]
   8869 ; X64-NEXT:    vaddps %xmm0, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc0]
   8870 ; X64-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
   8871 ; X64-NEXT:    retq ## encoding: [0xc3]
   8872   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
   8873   %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
   8874   %res2 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
   8875   %res3 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
   8876   %res4 = fadd <4 x float> %res, %res1
   8877   %res5 = fadd <4 x float> %res2, %res3
   8878   %res6 = fadd <4 x float> %res4, %res5
   8879   ret <4 x float> %res6
   8880 }
   8881 
   8882 declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
   8883 
   8884 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
   8885 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
   8886 ; X86:       ## %bb.0:
   8887 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   8888 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8889 ; X86-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
   8890 ; X86-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
   8891 ; X86-NEXT:    ## xmm3 = (xmm1 * xmm3) + xmm2
   8892 ; X86-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2]
   8893 ; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   8894 ; X86-NEXT:    retl ## encoding: [0xc3]
   8895 ;
   8896 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_sd:
   8897 ; X64:       ## %bb.0:
   8898 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8899 ; X64-NEXT:    vmovapd %xmm0, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xd8]
   8900 ; X64-NEXT:    vfmadd213sd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa9,0xda]
   8901 ; X64-NEXT:    ## xmm3 = (xmm1 * xmm3) + xmm2
   8902 ; X64-NEXT:    vfmadd213sd {rz-sae}, %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xf9,0xa9,0xc2]
   8903 ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   8904 ; X64-NEXT:    retq ## encoding: [0xc3]
   8905   %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
   8906   %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
   8907   %res2 = fadd <2 x double> %res, %res1
   8908   ret <2 x double> %res2
   8909 }
   8910 
   8911 declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
   8912 
   8913 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
   8914 ; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
   8915 ; X86:       ## %bb.0:
   8916 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   8917 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8918 ; X86-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
   8919 ; X86-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
   8920 ; X86-NEXT:    retl ## encoding: [0xc3]
   8921 ;
   8922 ; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ss:
   8923 ; X64:       ## %bb.0:
   8924 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8925 ; X64-NEXT:    vfmadd213ss %xmm2, %xmm1, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa9,0xc2]
   8926 ; X64-NEXT:    ## xmm0 = (xmm1 * xmm0) + xmm2
   8927 ; X64-NEXT:    retq ## encoding: [0xc3]
   8928   %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
   8929   %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
   8930   %res2 = fadd <4 x float> %res, %res1
   8931   ret <4 x float> %res
   8932 }

    8933 declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
   8934 
   8935 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
   8936 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
   8937 ; X86:       ## %bb.0:
   8938 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   8939 ; X86-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
   8940 ; X86-NEXT:    vfmadd231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0xd9]
   8941 ; X86-NEXT:    ## xmm3 = (xmm0 * xmm1) + xmm3
   8942 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8943 ; X86-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   8944 ; X86-NEXT:    vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xe1]
   8945 ; X86-NEXT:    ## xmm4 = (xmm0 * xmm1) + xmm4
   8946 ; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
   8947 ; X86-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   8948 ; X86-NEXT:    vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
   8949 ; X86-NEXT:    vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xb9,0xd1]
   8950 ; X86-NEXT:    vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
   8951 ; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   8952 ; X86-NEXT:    retl ## encoding: [0xc3]
   8953 ;
   8954 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_sd:
   8955 ; X64:       ## %bb.0:
   8956 ; X64-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
   8957 ; X64-NEXT:    vfmadd231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0xd9]
   8958 ; X64-NEXT:    ## xmm3 = (xmm0 * xmm1) + xmm3
   8959 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   8960 ; X64-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   8961 ; X64-NEXT:    vfmadd231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb9,0xe1]
   8962 ; X64-NEXT:    ## xmm4 = (xmm0 * xmm1) + xmm4
   8963 ; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
   8964 ; X64-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   8965 ; X64-NEXT:    vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xb9,0xe1]
   8966 ; X64-NEXT:    vfmadd231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xb9,0xd1]
   8967 ; X64-NEXT:    vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
   8968 ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   8969 ; X64-NEXT:    retq ## encoding: [0xc3]
   8970   %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
   8971   %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
   8972   %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
   8973   %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
   8974   %res4 = fadd <2 x double> %res, %res1
   8975   %res5 = fadd <2 x double> %res2, %res3
   8976   %res6 = fadd <2 x double> %res4, %res5
   8977   ret <2 x double> %res6
   8978 }
   8979 
   8980 declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
   8981 
   8982 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
   8983 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
   8984 ; X86:       ## %bb.0:
   8985 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   8986 ; X86-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
   8987 ; X86-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd9]
   8988 ; X86-NEXT:    ## xmm3 = (xmm0 * xmm1) + xmm3
   8989 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   8990 ; X86-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   8991 ; X86-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xe1]
   8992 ; X86-NEXT:    ## xmm4 = (xmm0 * xmm1) + xmm4
   8993 ; X86-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
   8994 ; X86-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   8995 ; X86-NEXT:    vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
   8996 ; X86-NEXT:    vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xb9,0xd1]
   8997 ; X86-NEXT:    vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
   8998 ; X86-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
   8999 ; X86-NEXT:    retl ## encoding: [0xc3]
   9000 ;
   9001 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss:
   9002 ; X64:       ## %bb.0:
   9003 ; X64-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
   9004 ; X64-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0xd9]
   9005 ; X64-NEXT:    ## xmm3 = (xmm0 * xmm1) + xmm3
   9006 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   9007 ; X64-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9008 ; X64-NEXT:    vfmadd231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0xe1]
   9009 ; X64-NEXT:    ## xmm4 = (xmm0 * xmm1) + xmm4
   9010 ; X64-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
   9011 ; X64-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9012 ; X64-NEXT:    vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xb9,0xe1]
   9013 ; X64-NEXT:    vfmadd231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xb9,0xd1]
   9014 ; X64-NEXT:    vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
   9015 ; X64-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
   9016 ; X64-NEXT:    retq ## encoding: [0xc3]
   9017   %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
   9018   %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
   9019   %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
   9020   %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
   9021   %res4 = fadd <4 x float> %res, %res1
   9022   %res5 = fadd <4 x float> %res2, %res3
   9023   %res6 = fadd <4 x float> %res4, %res5
   9024   ret <4 x float> %res6
   9025 }
   9026 
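; The *_memfold tests build a scalar load up into a vector with insertelement,
; run it through the masked (or zero-masked) scalar FMA intrinsic, and store the
; low element back. The checks verify that the memory operand is folded into the
; FMA where the masking semantics allow it, and otherwise that an explicit
; vmovss/vmovsd load followed by a masked blend is emitted.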
   9027 define void @fmadd_ss_mask_memfold(float* %a, float* %b, i8 %c) {
   9028 ; X86-LABEL: fmadd_ss_mask_memfold:
   9029 ; X86:       ## %bb.0:
   9030 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
   9031 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
   9032 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
   9033 ; X86-NEXT:    vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
   9034 ; X86-NEXT:    ## xmm0 = mem[0],zero,zero,zero
   9035 ; X86-NEXT:    vmovss (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x09]
   9036 ; X86-NEXT:    ## xmm1 = mem[0],zero,zero,zero
   9037 ; X86-NEXT:    vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
   9038 ; X86-NEXT:    ## xmm1 = (xmm0 * xmm1) + xmm0
   9039 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   9040 ; X86-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
   9041 ; X86-NEXT:    vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
   9042 ; X86-NEXT:    retl ## encoding: [0xc3]
   9043 ;
   9044 ; X64-LABEL: fmadd_ss_mask_memfold:
   9045 ; X64:       ## %bb.0:
   9046 ; X64-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
   9047 ; X64-NEXT:    ## xmm0 = mem[0],zero,zero,zero
   9048 ; X64-NEXT:    vmovss (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x0e]
   9049 ; X64-NEXT:    ## xmm1 = mem[0],zero,zero,zero
   9050 ; X64-NEXT:    vfmadd213ss %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xa9,0xc8]
   9051 ; X64-NEXT:    ## xmm1 = (xmm0 * xmm1) + xmm0
   9052 ; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
   9053 ; X64-NEXT:    vmovss %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x10,0xc1]
   9054 ; X64-NEXT:    vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
   9055 ; X64-NEXT:    retq ## encoding: [0xc3]
   9056   %a.val = load float, float* %a
   9057   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   9058   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
   9059   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
   9060   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
   9061 
   9062   %b.val = load float, float* %b
   9063   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
   9064   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
   9065   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
   9066   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
   9067 
   9068   %vr = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)
   9069 
   9070   %sr = extractelement <4 x float> %vr, i32 0
   9071   store float %sr, float* %a
   9072   ret void
   9073 }
   9074 
   9075 define void @fmadd_ss_maskz_memfold(float* %a, float* %b, i8 %c) {
   9076 ; X86-LABEL: fmadd_ss_maskz_memfold:
   9077 ; X86:       ## %bb.0:
   9078 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
   9079 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
   9080 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
   9081 ; X86-NEXT:    vmovss (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x02]
   9082 ; X86-NEXT:    ## xmm0 = mem[0],zero,zero,zero
   9083 ; X86-NEXT:    vfmadd231ss (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x01]
   9084 ; X86-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
   9085 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   9086 ; X86-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
   9087 ; X86-NEXT:    vmovss %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x02]
   9088 ; X86-NEXT:    retl ## encoding: [0xc3]
   9089 ;
   9090 ; X64-LABEL: fmadd_ss_maskz_memfold:
   9091 ; X64:       ## %bb.0:
   9092 ; X64-NEXT:    vmovss (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x10,0x07]
   9093 ; X64-NEXT:    ## xmm0 = mem[0],zero,zero,zero
   9094 ; X64-NEXT:    vfmadd231ss (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xb9,0x06]
   9095 ; X64-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
   9096 ; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
   9097 ; X64-NEXT:    vmovss %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x10,0xc0]
   9098 ; X64-NEXT:    vmovss %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfa,0x11,0x07]
   9099 ; X64-NEXT:    retq ## encoding: [0xc3]
   9100   %a.val = load float, float* %a
   9101   %av0 = insertelement <4 x float> undef, float %a.val, i32 0
   9102   %av1 = insertelement <4 x float> %av0, float 0.000000e+00, i32 1
   9103   %av2 = insertelement <4 x float> %av1, float 0.000000e+00, i32 2
   9104   %av  = insertelement <4 x float> %av2, float 0.000000e+00, i32 3
   9105 
   9106   %b.val = load float, float* %b
   9107   %bv0 = insertelement <4 x float> undef, float %b.val, i32 0
   9108   %bv1 = insertelement <4 x float> %bv0, float 0.000000e+00, i32 1
   9109   %bv2 = insertelement <4 x float> %bv1, float 0.000000e+00, i32 2
   9110   %bv  = insertelement <4 x float> %bv2, float 0.000000e+00, i32 3
   9111 
   9112   %vr = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %av, <4 x float> %bv, <4 x float> %av, i8 %c, i32 4)
   9113 
   9114   %sr = extractelement <4 x float> %vr, i32 0
   9115   store float %sr, float* %a
   9116   ret void
   9117 }
   9118 
   9119 define void @fmadd_sd_mask_memfold(double* %a, double* %b, i8 %c) {
   9120 ; X86-LABEL: fmadd_sd_mask_memfold:
   9121 ; X86:       ## %bb.0:
   9122 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
   9123 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
   9124 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
   9125 ; X86-NEXT:    vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
   9126 ; X86-NEXT:    ## xmm0 = mem[0],zero
   9127 ; X86-NEXT:    vmovsd (%ecx), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x09]
   9128 ; X86-NEXT:    ## xmm1 = mem[0],zero
   9129 ; X86-NEXT:    vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
   9130 ; X86-NEXT:    ## xmm1 = (xmm0 * xmm1) + xmm0
   9131 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   9132 ; X86-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
   9133 ; X86-NEXT:    vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
   9134 ; X86-NEXT:    retl ## encoding: [0xc3]
   9135 ;
   9136 ; X64-LABEL: fmadd_sd_mask_memfold:
   9137 ; X64:       ## %bb.0:
   9138 ; X64-NEXT:    vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
   9139 ; X64-NEXT:    ## xmm0 = mem[0],zero
   9140 ; X64-NEXT:    vmovsd (%rsi), %xmm1 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x0e]
   9141 ; X64-NEXT:    ## xmm1 = mem[0],zero
   9142 ; X64-NEXT:    vfmadd213sd %xmm0, %xmm0, %xmm1 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xa9,0xc8]
   9143 ; X64-NEXT:    ## xmm1 = (xmm0 * xmm1) + xmm0
   9144 ; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
   9145 ; X64-NEXT:    vmovsd %xmm1, %xmm0, %xmm0 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x10,0xc1]
   9146 ; X64-NEXT:    vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
   9147 ; X64-NEXT:    retq ## encoding: [0xc3]
   9148   %a.val = load double, double* %a
   9149   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   9150   %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
   9151 
   9152   %b.val = load double, double* %b
   9153   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
   9154   %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
   9155 
   9156   %vr = call <2 x double> @llvm.x86.avx512.mask.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)
   9157 
   9158   %sr = extractelement <2 x double> %vr, i32 0
   9159   store double %sr, double* %a
   9160   ret void
   9161 }
   9162 
   9163 define void @fmadd_sd_maskz_memfold(double* %a, double* %b, i8 %c) {
   9164 ; X86-LABEL: fmadd_sd_maskz_memfold:
   9165 ; X86:       ## %bb.0:
   9166 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x0c]
   9167 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx ## encoding: [0x8b,0x4c,0x24,0x08]
   9168 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx ## encoding: [0x8b,0x54,0x24,0x04]
   9169 ; X86-NEXT:    vmovsd (%edx), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x02]
   9170 ; X86-NEXT:    ## xmm0 = mem[0],zero
   9171 ; X86-NEXT:    vfmadd231sd (%ecx), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x01]
   9172 ; X86-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
   9173 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   9174 ; X86-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
   9175 ; X86-NEXT:    vmovsd %xmm0, (%edx) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x02]
   9176 ; X86-NEXT:    retl ## encoding: [0xc3]
   9177 ;
   9178 ; X64-LABEL: fmadd_sd_maskz_memfold:
   9179 ; X64:       ## %bb.0:
   9180 ; X64-NEXT:    vmovsd (%rdi), %xmm0 ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x07]
   9181 ; X64-NEXT:    ## xmm0 = mem[0],zero
   9182 ; X64-NEXT:    vfmadd231sd (%rsi), %xmm0, %xmm0 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xb9,0x06]
   9183 ; X64-NEXT:    ## xmm0 = (xmm0 * mem) + xmm0
   9184 ; X64-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
   9185 ; X64-NEXT:    vmovsd %xmm0, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x10,0xc0]
   9186 ; X64-NEXT:    vmovsd %xmm0, (%rdi) ## EVEX TO VEX Compression encoding: [0xc5,0xfb,0x11,0x07]
   9187 ; X64-NEXT:    retq ## encoding: [0xc3]
   9188   %a.val = load double, double* %a
   9189   %av0 = insertelement <2 x double> undef, double %a.val, i32 0
   9190   %av = insertelement <2 x double> %av0, double 0.000000e+00, i32 1
   9191 
   9192   %b.val = load double, double* %b
   9193   %bv0 = insertelement <2 x double> undef, double %b.val, i32 0
   9194   %bv = insertelement <2 x double> %bv0, double 0.000000e+00, i32 1
   9195 
   9196   %vr = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.sd(<2 x double> %av, <2 x double> %bv, <2 x double> %av, i8 %c, i32 4)
   9197 
   9198   %sr = extractelement <2 x double> %vr, i32 0
   9199   store double %sr, double* %a
   9200   ret void
   9201 }
   9202 
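; The remaining mask3 tests repeat the vfmadd pattern for fmsub and fnmsub,
; i.e. with the addend (and, for fnmsub, the product as well) negated, as the
; "(xmm0 * xmm1) - xmm3" / "-(xmm0 * xmm1) - xmm3" comments in the checks show.
; The corresponding C-level intrinsics would presumably be _mm_mask3_fmsub_sd/ss
; and _mm_mask3_fnmsub_sd/ss, though this test drives the LLVM intrinsics
; directly.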
   9203 declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
   9204 
   9205 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
   9206 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
   9207 ; X86:       ## %bb.0:
   9208 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   9209 ; X86-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
   9210 ; X86-NEXT:    vfmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbb,0xd9]
   9211 ; X86-NEXT:    ## xmm3 = (xmm0 * xmm1) - xmm3
   9212 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   9213 ; X86-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   9214 ; X86-NEXT:    vfmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xe1]
   9215 ; X86-NEXT:    ## xmm4 = (xmm0 * xmm1) - xmm4
   9216 ; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
   9217 ; X86-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   9218 ; X86-NEXT:    vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
   9219 ; X86-NEXT:    vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbb,0xd1]
   9220 ; X86-NEXT:    vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
   9221 ; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   9222 ; X86-NEXT:    retl ## encoding: [0xc3]
   9223 ;
   9224 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_sd:
   9225 ; X64:       ## %bb.0:
   9226 ; X64-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
   9227 ; X64-NEXT:    vfmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbb,0xd9]
   9228 ; X64-NEXT:    ## xmm3 = (xmm0 * xmm1) - xmm3
   9229 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   9230 ; X64-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   9231 ; X64-NEXT:    vfmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbb,0xe1]
   9232 ; X64-NEXT:    ## xmm4 = (xmm0 * xmm1) - xmm4
   9233 ; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
   9234 ; X64-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   9235 ; X64-NEXT:    vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbb,0xe1]
   9236 ; X64-NEXT:    vfmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbb,0xd1]
   9237 ; X64-NEXT:    vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
   9238 ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   9239 ; X64-NEXT:    retq ## encoding: [0xc3]
   9240   %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
   9241   %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
   9242   %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
   9243   %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
   9244   %res4 = fadd <2 x double> %res, %res1
   9245   %res5 = fadd <2 x double> %res2, %res3
   9246   %res6 = fadd <2 x double> %res4, %res5
   9247   ret <2 x double> %res6
   9248 }
   9249 
   9250 declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
   9251 
   9252 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
   9253 ; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
   9254 ; X86:       ## %bb.0:
   9255 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   9256 ; X86-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
   9257 ; X86-NEXT:    vfmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbb,0xd9]
   9258 ; X86-NEXT:    ## xmm3 = (xmm0 * xmm1) - xmm3
   9259 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   9260 ; X86-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9261 ; X86-NEXT:    vfmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xe1]
   9262 ; X86-NEXT:    ## xmm4 = (xmm0 * xmm1) - xmm4
   9263 ; X86-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
   9264 ; X86-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9265 ; X86-NEXT:    vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
   9266 ; X86-NEXT:    vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbb,0xd1]
   9267 ; X86-NEXT:    vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
   9268 ; X86-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
   9269 ; X86-NEXT:    retl ## encoding: [0xc3]
   9270 ;
   9271 ; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ss:
   9272 ; X64:       ## %bb.0:
   9273 ; X64-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
   9274 ; X64-NEXT:    vfmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbb,0xd9]
   9275 ; X64-NEXT:    ## xmm3 = (xmm0 * xmm1) - xmm3
   9276 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   9277 ; X64-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9278 ; X64-NEXT:    vfmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbb,0xe1]
   9279 ; X64-NEXT:    ## xmm4 = (xmm0 * xmm1) - xmm4
   9280 ; X64-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
   9281 ; X64-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9282 ; X64-NEXT:    vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbb,0xe1]
   9283 ; X64-NEXT:    vfmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbb,0xd1]
   9284 ; X64-NEXT:    vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
   9285 ; X64-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
   9286 ; X64-NEXT:    retq ## encoding: [0xc3]
   9287   %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
   9288   %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
   9289   %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
   9290   %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
   9291   %res4 = fadd <4 x float> %res, %res1
   9292   %res5 = fadd <4 x float> %res2, %res3
   9293   %res6 = fadd <4 x float> %res4, %res5
   9294   ret <4 x float> %res6
   9295 }
   9296 
   9297 declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double>, <2 x double>, <2 x double>, i8, i32)
   9298 
   9299 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3,i32 %x4 ){
   9300 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
   9301 ; X86:       ## %bb.0:
   9302 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   9303 ; X86-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
   9304 ; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbf,0xd9]
   9305 ; X86-NEXT:    ## xmm3 = -(xmm0 * xmm1) - xmm3
   9306 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   9307 ; X86-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   9308 ; X86-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xe1]
   9309 ; X86-NEXT:    ## xmm4 = -(xmm0 * xmm1) - xmm4
   9310 ; X86-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
   9311 ; X86-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   9312 ; X86-NEXT:    vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
   9313 ; X86-NEXT:    vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbf,0xd1]
   9314 ; X86-NEXT:    vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
   9315 ; X86-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   9316 ; X86-NEXT:    retl ## encoding: [0xc3]
   9317 ;
   9318 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_sd:
   9319 ; X64:       ## %bb.0:
   9320 ; X64-NEXT:    vmovapd %xmm2, %xmm3 ## encoding: [0xc5,0xf9,0x28,0xda]
   9321 ; X64-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0xbf,0xd9]
   9322 ; X64-NEXT:    ## xmm3 = -(xmm0 * xmm1) - xmm3
   9323 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   9324 ; X64-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   9325 ; X64-NEXT:    vfnmsub231sd %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbf,0xe1]
   9326 ; X64-NEXT:    ## xmm4 = -(xmm0 * xmm1) - xmm4
   9327 ; X64-NEXT:    vaddpd %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe1,0x58,0xdc]
   9328 ; X64-NEXT:    vmovapd %xmm2, %xmm4 ## encoding: [0xc5,0xf9,0x28,0xe2]
   9329 ; X64-NEXT:    vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0xfd,0x78,0xbf,0xe1]
   9330 ; X64-NEXT:    vfnmsub231sd {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x79,0xbf,0xd1]
   9331 ; X64-NEXT:    vaddpd %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd9,0x58,0xc2]
   9332 ; X64-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe1,0x58,0xc0]
   9333 ; X64-NEXT:    retq ## encoding: [0xc3]
   9334   %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 4)
   9335   %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 4)
   9336   %res2 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1, i32 3)
   9337   %res3 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.sd(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3, i32 3)
   9338   %res4 = fadd <2 x double> %res, %res1
   9339   %res5 = fadd <2 x double> %res2, %res3
   9340   %res6 = fadd <2 x double> %res4, %res5
   9341   ret <2 x double> %res6
   9342 }
   9343 
   9344 declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float>, <4 x float>, <4 x float>, i8, i32)
   9345 
   9346 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3,i32 %x4 ){
   9347 ; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
   9348 ; X86:       ## %bb.0:
   9349 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al ## encoding: [0x8a,0x44,0x24,0x04]
   9350 ; X86-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
   9351 ; X86-NEXT:    vfnmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbf,0xd9]
   9352 ; X86-NEXT:    ## xmm3 = -(xmm0 * xmm1) - xmm3
   9353 ; X86-NEXT:    kmovw %eax, %k1 ## encoding: [0xc5,0xf8,0x92,0xc8]
   9354 ; X86-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9355 ; X86-NEXT:    vfnmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xe1]
   9356 ; X86-NEXT:    ## xmm4 = -(xmm0 * xmm1) - xmm4
   9357 ; X86-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
   9358 ; X86-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9359 ; X86-NEXT:    vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
   9360 ; X86-NEXT:    vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbf,0xd1]
   9361 ; X86-NEXT:    vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
   9362 ; X86-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
   9363 ; X86-NEXT:    retl ## encoding: [0xc3]
   9364 ;
   9365 ; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ss:
   9366 ; X64:       ## %bb.0:
   9367 ; X64-NEXT:    vmovaps %xmm2, %xmm3 ## encoding: [0xc5,0xf8,0x28,0xda]
   9368 ; X64-NEXT:    vfnmsub231ss %xmm1, %xmm0, %xmm3 ## EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xbf,0xd9]
   9369 ; X64-NEXT:    ## xmm3 = -(xmm0 * xmm1) - xmm3
   9370 ; X64-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   9371 ; X64-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9372 ; X64-NEXT:    vfnmsub231ss %xmm1, %xmm0, %xmm4 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbf,0xe1]
   9373 ; X64-NEXT:    ## xmm4 = -(xmm0 * xmm1) - xmm4
   9374 ; X64-NEXT:    vaddps %xmm4, %xmm3, %xmm3 ## encoding: [0xc5,0xe0,0x58,0xdc]
   9375 ; X64-NEXT:    vmovaps %xmm2, %xmm4 ## encoding: [0xc5,0xf8,0x28,0xe2]
   9376 ; X64-NEXT:    vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm4 ## encoding: [0x62,0xf2,0x7d,0x78,0xbf,0xe1]
   9377 ; X64-NEXT:    vfnmsub231ss {rz-sae}, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x79,0xbf,0xd1]
   9378 ; X64-NEXT:    vaddps %xmm2, %xmm4, %xmm0 ## encoding: [0xc5,0xd8,0x58,0xc2]
   9379 ; X64-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0xc5,0xe0,0x58,0xc0]
   9380 ; X64-NEXT:    retq ## encoding: [0xc3]
   9381   %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 4)
   9382   %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 4)
   9383   %res2 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1, i32 3)
   9384   %res3 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3, i32 3)
   9385   %res4 = fadd <4 x float> %res, %res1
   9386   %res5 = fadd <4 x float> %res2, %res3
   9387   %res6 = fadd <4 x float> %res4, %res5
   9388   ret <4 x float> %res6
   9389 }
   9390 
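; The *_rm tests pass one multiplicand through memory so the checks can confirm
; that the scalar load is folded straight into the masked FMA instruction
; (e.g. "vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1}") instead of being loaded into a
; register first.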
   9391 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1, float *%ptr_b ,i8 %x3,i32 %x4) {
   9392 ; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
   9393 ; X86:       ## %bb.0:
   9394 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   9395 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
   9396 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   9397 ; X86-NEXT:    vfmadd231ss (%eax), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x08]
   9398 ; X86-NEXT:    ## xmm1 = (xmm0 * mem) + xmm1
   9399 ; X86-NEXT:    vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
   9400 ; X86-NEXT:    retl ## encoding: [0xc3]
   9401 ;
   9402 ; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ss_rm:
   9403 ; X64:       ## %bb.0:
   9404 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   9405 ; X64-NEXT:    vfmadd231ss (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb9,0x0f]
   9406 ; X64-NEXT:    ## xmm1 = (xmm0 * mem) + xmm1
   9407 ; X64-NEXT:    vmovaps %xmm1, %xmm0 ## encoding: [0xc5,0xf8,0x28,0xc1]
   9408 ; X64-NEXT:    retq ## encoding: [0xc3]
   9409   %q = load float, float* %ptr_b
   9410   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   9411   %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ss(<4 x float> %x0, <4 x float> %vecinit.i, <4 x float> %x1, i8 %x3, i32 4)
   9412   ret < 4 x float> %res
   9413 }
   9414 
   9415 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
   9416 ; X86-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
   9417 ; X86:       ## %bb.0:
   9418 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax ## encoding: [0x8b,0x44,0x24,0x04]
   9419 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl ## encoding: [0x8a,0x4c,0x24,0x08]
   9420 ; X86-NEXT:    kmovw %ecx, %k1 ## encoding: [0xc5,0xf8,0x92,0xc9]
   9421 ; X86-NEXT:    vfmadd132ss (%eax), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x00]
   9422 ; X86-NEXT:    ## xmm0 = (xmm0 * mem) + xmm1
   9423 ; X86-NEXT:    retl ## encoding: [0xc3]
   9424 ;
   9425 ; X64-LABEL: test_int_x86_avx512_mask_vfmadd_ss_rm:
   9426 ; X64:       ## %bb.0:
   9427 ; X64-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   9428 ; X64-NEXT:    vfmadd132ss (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0x99,0x07]
   9429 ; X64-NEXT:    ## xmm0 = (xmm0 * mem) + xmm1
   9430 ; X64-NEXT:    retq ## encoding: [0xc3]
   9431   %q = load float, float* %ptr_b
   9432   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   9433   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ss(<4 x float> %x0,<4 x float> %vecinit.i, <4 x float> %x1,  i8 %x3, i32 4)
   9434   ret < 4 x float> %res
   9435 }
   9436 
   9437 
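; With a compile-time zero mask the zero-masking FMA folds away completely:
; element 0 of the result is known to be 0.0 and elements 1-3 pass through from
; %x0, so only a vxorps + vblendps pair survives. The output is identical on
; 32- and 64-bit targets, hence the combined CHECK prefix.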
   9438 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ss_rm(<4 x float> %x0, <4 x float> %x1,float *%ptr_b ,i8 %x3,i32 %x4) {
   9439 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ss_rm:
   9440 ; CHECK:       ## %bb.0:
   9441 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1 ## encoding: [0xc5,0xf0,0x57,0xc9]
   9442 ; CHECK-NEXT:    vblendps $1, %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x0c,0xc1,0x01]
   9443 ; CHECK-NEXT:    ## xmm0 = xmm1[0],xmm0[1,2,3]
   9444 ; CHECK-NEXT:    ret{{[l|q]}} ## encoding: [0xc3]
   9445   %q = load float, float* %ptr_b
   9446   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   9447   %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ss(<4 x float> %x0, <4 x float> %x1, <4 x float> %vecinit.i, i8 0, i32 4)
   9448   ret < 4 x float> %res
   9449 }
   9450