Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
      4 
      5 declare i32 @llvm.x86.avx512.kunpck.wd(i32, i32)
      6 
      7 define i32@test_int_x86_avx512_kunpck_wd(i32 %x0, i32 %x1) {
      8 ; X86-LABEL: test_int_x86_avx512_kunpck_wd:
      9 ; X86:       # %bb.0:
     10 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
     11 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
     12 ; X86-NEXT:    kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
     13 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
     14 ; X86-NEXT:    retl # encoding: [0xc3]
     15 ;
     16 ; X64-LABEL: test_int_x86_avx512_kunpck_wd:
     17 ; X64:       # %bb.0:
     18 ; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
     19 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
     20 ; X64-NEXT:    kunpckwd %k1, %k0, %k0 # encoding: [0xc5,0xfc,0x4b,0xc1]
     21 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
     22 ; X64-NEXT:    retq # encoding: [0xc3]
     23   %res = call i32 @llvm.x86.avx512.kunpck.wd(i32 %x0, i32 %x1)
     24   ret i32 %res
     25 }
     26 
     27 declare i64 @llvm.x86.avx512.kunpck.dq(i64, i64)
     28 
     29 define i64@test_int_x86_avx512_kunpck_qd(i64 %x0, i64 %x1) {
     30 ; X86-LABEL: test_int_x86_avx512_kunpck_qd:
     31 ; X86:       # %bb.0:
     32 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x04]
     33 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x0c]
     34 ; X86-NEXT:    retl # encoding: [0xc3]
     35 ;
     36 ; X64-LABEL: test_int_x86_avx512_kunpck_qd:
     37 ; X64:       # %bb.0:
     38 ; X64-NEXT:    kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7]
     39 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
     40 ; X64-NEXT:    kunpckdq %k1, %k0, %k0 # encoding: [0xc4,0xe1,0xfc,0x4b,0xc1]
     41 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
     42 ; X64-NEXT:    retq # encoding: [0xc3]
     43   %res = call i64 @llvm.x86.avx512.kunpck.dq(i64 %x0, i64 %x1)
     44   ret i64 %res
     45 }
     46 
     47 declare <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8, <64 x i8>, i64)
     48 
     49   define <64 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_512(i8 %x0, <64 x i8> %x1, i64 %mask) {
     50 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
     51 ; X86:       # %bb.0:
     52 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
     53 ; X86-NEXT:    vpbroadcastb %eax, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xc8]
     54 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
     55 ; X86-NEXT:    vpbroadcastb %eax, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc0]
     56 ; X86-NEXT:    vpbroadcastb %eax, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd0]
     57 ; X86-NEXT:    vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
     58 ; X86-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
     59 ; X86-NEXT:    retl # encoding: [0xc3]
     60 ;
     61 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_512:
     62 ; X64:       # %bb.0:
     63 ; X64-NEXT:    vpbroadcastb %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7a,0xcf]
     64 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
     65 ; X64-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7a,0xc7]
     66 ; X64-NEXT:    vpbroadcastb %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7a,0xd7]
     67 ; X64-NEXT:    vpaddb %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc2]
     68 ; X64-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
     69 ; X64-NEXT:    retq # encoding: [0xc3]
     70     %res = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 -1)
     71     %res1 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> %x1, i64 %mask)
     72     %res2 = call <64 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.512(i8 %x0, <64 x i8> zeroinitializer, i64 %mask)
     73     %res3 = add <64 x i8> %res, %res1
     74     %res4 = add <64 x i8> %res2, %res3
     75     ret <64 x i8> %res4
     76   }
     77 
     78 declare <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16, <32 x i16>, i32)
     79   define <32 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_512(i16 %x0, <32 x i16> %x1, i32 %mask) {
     80 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
     81 ; X86:       # %bb.0:
     82 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
     83 ; X86-NEXT:    vpbroadcastw %eax, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xc8]
     84 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
     85 ; X86-NEXT:    vpbroadcastw %eax, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc0]
     86 ; X86-NEXT:    vpbroadcastw %eax, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd0]
     87 ; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
     88 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
     89 ; X86-NEXT:    retl # encoding: [0xc3]
     90 ;
     91 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_512:
     92 ; X64:       # %bb.0:
     93 ; X64-NEXT:    vpbroadcastw %edi, %zmm1 # encoding: [0x62,0xf2,0x7d,0x48,0x7b,0xcf]
     94 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
     95 ; X64-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x7b,0xc7]
     96 ; X64-NEXT:    vpbroadcastw %edi, %zmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x7b,0xd7]
     97 ; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
     98 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
     99 ; X64-NEXT:    retq # encoding: [0xc3]
    100     %res = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 -1)
    101     %res1 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> %x1, i32 %mask)
    102    %res2 = call <32 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.512(i16 %x0, <32 x i16> zeroinitializer, i32 %mask)
    103     %res3 = add <32 x i16> %res, %res1
    104    %res4 = add <32 x i16> %res2, %res3
    105     ret <32 x i16> %res4
    106  }
    107 
    108 declare void @llvm.x86.avx512.mask.storeu.b.512(i8*, <64 x i8>, i64)
    109 
    110 define void@test_int_x86_avx512_mask_storeu_b_512(i8* %ptr1, i8* %ptr2, <64 x i8> %x1, i64 %x2) {
    111 ; X86-LABEL: test_int_x86_avx512_mask_storeu_b_512:
    112 ; X86:       # %bb.0:
    113 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    114 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    115 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
    116 ; X86-NEXT:    vmovdqu8 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x01]
    117 ; X86-NEXT:    vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
    118 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    119 ; X86-NEXT:    retl # encoding: [0xc3]
    120 ;
    121 ; X64-LABEL: test_int_x86_avx512_mask_storeu_b_512:
    122 ; X64:       # %bb.0:
    123 ; X64-NEXT:    kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
    124 ; X64-NEXT:    vmovdqu8 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x7f,0x07]
    125 ; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
    126 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    127 ; X64-NEXT:    retq # encoding: [0xc3]
    128   call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr1, <64 x i8> %x1, i64 %x2)
    129   call void @llvm.x86.avx512.mask.storeu.b.512(i8* %ptr2, <64 x i8> %x1, i64 -1)
    130   ret void
    131 }
    132 
    133 declare void @llvm.x86.avx512.mask.storeu.w.512(i8*, <32 x i16>, i32)
    134 
    135 define void@test_int_x86_avx512_mask_storeu_w_512(i8* %ptr1, i8* %ptr2, <32 x i16> %x1, i32 %x2) {
    136 ; X86-LABEL: test_int_x86_avx512_mask_storeu_w_512:
    137 ; X86:       # %bb.0:
    138 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    139 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    140 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
    141 ; X86-NEXT:    vmovdqu16 %zmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x01]
    142 ; X86-NEXT:    vmovdqu64 %zmm0, (%eax) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x00]
    143 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    144 ; X86-NEXT:    retl # encoding: [0xc3]
    145 ;
    146 ; X64-LABEL: test_int_x86_avx512_mask_storeu_w_512:
    147 ; X64:       # %bb.0:
    148 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    149 ; X64-NEXT:    vmovdqu16 %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x7f,0x07]
    150 ; X64-NEXT:    vmovdqu64 %zmm0, (%rsi) # encoding: [0x62,0xf1,0xfe,0x48,0x7f,0x06]
    151 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    152 ; X64-NEXT:    retq # encoding: [0xc3]
    153   call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr1, <32 x i16> %x1, i32 %x2)
    154   call void @llvm.x86.avx512.mask.storeu.w.512(i8* %ptr2, <32 x i16> %x1, i32 -1)
    155   ret void
    156 }
    157 
    158 declare <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8*, <32 x i16>, i32)
    159 
    160 define <32 x i16>@test_int_x86_avx512_mask_loadu_w_512(i8* %ptr, i8* %ptr2, <32 x i16> %x1, i32 %mask) {
    161 ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_512:
    162 ; X86:       # %bb.0:
    163 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    164 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    165 ; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
    166 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
    167 ; X86-NEXT:    vmovdqu16 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x00]
    168 ; X86-NEXT:    vmovdqu16 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x09]
    169 ; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
    170 ; X86-NEXT:    retl # encoding: [0xc3]
    171 ;
    172 ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_512:
    173 ; X64:       # %bb.0:
    174 ; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
    175 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    176 ; X64-NEXT:    vmovdqu16 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x49,0x6f,0x06]
    177 ; X64-NEXT:    vmovdqu16 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xc9,0x6f,0x0f]
    178 ; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
    179 ; X64-NEXT:    retq # encoding: [0xc3]
    180   %res0 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> %x1, i32 -1)
    181   %res = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr2, <32 x i16> %res0, i32 %mask)
    182   %res1 = call <32 x i16> @llvm.x86.avx512.mask.loadu.w.512(i8* %ptr, <32 x i16> zeroinitializer, i32 %mask)
    183   %res2 = add <32 x i16> %res, %res1
    184   ret <32 x i16> %res2
    185 }
    186 
    187 declare <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8*, <64 x i8>, i64)
    188 
    189 define <64 x i8>@test_int_x86_avx512_mask_loadu_b_512(i8* %ptr, i8* %ptr2, <64 x i8> %x1, i64 %mask) {
    190 ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_512:
    191 ; X86:       # %bb.0:
    192 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    193 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    194 ; X86-NEXT:    vmovdqu64 (%ecx), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x01]
    195 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x0c]
    196 ; X86-NEXT:    vmovdqu8 (%eax), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x00]
    197 ; X86-NEXT:    vmovdqu8 (%ecx), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x09]
    198 ; X86-NEXT:    vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
    199 ; X86-NEXT:    retl # encoding: [0xc3]
    200 ;
    201 ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_512:
    202 ; X64:       # %bb.0:
    203 ; X64-NEXT:    vmovdqu64 (%rdi), %zmm0 # encoding: [0x62,0xf1,0xfe,0x48,0x6f,0x07]
    204 ; X64-NEXT:    kmovq %rdx, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xca]
    205 ; X64-NEXT:    vmovdqu8 (%rsi), %zmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x6f,0x06]
    206 ; X64-NEXT:    vmovdqu8 (%rdi), %zmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x6f,0x0f]
    207 ; X64-NEXT:    vpaddb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc1]
    208 ; X64-NEXT:    retq # encoding: [0xc3]
    209   %res0 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> %x1, i64 -1)
    210   %res = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr2, <64 x i8> %res0, i64 %mask)
    211   %res1 = call <64 x i8> @llvm.x86.avx512.mask.loadu.b.512(i8* %ptr, <64 x i8> zeroinitializer, i64 %mask)
    212   %res2 = add <64 x i8> %res, %res1
    213   ret <64 x i8> %res2
    214 }
    215 
    216 declare <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64>, i32)
    217 
    218 define <8 x i64>@test_int_x86_avx512_psll_dq_512(<8 x i64> %x0) {
    219 ; CHECK-LABEL: test_int_x86_avx512_psll_dq_512:
    220 ; CHECK:       # %bb.0:
    221 ; CHECK-NEXT:    vpslldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xf8,0x08]
    222 ; CHECK-NEXT:    # zmm1 = zero,zero,zero,zero,zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55]
    223 ; CHECK-NEXT:    vpslldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xf8,0x04]
    224 ; CHECK-NEXT:    # zmm0 = zero,zero,zero,zero,zmm0[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,zmm0[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,zmm0[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,zmm0[48,49,50,51,52,53,54,55,56,57,58,59]
    225 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
    226 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    227   %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 8)
    228   %res1 = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
    229   %res2 = add <8 x i64> %res, %res1
    230   ret <8 x i64> %res2
    231 }
    232 
    233 define <8 x i64>@test_int_x86_avx512_psll_load_dq_512(<8 x i64>* %p0) {
    234 ; X86-LABEL: test_int_x86_avx512_psll_load_dq_512:
    235 ; X86:       # %bb.0:
    236 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    237 ; X86-NEXT:    vpslldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x38,0x04]
    238 ; X86-NEXT:    # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
    239 ; X86-NEXT:    retl # encoding: [0xc3]
    240 ;
    241 ; X64-LABEL: test_int_x86_avx512_psll_load_dq_512:
    242 ; X64:       # %bb.0:
    243 ; X64-NEXT:    vpslldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x3f,0x04]
    244 ; X64-NEXT:    # zmm0 = zero,zero,zero,zero,mem[0,1,2,3,4,5,6,7,8,9,10,11],zero,zero,zero,zero,mem[16,17,18,19,20,21,22,23,24,25,26,27],zero,zero,zero,zero,mem[32,33,34,35,36,37,38,39,40,41,42,43],zero,zero,zero,zero,mem[48,49,50,51,52,53,54,55,56,57,58,59]
    245 ; X64-NEXT:    retq # encoding: [0xc3]
    246   %x0 = load <8 x i64>, <8 x i64> *%p0
    247   %res = call <8 x i64> @llvm.x86.avx512.psll.dq.512(<8 x i64> %x0, i32 4)
    248   ret <8 x i64> %res
    249 }
    250 
    251 declare <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64>, i32)
    252 
    253 define <8 x i64>@test_int_x86_avx512_psrl_dq_512(<8 x i64> %x0) {
    254 ; CHECK-LABEL: test_int_x86_avx512_psrl_dq_512:
    255 ; CHECK:       # %bb.0:
    256 ; CHECK-NEXT:    vpsrldq $8, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x75,0x48,0x73,0xd8,0x08]
    257 ; CHECK-NEXT:    # zmm1 = zmm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[40,41,42,43,44,45,46,47],zero,zero,zero,zero,zero,zero,zero,zero,zmm0[56,57,58,59,60,61,62,63],zero,zero,zero,zero,zero,zero,zero,zero
    258 ; CHECK-NEXT:    vpsrldq $4, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0xd8,0x04]
    259 ; CHECK-NEXT:    # zmm0 = zmm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,zmm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,zmm0[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,zmm0[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
    260 ; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
    261 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    262   %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 8)
    263   %res1 = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
    264   %res2 = add <8 x i64> %res, %res1
    265   ret <8 x i64> %res2
    266 }
    267 
    268 define <8 x i64>@test_int_x86_avx512_psrl_load_dq_512(<8 x i64>* %p0) {
    269 ; X86-LABEL: test_int_x86_avx512_psrl_load_dq_512:
    270 ; X86:       # %bb.0:
    271 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    272 ; X86-NEXT:    vpsrldq $4, (%eax), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x18,0x04]
    273 ; X86-NEXT:    # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
    274 ; X86-NEXT:    retl # encoding: [0xc3]
    275 ;
    276 ; X64-LABEL: test_int_x86_avx512_psrl_load_dq_512:
    277 ; X64:       # %bb.0:
    278 ; X64-NEXT:    vpsrldq $4, (%rdi), %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x73,0x1f,0x04]
    279 ; X64-NEXT:    # zmm0 = mem[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,mem[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero,mem[36,37,38,39,40,41,42,43,44,45,46,47],zero,zero,zero,zero,mem[52,53,54,55,56,57,58,59,60,61,62,63],zero,zero,zero,zero
    280 ; X64-NEXT:    retq # encoding: [0xc3]
    281   %x0 = load <8 x i64>, <8 x i64> *%p0
    282   %res = call <8 x i64> @llvm.x86.avx512.psrl.dq.512(<8 x i64> %x0, i32 4)
    283   ret <8 x i64> %res
    284 }
    285 
    286 declare <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8>, <64 x i8>, i32, <64 x i8>, i64)
    287 
    288 define <64 x i8>@test_int_x86_avx512_mask_palignr_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x3, i64 %x4) {
    289 ; X86-LABEL: test_int_x86_avx512_mask_palignr_512:
    290 ; X86:       # %bb.0:
    291 ; X86-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x0f,0xd9,0x02]
    292 ; X86-NEXT:    # zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
    293 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    294 ; X86-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
    295 ; X86-NEXT:    # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
    296 ; X86-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
    297 ; X86-NEXT:    # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
    298 ; X86-NEXT:    vpaddb %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc3]
    299 ; X86-NEXT:    vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
    300 ; X86-NEXT:    retl # encoding: [0xc3]
    301 ;
    302 ; X64-LABEL: test_int_x86_avx512_mask_palignr_512:
    303 ; X64:       # %bb.0:
    304 ; X64-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x0f,0xd9,0x02]
    305 ; X64-NEXT:    # zmm3 = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
    306 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    307 ; X64-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x0f,0xd1,0x02]
    308 ; X64-NEXT:    # zmm2 {%k1} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
    309 ; X64-NEXT:    vpalignr $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x0f,0xc1,0x02]
    310 ; X64-NEXT:    # zmm0 {%k1} {z} = zmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],zmm0[0,1],zmm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],zmm0[16,17],zmm1[34,35,36,37,38,39,40,41,42,43,44,45,46,47],zmm0[32,33],zmm1[50,51,52,53,54,55,56,57,58,59,60,61,62,63],zmm0[48,49]
    311 ; X64-NEXT:    vpaddb %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfc,0xc3]
    312 ; X64-NEXT:    vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
    313 ; X64-NEXT:    retq # encoding: [0xc3]
    314   %res = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 %x4)
    315   %res1 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> zeroinitializer, i64 %x4)
    316   %res2 = call <64 x i8> @llvm.x86.avx512.mask.palignr.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <64 x i8> %x3, i64 -1)
    317   %res3 = add <64 x i8> %res, %res1
    318   %res4 = add <64 x i8> %res3, %res2
    319   ret <64 x i8> %res4
    320 }
    321 
    322 declare <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16>, i32, <32 x i16>, i32)
    323 
    324 define <32 x i16>@test_int_x86_avx512_mask_pshufh_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
    325 ; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
    326 ; X86:       # %bb.0:
    327 ; X86-NEXT:    vpshufhw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7e,0x48,0x70,0xd0,0x03]
    328 ; X86-NEXT:    # zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
    329 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    330 ; X86-NEXT:    vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
    331 ; X86-NEXT:    # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
    332 ; X86-NEXT:    vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
    333 ; X86-NEXT:    # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
    334 ; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
    335 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    336 ; X86-NEXT:    retl # encoding: [0xc3]
    337 ;
    338 ; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_512:
    339 ; X64:       # %bb.0:
    340 ; X64-NEXT:    vpshufhw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7e,0x48,0x70,0xd0,0x03]
    341 ; X64-NEXT:    # zmm2 = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
    342 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    343 ; X64-NEXT:    vpshufhw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x49,0x70,0xc8,0x03]
    344 ; X64-NEXT:    # zmm1 {%k1} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
    345 ; X64-NEXT:    vpshufhw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xc9,0x70,0xc0,0x03]
    346 ; X64-NEXT:    # zmm0 {%k1} {z} = zmm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12,16,17,18,19,23,20,20,20,24,25,26,27,31,28,28,28]
    347 ; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
    348 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    349 ; X64-NEXT:    retq # encoding: [0xc3]
    350   %res = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
    351   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
    352   %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufh.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
    353   %res3 = add <32 x i16> %res, %res1
    354   %res4 = add <32 x i16> %res3, %res2
    355   ret <32 x i16> %res4
    356 }
    357 
    358 declare <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16>, i32, <32 x i16>, i32)
    359 
    360 define <32 x i16>@test_int_x86_avx512_mask_pshufl_w_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
    361 ; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
    362 ; X86:       # %bb.0:
    363 ; X86-NEXT:    vpshuflw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7f,0x48,0x70,0xd0,0x03]
    364 ; X86-NEXT:    # zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
    365 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    366 ; X86-NEXT:    vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
    367 ; X86-NEXT:    # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
    368 ; X86-NEXT:    vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
    369 ; X86-NEXT:    # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
    370 ; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
    371 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    372 ; X86-NEXT:    retl # encoding: [0xc3]
    373 ;
    374 ; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_512:
    375 ; X64:       # %bb.0:
    376 ; X64-NEXT:    vpshuflw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x7f,0x48,0x70,0xd0,0x03]
    377 ; X64-NEXT:    # zmm2 = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
    378 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    379 ; X64-NEXT:    vpshuflw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x49,0x70,0xc8,0x03]
    380 ; X64-NEXT:    # zmm1 {%k1} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
    381 ; X64-NEXT:    vpshuflw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xc9,0x70,0xc0,0x03]
    382 ; X64-NEXT:    # zmm0 {%k1} {z} = zmm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15,19,16,16,16,20,21,22,23,27,24,24,24,28,29,30,31]
    383 ; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
    384 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    385 ; X64-NEXT:    retq # encoding: [0xc3]
    386   %res = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
    387   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
    388   %res2 = call <32 x i16> @llvm.x86.avx512.mask.pshufl.w.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
    389   %res3 = add <32 x i16> %res, %res1
    390   %res4 = add <32 x i16> %res3, %res2
    391   ret <32 x i16> %res4
    392 }
    393 
    394 define i64 @test_pcmpeq_b(<64 x i8> %a, <64 x i8> %b) {
    395 ; X86-LABEL: test_pcmpeq_b:
    396 ; X86:       # %bb.0:
    397 ; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
    398 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
    399 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    400 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
    401 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    402 ; X86-NEXT:    retl # encoding: [0xc3]
    403 ;
    404 ; X64-LABEL: test_pcmpeq_b:
    405 ; X64:       # %bb.0:
    406 ; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
    407 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
    408 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    409 ; X64-NEXT:    retq # encoding: [0xc3]
    410   %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
    411   ret i64 %res
    412 }
    413 
    414 define i64 @test_mask_pcmpeq_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
    415 ; X86-LABEL: test_mask_pcmpeq_b:
    416 ; X86:       # %bb.0:
    417 ; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
    418 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
    419 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
    420 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    421 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
    422 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
    423 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    424 ; X86-NEXT:    retl # encoding: [0xc3]
    425 ;
    426 ; X64-LABEL: test_mask_pcmpeq_b:
    427 ; X64:       # %bb.0:
    428 ; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
    429 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
    430 ; X64-NEXT:    andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
    431 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    432 ; X64-NEXT:    retq # encoding: [0xc3]
    433   %res = call i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
    434   ret i64 %res
    435 }
    436 
    437 declare i64 @llvm.x86.avx512.mask.pcmpeq.b.512(<64 x i8>, <64 x i8>, i64)
    438 
    439 define i32 @test_pcmpeq_w(<32 x i16> %a, <32 x i16> %b) {
    440 ; CHECK-LABEL: test_pcmpeq_w:
    441 ; CHECK:       # %bb.0:
    442 ; CHECK-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
    443 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    444 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    445 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    446   %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
    447   ret i32 %res
    448 }
    449 
    450 define i32 @test_mask_pcmpeq_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
    451 ; X86-LABEL: test_mask_pcmpeq_w:
    452 ; X86:       # %bb.0:
    453 ; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
    454 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    455 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
    456 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    457 ; X86-NEXT:    retl # encoding: [0xc3]
    458 ;
    459 ; X64-LABEL: test_mask_pcmpeq_w:
    460 ; X64:       # %bb.0:
    461 ; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
    462 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    463 ; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
    464 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    465 ; X64-NEXT:    retq # encoding: [0xc3]
    466   %res = call i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
    467   ret i32 %res
    468 }
    469 
    470 declare i32 @llvm.x86.avx512.mask.pcmpeq.w.512(<32 x i16>, <32 x i16>, i32)
    471 
    472 define i64 @test_pcmpgt_b(<64 x i8> %a, <64 x i8> %b) {
    473 ; X86-LABEL: test_pcmpgt_b:
    474 ; X86:       # %bb.0:
    475 ; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
    476 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
    477 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    478 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
    479 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    480 ; X86-NEXT:    retl # encoding: [0xc3]
    481 ;
    482 ; X64-LABEL: test_pcmpgt_b:
    483 ; X64:       # %bb.0:
    484 ; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
    485 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
    486 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    487 ; X64-NEXT:    retq # encoding: [0xc3]
    488   %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 -1)
    489   ret i64 %res
    490 }
    491 
    492 define i64 @test_mask_pcmpgt_b(<64 x i8> %a, <64 x i8> %b, i64 %mask) {
    493 ; X86-LABEL: test_mask_pcmpgt_b:
    494 ; X86:       # %bb.0:
    495 ; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
    496 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
    497 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
    498 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    499 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
    500 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %edx # encoding: [0x23,0x54,0x24,0x08]
    501 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    502 ; X86-NEXT:    retl # encoding: [0xc3]
    503 ;
    504 ; X64-LABEL: test_mask_pcmpgt_b:
    505 ; X64:       # %bb.0:
    506 ; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
    507 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
    508 ; X64-NEXT:    andq %rdi, %rax # encoding: [0x48,0x21,0xf8]
    509 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    510 ; X64-NEXT:    retq # encoding: [0xc3]
    511   %res = call i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8> %a, <64 x i8> %b, i64 %mask)
    512   ret i64 %res
    513 }
    514 
    515 declare i64 @llvm.x86.avx512.mask.pcmpgt.b.512(<64 x i8>, <64 x i8>, i64)
    516 
    517 define i32 @test_pcmpgt_w(<32 x i16> %a, <32 x i16> %b) {
    518 ; CHECK-LABEL: test_pcmpgt_w:
    519 ; CHECK:       # %bb.0:
    520 ; CHECK-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
    521 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    522 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    523 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    524   %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 -1)
    525   ret i32 %res
    526 }
    527 
    528 define i32 @test_mask_pcmpgt_w(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
    529 ; X86-LABEL: test_mask_pcmpgt_w:
    530 ; X86:       # %bb.0:
    531 ; X86-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
    532 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    533 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
    534 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    535 ; X86-NEXT:    retl # encoding: [0xc3]
    536 ;
    537 ; X64-LABEL: test_mask_pcmpgt_w:
    538 ; X64:       # %bb.0:
    539 ; X64-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
    540 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    541 ; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
    542 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    543 ; X64-NEXT:    retq # encoding: [0xc3]
    544   %res = call i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16> %a, <32 x i16> %b, i32 %mask)
    545   ret i32 %res
    546 }
    547 
    548 declare i32 @llvm.x86.avx512.mask.pcmpgt.w.512(<32 x i16>, <32 x i16>, i32)
    549 
    550 declare <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
    551 
    552 define <64 x i8>@test_int_x86_avx512_mask_punpckhb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
    553 ; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
    554 ; X86:       # %bb.0:
    555 ; X86-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x68,0xd9]
    556 ; X86-NEXT:    # zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    557 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    558 ; X86-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
    559 ; X86-NEXT:    # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    560 ; X86-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    561 ; X86-NEXT:    retl # encoding: [0xc3]
    562 ;
    563 ; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_512:
    564 ; X64:       # %bb.0:
    565 ; X64-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x68,0xd9]
    566 ; X64-NEXT:    # zmm3 = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    567 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    568 ; X64-NEXT:    vpunpckhbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x68,0xd1]
    569 ; X64-NEXT:    # zmm2 {%k1} = zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31],zmm0[40],zmm1[40],zmm0[41],zmm1[41],zmm0[42],zmm1[42],zmm0[43],zmm1[43],zmm0[44],zmm1[44],zmm0[45],zmm1[45],zmm0[46],zmm1[46],zmm0[47],zmm1[47],zmm0[56],zmm1[56],zmm0[57],zmm1[57],zmm0[58],zmm1[58],zmm0[59],zmm1[59],zmm0[60],zmm1[60],zmm0[61],zmm1[61],zmm0[62],zmm1[62],zmm0[63],zmm1[63]
    570 ; X64-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    571 ; X64-NEXT:    retq # encoding: [0xc3]
    572   %res = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
    573   %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpckhb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
    574   %res2 = add <64 x i8> %res, %res1
    575   ret <64 x i8> %res2
    576 }
    577 
    578 declare <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
    579 
    580 define <64 x i8>@test_int_x86_avx512_mask_punpcklb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
    581 ; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
    582 ; X86:       # %bb.0:
    583 ; X86-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x60,0xd9]
    584 ; X86-NEXT:    # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    585 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    586 ; X86-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
    587 ; X86-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    588 ; X86-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    589 ; X86-NEXT:    retl # encoding: [0xc3]
    590 ;
    591 ; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_512:
    592 ; X64:       # %bb.0:
    593 ; X64-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x60,0xd9]
    594 ; X64-NEXT:    # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    595 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    596 ; X64-NEXT:    vpunpcklbw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x60,0xd1]
    597 ; X64-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[32],zmm1[32],zmm0[33],zmm1[33],zmm0[34],zmm1[34],zmm0[35],zmm1[35],zmm0[36],zmm1[36],zmm0[37],zmm1[37],zmm0[38],zmm1[38],zmm0[39],zmm1[39],zmm0[48],zmm1[48],zmm0[49],zmm1[49],zmm0[50],zmm1[50],zmm0[51],zmm1[51],zmm0[52],zmm1[52],zmm0[53],zmm1[53],zmm0[54],zmm1[54],zmm0[55],zmm1[55]
    598 ; X64-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    599 ; X64-NEXT:    retq # encoding: [0xc3]
    600   %res = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
    601   %res1 = call <64 x i8> @llvm.x86.avx512.mask.punpcklb.w.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
    602   %res2 = add <64 x i8> %res, %res1
    603   ret <64 x i8> %res2
    604 }
    605 
    606 declare <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    607 
    608 define <32 x i16>@test_int_x86_avx512_mask_punpckhw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    609 ; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
    610 ; X86:       # %bb.0:
    611 ; X86-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x69,0xd9]
    612 ; X86-NEXT:    # zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
    613 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    614 ; X86-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
    615 ; X86-NEXT:    # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
    616 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    617 ; X86-NEXT:    retl # encoding: [0xc3]
    618 ;
    619 ; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_512:
    620 ; X64:       # %bb.0:
    621 ; X64-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x69,0xd9]
    622 ; X64-NEXT:    # zmm3 = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
    623 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    624 ; X64-NEXT:    vpunpckhwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x69,0xd1]
    625 ; X64-NEXT:    # zmm2 {%k1} = zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[12],zmm1[12],zmm0[13],zmm1[13],zmm0[14],zmm1[14],zmm0[15],zmm1[15],zmm0[20],zmm1[20],zmm0[21],zmm1[21],zmm0[22],zmm1[22],zmm0[23],zmm1[23],zmm0[28],zmm1[28],zmm0[29],zmm1[29],zmm0[30],zmm1[30],zmm0[31],zmm1[31]
    626 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    627 ; X64-NEXT:    retq # encoding: [0xc3]
    628   %res = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
    629   %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpckhw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
    630   %res2 = add <32 x i16> %res, %res1
    631   ret <32 x i16> %res2
    632 }
    633 
    634 declare <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    635 
    636 define <32 x i16>@test_int_x86_avx512_mask_punpcklw_d_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    637 ; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
    638 ; X86:       # %bb.0:
    639 ; X86-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x61,0xd9]
    640 ; X86-NEXT:    # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
    641 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    642 ; X86-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
    643 ; X86-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
    644 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    645 ; X86-NEXT:    retl # encoding: [0xc3]
    646 ;
    647 ; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_512:
    648 ; X64:       # %bb.0:
    649 ; X64-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0x61,0xd9]
    650 ; X64-NEXT:    # zmm3 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
    651 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    652 ; X64-NEXT:    vpunpcklwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x61,0xd1]
    653 ; X64-NEXT:    # zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[16],zmm1[16],zmm0[17],zmm1[17],zmm0[18],zmm1[18],zmm0[19],zmm1[19],zmm0[24],zmm1[24],zmm0[25],zmm1[25],zmm0[26],zmm1[26],zmm0[27],zmm1[27]
    654 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    655 ; X64-NEXT:    retq # encoding: [0xc3]
    656   %res = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
    657   %res1 = call <32 x i16> @llvm.x86.avx512.mask.punpcklw.d.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
    658   %res2 = add <32 x i16> %res, %res1
    659   ret <32 x i16> %res2
    660 }
    661 
    662 declare <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
    663 
    664 define <64 x i8>@test_int_x86_avx512_mask_pmaxs_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
    665 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
    666 ; X86:       # %bb.0:
    667 ; X86-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3c,0xd9]
    668 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    669 ; X86-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
    670 ; X86-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    671 ; X86-NEXT:    retl # encoding: [0xc3]
    672 ;
    673 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_512:
    674 ; X64:       # %bb.0:
    675 ; X64-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3c,0xd9]
    676 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    677 ; X64-NEXT:    vpmaxsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3c,0xd1]
    678 ; X64-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    679 ; X64-NEXT:    retq # encoding: [0xc3]
    680   %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
    681   %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxs.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
    682   %res2 = add <64 x i8> %res, %res1
    683   ret <64 x i8> %res2
    684 }
    685 
    686 declare <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    687 
    688 define <32 x i16>@test_int_x86_avx512_mask_pmaxs_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    689 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
    690 ; X86:       # %bb.0:
    691 ; X86-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xee,0xd9]
    692 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    693 ; X86-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
    694 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    695 ; X86-NEXT:    retl # encoding: [0xc3]
    696 ;
    697 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_512:
    698 ; X64:       # %bb.0:
    699 ; X64-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xee,0xd9]
    700 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    701 ; X64-NEXT:    vpmaxsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xee,0xd1]
    702 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    703 ; X64-NEXT:    retq # encoding: [0xc3]
    704   %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
    705   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxs.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
    706   %res2 = add <32 x i16> %res, %res1
    707   ret <32 x i16> %res2
    708 }
    709 
    710 declare <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
    711 
    712 define <64 x i8>@test_int_x86_avx512_mask_pmaxu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
    713 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
    714 ; X86:       # %bb.0:
    715 ; X86-NEXT:    vpmaxub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xde,0xd9]
    716 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    717 ; X86-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
    718 ; X86-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    719 ; X86-NEXT:    retl # encoding: [0xc3]
    720 ;
    721 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_512:
    722 ; X64:       # %bb.0:
    723 ; X64-NEXT:    vpmaxub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xde,0xd9]
    724 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    725 ; X64-NEXT:    vpmaxub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xde,0xd1]
    726 ; X64-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    727 ; X64-NEXT:    retq # encoding: [0xc3]
    728   %res = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
    729   %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmaxu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
    730   %res2 = add <64 x i8> %res, %res1
    731   ret <64 x i8> %res2
    732 }
    733 
    734 declare <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    735 
    736 define <32 x i16>@test_int_x86_avx512_mask_pmaxu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    737 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
    738 ; X86:       # %bb.0:
    739 ; X86-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3e,0xd9]
    740 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    741 ; X86-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1]
    742 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    743 ; X86-NEXT:    retl # encoding: [0xc3]
    744 ;
    745 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_512:
    746 ; X64:       # %bb.0:
    747 ; X64-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3e,0xd9]
    748 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    749 ; X64-NEXT:    vpmaxuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3e,0xd1]
    750 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    751 ; X64-NEXT:    retq # encoding: [0xc3]
    752   %res = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
    753   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaxu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
    754   %res2 = add <32 x i16> %res, %res1
    755   ret <32 x i16> %res2
    756 }
    757 
    758 declare <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
    759 
    760 define <64 x i8>@test_int_x86_avx512_mask_pmins_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
    761 ; X86-LABEL: test_int_x86_avx512_mask_pmins_b_512:
    762 ; X86:       # %bb.0:
    763 ; X86-NEXT:    vpminsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x38,0xd9]
    764 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    765 ; X86-NEXT:    vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1]
    766 ; X86-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    767 ; X86-NEXT:    retl # encoding: [0xc3]
    768 ;
    769 ; X64-LABEL: test_int_x86_avx512_mask_pmins_b_512:
    770 ; X64:       # %bb.0:
    771 ; X64-NEXT:    vpminsb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x38,0xd9]
    772 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    773 ; X64-NEXT:    vpminsb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x38,0xd1]
    774 ; X64-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    775 ; X64-NEXT:    retq # encoding: [0xc3]
    776   %res = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
    777   %res1 = call <64 x i8> @llvm.x86.avx512.mask.pmins.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
    778   %res2 = add <64 x i8> %res, %res1
    779   ret <64 x i8> %res2
    780 }
    781 
    782 declare <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    783 
    784 define <32 x i16>@test_int_x86_avx512_mask_pmins_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    785 ; X86-LABEL: test_int_x86_avx512_mask_pmins_w_512:
    786 ; X86:       # %bb.0:
    787 ; X86-NEXT:    vpminsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xea,0xd9]
    788 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    789 ; X86-NEXT:    vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1]
    790 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    791 ; X86-NEXT:    retl # encoding: [0xc3]
    792 ;
    793 ; X64-LABEL: test_int_x86_avx512_mask_pmins_w_512:
    794 ; X64:       # %bb.0:
    795 ; X64-NEXT:    vpminsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xea,0xd9]
    796 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    797 ; X64-NEXT:    vpminsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xea,0xd1]
    798 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    799 ; X64-NEXT:    retq # encoding: [0xc3]
    800   %res = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
    801   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmins.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
    802   %res2 = add <32 x i16> %res, %res1
    803   ret <32 x i16> %res2
    804 }
    805 
    806 declare <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
    807 
    808 define <64 x i8>@test_int_x86_avx512_mask_pminu_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
    809 ; X86-LABEL: test_int_x86_avx512_mask_pminu_b_512:
    810 ; X86:       # %bb.0:
    811 ; X86-NEXT:    vpminub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xda,0xd9]
    812 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    813 ; X86-NEXT:    vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1]
    814 ; X86-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    815 ; X86-NEXT:    retl # encoding: [0xc3]
    816 ;
    817 ; X64-LABEL: test_int_x86_avx512_mask_pminu_b_512:
    818 ; X64:       # %bb.0:
    819 ; X64-NEXT:    vpminub %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xda,0xd9]
    820 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    821 ; X64-NEXT:    vpminub %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xda,0xd1]
    822 ; X64-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
    823 ; X64-NEXT:    retq # encoding: [0xc3]
    824   %res = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
    825   %res1 = call <64 x i8> @llvm.x86.avx512.mask.pminu.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
    826   %res2 = add <64 x i8> %res, %res1
    827   ret <64 x i8> %res2
    828 }
    829 
    830 declare <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    831 
    832 define <32 x i16>@test_int_x86_avx512_mask_pminu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    833 ; X86-LABEL: test_int_x86_avx512_mask_pminu_w_512:
    834 ; X86:       # %bb.0:
    835 ; X86-NEXT:    vpminuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3a,0xd9]
    836 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    837 ; X86-NEXT:    vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1]
    838 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    839 ; X86-NEXT:    retl # encoding: [0xc3]
    840 ;
    841 ; X64-LABEL: test_int_x86_avx512_mask_pminu_w_512:
    842 ; X64:       # %bb.0:
    843 ; X64-NEXT:    vpminuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x3a,0xd9]
    844 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    845 ; X64-NEXT:    vpminuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x3a,0xd1]
    846 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
    847 ; X64-NEXT:    retq # encoding: [0xc3]
    848   %res = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
    849   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pminu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
    850   %res2 = add <32 x i16> %res, %res1
    851   ret <32 x i16> %res2
    852 }
    853 
    854 declare <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8>, <32 x i16>, i32)
    855 
    856 define <32 x i16>@test_int_x86_avx512_mask_pmovzxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
    857 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
    858 ; X86:       # %bb.0:
    859 ; X86-NEXT:    vpmovzxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x30,0xd0]
    860 ; X86-NEXT:    # zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    861 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    862 ; X86-NEXT:    vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8]
    863 ; X86-NEXT:    # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    864 ; X86-NEXT:    vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0]
    865 ; X86-NEXT:    # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    866 ; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
    867 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    868 ; X86-NEXT:    retl # encoding: [0xc3]
    869 ;
    870 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_512:
    871 ; X64:       # %bb.0:
    872 ; X64-NEXT:    vpmovzxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x30,0xd0]
    873 ; X64-NEXT:    # zmm2 = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    874 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    875 ; X64-NEXT:    vpmovzxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x30,0xc8]
    876 ; X64-NEXT:    # zmm1 {%k1} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    877 ; X64-NEXT:    vpmovzxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x30,0xc0]
    878 ; X64-NEXT:    # zmm0 {%k1} {z} = ymm0[0],zero,ymm0[1],zero,ymm0[2],zero,ymm0[3],zero,ymm0[4],zero,ymm0[5],zero,ymm0[6],zero,ymm0[7],zero,ymm0[8],zero,ymm0[9],zero,ymm0[10],zero,ymm0[11],zero,ymm0[12],zero,ymm0[13],zero,ymm0[14],zero,ymm0[15],zero,ymm0[16],zero,ymm0[17],zero,ymm0[18],zero,ymm0[19],zero,ymm0[20],zero,ymm0[21],zero,ymm0[22],zero,ymm0[23],zero,ymm0[24],zero,ymm0[25],zero,ymm0[26],zero,ymm0[27],zero,ymm0[28],zero,ymm0[29],zero,ymm0[30],zero,ymm0[31],zero
    879 ; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
    880 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    881 ; X64-NEXT:    retq # encoding: [0xc3]
    882   %res = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
    883   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
    884   %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovzxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
    885   %res3 = add <32 x i16> %res, %res1
    886   %res4 = add <32 x i16> %res3, %res2
    887   ret <32 x i16> %res4
    888 }
    889 
    890 declare <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8>, <32 x i16>, i32)
    891 
    892 define <32 x i16>@test_int_x86_avx512_mask_pmovsxb_w_512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2) {
    893 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
    894 ; X86:       # %bb.0:
    895 ; X86-NEXT:    vpmovsxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x20,0xd0]
    896 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    897 ; X86-NEXT:    vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8]
    898 ; X86-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0]
    899 ; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
    900 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    901 ; X86-NEXT:    retl # encoding: [0xc3]
    902 ;
    903 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_512:
    904 ; X64:       # %bb.0:
    905 ; X64-NEXT:    vpmovsxbw %ymm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x20,0xd0]
    906 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    907 ; X64-NEXT:    vpmovsxbw %ymm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x20,0xc8]
    908 ; X64-NEXT:    vpmovsxbw %ymm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x20,0xc0]
    909 ; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
    910 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    911 ; X64-NEXT:    retq # encoding: [0xc3]
    912   %res = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 %x2)
    913   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> zeroinitializer, i32 %x2)
    914   %res2 = call <32 x i16> @llvm.x86.avx512.mask.pmovsxb.w.512(<32 x i8> %x0, <32 x i16> %x1, i32 -1)
    915   %res3 = add <32 x i16> %res, %res1
    916   %res4 = add <32 x i16> %res3, %res2
    917   ret <32 x i16> %res4
    918 }
    919 
    920 declare <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
    921 
    922 define <32 x i16>@test_int_x86_avx512_mask_psrl_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    923 ; X86-LABEL: test_int_x86_avx512_mask_psrl_w_512:
    924 ; X86:       # %bb.0:
    925 ; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xd9]
    926 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    927 ; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
    928 ; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
    929 ; X86-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
    930 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
    931 ; X86-NEXT:    retl # encoding: [0xc3]
    932 ;
    933 ; X64-LABEL: test_int_x86_avx512_mask_psrl_w_512:
    934 ; X64:       # %bb.0:
    935 ; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xd9]
    936 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    937 ; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
    938 ; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
    939 ; X64-NEXT:    vpaddw %zmm0, %zmm3, %zmm0 # encoding: [0x62,0xf1,0x65,0x48,0xfd,0xc0]
    940 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
    941 ; X64-NEXT:    retq # encoding: [0xc3]
    942   %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
    943   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
    944   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
    945   %res3 = add <32 x i16> %res, %res1
    946   %res4 = add <32 x i16> %res3, %res2
    947   ret <32 x i16> %res4
    948 }
    949 
    950 declare <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16>, i32, <32 x i16>, i32)
    951 
    952 define <32 x i16>@test_int_x86_avx512_mask_psrl_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
    953 ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
    954 ; X86:       # %bb.0:
    955 ; X86-NEXT:    vpsrlw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x03]
    956 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    957 ; X86-NEXT:    vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03]
    958 ; X86-NEXT:    vpsrlw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x03]
    959 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
    960 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    961 ; X86-NEXT:    retl # encoding: [0xc3]
    962 ;
    963 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_512:
    964 ; X64:       # %bb.0:
    965 ; X64-NEXT:    vpsrlw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xd0,0x03]
    966 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    967 ; X64-NEXT:    vpsrlw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x03]
    968 ; X64-NEXT:    vpsrlw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x03]
    969 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
    970 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    971 ; X64-NEXT:    retq # encoding: [0xc3]
    972   %res = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
    973   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
    974   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrl.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
    975   %res3 = add <32 x i16> %res, %res1
    976   %res4 = add <32 x i16> %res3, %res2
    977   ret <32 x i16> %res4
    978 }
    979 
    980 declare <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
    981 
    982 define <32 x i16>@test_int_x86_avx512_mask_psra_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    983 ; X86-LABEL: test_int_x86_avx512_mask_psra_w_512:
    984 ; X86:       # %bb.0:
    985 ; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xd9]
    986 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    987 ; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
    988 ; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
    989 ; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
    990 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
    991 ; X86-NEXT:    retl # encoding: [0xc3]
    992 ;
    993 ; X64-LABEL: test_int_x86_avx512_mask_psra_w_512:
    994 ; X64:       # %bb.0:
    995 ; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xd9]
    996 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    997 ; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
    998 ; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
    999 ; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
   1000 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
   1001 ; X64-NEXT:    retq # encoding: [0xc3]
   1002   %res = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
   1003   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
   1004   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
   1005   %res3 = add <32 x i16> %res, %res1
   1006   %res4 = add <32 x i16> %res3, %res2
   1007   ret <32 x i16> %res4
   1008 }
   1009 
   1010 declare <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16>, i32, <32 x i16>, i32)
   1011 
   1012 define <32 x i16>@test_int_x86_avx512_mask_psra_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
   1013 ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_512:
   1014 ; X86:       # %bb.0:
   1015 ; X86-NEXT:    vpsraw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x03]
   1016 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1017 ; X86-NEXT:    vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03]
   1018 ; X86-NEXT:    vpsraw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x03]
   1019 ; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
   1020 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
   1021 ; X86-NEXT:    retl # encoding: [0xc3]
   1022 ;
   1023 ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_512:
   1024 ; X64:       # %bb.0:
   1025 ; X64-NEXT:    vpsraw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xe0,0x03]
   1026 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1027 ; X64-NEXT:    vpsraw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x03]
   1028 ; X64-NEXT:    vpsraw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x03]
   1029 ; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
   1030 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
   1031 ; X64-NEXT:    retq # encoding: [0xc3]
   1032   %res = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
   1033   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
   1034   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psra.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
   1035   %res3 = add <32 x i16> %res, %res1
   1036   %res4 = add <32 x i16> %res3, %res2
   1037   ret <32 x i16> %res4
   1038 }
   1039 
   1040 declare <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16>, <8 x i16>, <32 x i16>, i32)
   1041 
   1042 define <32 x i16>@test_int_x86_avx512_mask_psll_w_512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   1043 ; X86-LABEL: test_int_x86_avx512_mask_psll_w_512:
   1044 ; X86:       # %bb.0:
   1045 ; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xd9]
   1046 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1047 ; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
   1048 ; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
   1049 ; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
   1050 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
   1051 ; X86-NEXT:    retl # encoding: [0xc3]
   1052 ;
   1053 ; X64-LABEL: test_int_x86_avx512_mask_psll_w_512:
   1054 ; X64:       # %bb.0:
   1055 ; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xd9]
   1056 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1057 ; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
   1058 ; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
   1059 ; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
   1060 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
   1061 ; X64-NEXT:    retq # encoding: [0xc3]
   1062   %res = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 %x3)
   1063   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
   1064   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.w.512(<32 x i16> %x0, <8 x i16> %x1, <32 x i16> %x2, i32 -1)
   1065   %res3 = add <32 x i16> %res, %res1
   1066   %res4 = add <32 x i16> %res3, %res2
   1067   ret <32 x i16> %res4
   1068 }
   1069 
   1070 declare <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16>, i32, <32 x i16>, i32)
   1071 
   1072 define <32 x i16>@test_int_x86_avx512_mask_psll_wi_512(<32 x i16> %x0, i32 %x1, <32 x i16> %x2, i32 %x3) {
   1073 ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_512:
   1074 ; X86:       # %bb.0:
   1075 ; X86-NEXT:    vpsllw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x03]
   1076 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1077 ; X86-NEXT:    vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03]
   1078 ; X86-NEXT:    vpsllw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x03]
   1079 ; X86-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
   1080 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
   1081 ; X86-NEXT:    retl # encoding: [0xc3]
   1082 ;
   1083 ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_512:
   1084 ; X64:       # %bb.0:
   1085 ; X64-NEXT:    vpsllw $3, %zmm0, %zmm2 # encoding: [0x62,0xf1,0x6d,0x48,0x71,0xf0,0x03]
   1086 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1087 ; X64-NEXT:    vpsllw $3, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x03]
   1088 ; X64-NEXT:    vpsllw $3, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x03]
   1089 ; X64-NEXT:    vpaddw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc2]
   1090 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
   1091 ; X64-NEXT:    retq # encoding: [0xc3]
   1092   %res = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 %x3)
   1093   %res1 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> zeroinitializer, i32 %x3)
   1094   %res2 = call <32 x i16> @llvm.x86.avx512.mask.psll.wi.512(<32 x i16> %x0, i32 3, <32 x i16> %x2, i32 -1)
   1095   %res3 = add <32 x i16> %res, %res1
   1096   %res4 = add <32 x i16> %res3, %res2
   1097   ret <32 x i16> %res4
   1098 }
   1099 
   1100 declare <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
   1101 
   1102 define <64 x i8>@test_int_x86_avx512_mask_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
   1103 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
   1104 ; X86:       # %bb.0:
   1105 ; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xd9]
   1106 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
   1107 ; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
   1108 ; X86-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
   1109 ; X86-NEXT:    retl # encoding: [0xc3]
   1110 ;
   1111 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_512:
   1112 ; X64:       # %bb.0:
   1113 ; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xd9]
   1114 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   1115 ; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
   1116 ; X64-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
   1117 ; X64-NEXT:    retq # encoding: [0xc3]
   1118   %res = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
   1119   %res1 = call <64 x i8> @llvm.x86.avx512.mask.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
   1120   %res2 = add <64 x i8> %res, %res1
   1121   ret <64 x i8> %res2
   1122 }
   1123 
   1124 
   1125 declare <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64)
   1126 
   1127 define <64 x i8>@test_int_x86_avx512_cvtmask2b_512(i64 %x0) {
   1128 ; X86-LABEL: test_int_x86_avx512_cvtmask2b_512:
   1129 ; X86:       # %bb.0:
   1130 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf8,0x90,0x44,0x24,0x04]
   1131 ; X86-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
   1132 ; X86-NEXT:    retl # encoding: [0xc3]
   1133 ;
   1134 ; X64-LABEL: test_int_x86_avx512_cvtmask2b_512:
   1135 ; X64:       # %bb.0:
   1136 ; X64-NEXT:    kmovq %rdi, %k0 # encoding: [0xc4,0xe1,0xfb,0x92,0xc7]
   1137 ; X64-NEXT:    vpmovm2b %k0, %zmm0 # encoding: [0x62,0xf2,0x7e,0x48,0x28,0xc0]
   1138 ; X64-NEXT:    retq # encoding: [0xc3]
   1139   %res = call <64 x i8> @llvm.x86.avx512.cvtmask2b.512(i64 %x0)
   1140   ret <64 x i8> %res
   1141 }
   1142 
   1143 declare <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32)
   1144 
   1145 define <32 x i16>@test_int_x86_avx512_cvtmask2w_512(i32 %x0) {
   1146 ; X86-LABEL: test_int_x86_avx512_cvtmask2w_512:
   1147 ; X86:       # %bb.0:
   1148 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
   1149 ; X86-NEXT:    vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
   1150 ; X86-NEXT:    retl # encoding: [0xc3]
   1151 ;
   1152 ; X64-LABEL: test_int_x86_avx512_cvtmask2w_512:
   1153 ; X64:       # %bb.0:
   1154 ; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
   1155 ; X64-NEXT:    vpmovm2w %k0, %zmm0 # encoding: [0x62,0xf2,0xfe,0x48,0x28,0xc0]
   1156 ; X64-NEXT:    retq # encoding: [0xc3]
   1157   %res = call <32 x i16> @llvm.x86.avx512.cvtmask2w.512(i32 %x0)
   1158   ret <32 x i16> %res
   1159 }
   1160 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
   1161 ; CHECK-LABEL: test_mask_packs_epi32_rr_512:
   1162 ; CHECK:       # %bb.0:
   1163 ; CHECK-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
   1164 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1165   %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
   1166   ret <32 x i16> %res
   1167 }
   1168 
   1169 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
   1170 ; X86-LABEL: test_mask_packs_epi32_rrk_512:
   1171 ; X86:       # %bb.0:
   1172 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1173 ; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
   1174 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1175 ; X86-NEXT:    retl # encoding: [0xc3]
   1176 ;
   1177 ; X64-LABEL: test_mask_packs_epi32_rrk_512:
   1178 ; X64:       # %bb.0:
   1179 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1180 ; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
   1181 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1182 ; X64-NEXT:    retq # encoding: [0xc3]
   1183   %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
   1184   ret <32 x i16> %res
   1185 }
   1186 
   1187 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
   1188 ; X86-LABEL: test_mask_packs_epi32_rrkz_512:
   1189 ; X86:       # %bb.0:
   1190 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1191 ; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
   1192 ; X86-NEXT:    retl # encoding: [0xc3]
   1193 ;
   1194 ; X64-LABEL: test_mask_packs_epi32_rrkz_512:
   1195 ; X64:       # %bb.0:
   1196 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1197 ; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
   1198 ; X64-NEXT:    retq # encoding: [0xc3]
   1199   %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
   1200   ret <32 x i16> %res
   1201 }
   1202 
   1203 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
   1204 ; X86-LABEL: test_mask_packs_epi32_rm_512:
   1205 ; X86:       # %bb.0:
   1206 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1207 ; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
   1208 ; X86-NEXT:    retl # encoding: [0xc3]
   1209 ;
   1210 ; X64-LABEL: test_mask_packs_epi32_rm_512:
   1211 ; X64:       # %bb.0:
   1212 ; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
   1213 ; X64-NEXT:    retq # encoding: [0xc3]
   1214   %b = load <16 x i32>, <16 x i32>* %ptr_b
   1215   %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
   1216   ret <32 x i16> %res
   1217 }
   1218 
   1219 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   1220 ; X86-LABEL: test_mask_packs_epi32_rmk_512:
   1221 ; X86:       # %bb.0:
   1222 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1223 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1224 ; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
   1225 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1226 ; X86-NEXT:    retl # encoding: [0xc3]
   1227 ;
   1228 ; X64-LABEL: test_mask_packs_epi32_rmk_512:
   1229 ; X64:       # %bb.0:
   1230 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1231 ; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
   1232 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1233 ; X64-NEXT:    retq # encoding: [0xc3]
   1234   %b = load <16 x i32>, <16 x i32>* %ptr_b
   1235   %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
   1236   ret <32 x i16> %res
   1237 }
   1238 
   1239 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
   1240 ; X86-LABEL: test_mask_packs_epi32_rmkz_512:
   1241 ; X86:       # %bb.0:
   1242 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1243 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1244 ; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
   1245 ; X86-NEXT:    retl # encoding: [0xc3]
   1246 ;
   1247 ; X64-LABEL: test_mask_packs_epi32_rmkz_512:
   1248 ; X64:       # %bb.0:
   1249 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1250 ; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
   1251 ; X64-NEXT:    retq # encoding: [0xc3]
   1252   %b = load <16 x i32>, <16 x i32>* %ptr_b
   1253   %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
   1254   ret <32 x i16> %res
   1255 }
   1256 
   1257 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
   1258 ; X86-LABEL: test_mask_packs_epi32_rmb_512:
   1259 ; X86:       # %bb.0:
   1260 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1261 ; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
   1262 ; X86-NEXT:    retl # encoding: [0xc3]
   1263 ;
   1264 ; X64-LABEL: test_mask_packs_epi32_rmb_512:
   1265 ; X64:       # %bb.0:
   1266 ; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
   1267 ; X64-NEXT:    retq # encoding: [0xc3]
   1268   %q = load i32, i32* %ptr_b
   1269   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   1270   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   1271   %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
   1272   ret <32 x i16> %res
   1273 }
   1274 
   1275 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   1276 ; X86-LABEL: test_mask_packs_epi32_rmbk_512:
   1277 ; X86:       # %bb.0:
   1278 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1279 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1280 ; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
   1281 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1282 ; X86-NEXT:    retl # encoding: [0xc3]
   1283 ;
   1284 ; X64-LABEL: test_mask_packs_epi32_rmbk_512:
   1285 ; X64:       # %bb.0:
   1286 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1287 ; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
   1288 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1289 ; X64-NEXT:    retq # encoding: [0xc3]
   1290   %q = load i32, i32* %ptr_b
   1291   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   1292   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   1293   %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
   1294   ret <32 x i16> %res
   1295 }
   1296 
   1297 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
   1298 ; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
   1299 ; X86:       # %bb.0:
   1300 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1301 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1302 ; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
   1303 ; X86-NEXT:    retl # encoding: [0xc3]
   1304 ;
   1305 ; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
   1306 ; X64:       # %bb.0:
   1307 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1308 ; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
   1309 ; X64-NEXT:    retq # encoding: [0xc3]
   1310   %q = load i32, i32* %ptr_b
   1311   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   1312   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   1313   %res = call <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
   1314   ret <32 x i16> %res
   1315 }
   1316 
   1317 declare <32 x i16> @llvm.x86.avx512.mask.packssdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
   1318 
   1319 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
   1320 ; CHECK-LABEL: test_mask_packs_epi16_rr_512:
   1321 ; CHECK:       # %bb.0:
   1322 ; CHECK-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
   1323 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1324   %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
   1325   ret <64 x i8> %res
   1326 }
   1327 
   1328 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
   1329 ; X86-LABEL: test_mask_packs_epi16_rrk_512:
   1330 ; X86:       # %bb.0:
   1331 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
   1332 ; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
   1333 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1334 ; X86-NEXT:    retl # encoding: [0xc3]
   1335 ;
   1336 ; X64-LABEL: test_mask_packs_epi16_rrk_512:
   1337 ; X64:       # %bb.0:
   1338 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   1339 ; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
   1340 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1341 ; X64-NEXT:    retq # encoding: [0xc3]
   1342   %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
   1343   ret <64 x i8> %res
   1344 }
   1345 
   1346 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
   1347 ; X86-LABEL: test_mask_packs_epi16_rrkz_512:
   1348 ; X86:       # %bb.0:
   1349 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
   1350 ; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
   1351 ; X86-NEXT:    retl # encoding: [0xc3]
   1352 ;
   1353 ; X64-LABEL: test_mask_packs_epi16_rrkz_512:
   1354 ; X64:       # %bb.0:
   1355 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   1356 ; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
   1357 ; X64-NEXT:    retq # encoding: [0xc3]
   1358   %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
   1359   ret <64 x i8> %res
   1360 }
   1361 
   1362 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
   1363 ; X86-LABEL: test_mask_packs_epi16_rm_512:
   1364 ; X86:       # %bb.0:
   1365 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1366 ; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
   1367 ; X86-NEXT:    retl # encoding: [0xc3]
   1368 ;
   1369 ; X64-LABEL: test_mask_packs_epi16_rm_512:
   1370 ; X64:       # %bb.0:
   1371 ; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
   1372 ; X64-NEXT:    retq # encoding: [0xc3]
   1373   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1374   %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
   1375   ret <64 x i8> %res
   1376 }
   1377 
   1378 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
   1379 ; X86-LABEL: test_mask_packs_epi16_rmk_512:
   1380 ; X86:       # %bb.0:
   1381 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1382 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
   1383 ; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
   1384 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1385 ; X86-NEXT:    retl # encoding: [0xc3]
   1386 ;
   1387 ; X64-LABEL: test_mask_packs_epi16_rmk_512:
   1388 ; X64:       # %bb.0:
   1389 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
   1390 ; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
   1391 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1392 ; X64-NEXT:    retq # encoding: [0xc3]
   1393   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1394   %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
   1395   ret <64 x i8> %res
   1396 }
   1397 
   1398 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
   1399 ; X86-LABEL: test_mask_packs_epi16_rmkz_512:
   1400 ; X86:       # %bb.0:
   1401 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1402 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
   1403 ; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
   1404 ; X86-NEXT:    retl # encoding: [0xc3]
   1405 ;
   1406 ; X64-LABEL: test_mask_packs_epi16_rmkz_512:
   1407 ; X64:       # %bb.0:
   1408 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
   1409 ; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
   1410 ; X64-NEXT:    retq # encoding: [0xc3]
   1411   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1412   %res = call <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
   1413   ret <64 x i8> %res
   1414 }
   1415 
   1416 declare <64 x i8> @llvm.x86.avx512.mask.packsswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
   1417 
   1418 
   1419 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
   1420 ; CHECK-LABEL: test_mask_packus_epi32_rr_512:
   1421 ; CHECK:       # %bb.0:
   1422 ; CHECK-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
   1423 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1424   %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
   1425   ret <32 x i16> %res
   1426 }
   1427 
   1428 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
   1429 ; X86-LABEL: test_mask_packus_epi32_rrk_512:
   1430 ; X86:       # %bb.0:
   1431 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1432 ; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
   1433 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1434 ; X86-NEXT:    retl # encoding: [0xc3]
   1435 ;
   1436 ; X64-LABEL: test_mask_packus_epi32_rrk_512:
   1437 ; X64:       # %bb.0:
   1438 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1439 ; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
   1440 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1441 ; X64-NEXT:    retq # encoding: [0xc3]
   1442   %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
   1443   ret <32 x i16> %res
   1444 }
   1445 
   1446 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
   1447 ; X86-LABEL: test_mask_packus_epi32_rrkz_512:
   1448 ; X86:       # %bb.0:
   1449 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1450 ; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
   1451 ; X86-NEXT:    retl # encoding: [0xc3]
   1452 ;
   1453 ; X64-LABEL: test_mask_packus_epi32_rrkz_512:
   1454 ; X64:       # %bb.0:
   1455 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1456 ; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
   1457 ; X64-NEXT:    retq # encoding: [0xc3]
   1458   %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
   1459   ret <32 x i16> %res
   1460 }
   1461 
   1462 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
   1463 ; X86-LABEL: test_mask_packus_epi32_rm_512:
   1464 ; X86:       # %bb.0:
   1465 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1466 ; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
   1467 ; X86-NEXT:    retl # encoding: [0xc3]
   1468 ;
   1469 ; X64-LABEL: test_mask_packus_epi32_rm_512:
   1470 ; X64:       # %bb.0:
   1471 ; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
   1472 ; X64-NEXT:    retq # encoding: [0xc3]
   1473   %b = load <16 x i32>, <16 x i32>* %ptr_b
   1474   %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
   1475   ret <32 x i16> %res
   1476 }
   1477 
   1478 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   1479 ; X86-LABEL: test_mask_packus_epi32_rmk_512:
   1480 ; X86:       # %bb.0:
   1481 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1482 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1483 ; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
   1484 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1485 ; X86-NEXT:    retl # encoding: [0xc3]
   1486 ;
   1487 ; X64-LABEL: test_mask_packus_epi32_rmk_512:
   1488 ; X64:       # %bb.0:
   1489 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1490 ; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
   1491 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1492 ; X64-NEXT:    retq # encoding: [0xc3]
   1493   %b = load <16 x i32>, <16 x i32>* %ptr_b
   1494   %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
   1495   ret <32 x i16> %res
   1496 }
   1497 
   1498 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
   1499 ; X86-LABEL: test_mask_packus_epi32_rmkz_512:
   1500 ; X86:       # %bb.0:
   1501 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1502 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1503 ; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
   1504 ; X86-NEXT:    retl # encoding: [0xc3]
   1505 ;
   1506 ; X64-LABEL: test_mask_packus_epi32_rmkz_512:
   1507 ; X64:       # %bb.0:
   1508 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1509 ; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
   1510 ; X64-NEXT:    retq # encoding: [0xc3]
   1511   %b = load <16 x i32>, <16 x i32>* %ptr_b
   1512   %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
   1513   ret <32 x i16> %res
   1514 }
   1515 
   1516 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
   1517 ; X86-LABEL: test_mask_packus_epi32_rmb_512:
   1518 ; X86:       # %bb.0:
   1519 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1520 ; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
   1521 ; X86-NEXT:    retl # encoding: [0xc3]
   1522 ;
   1523 ; X64-LABEL: test_mask_packus_epi32_rmb_512:
   1524 ; X64:       # %bb.0:
   1525 ; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
   1526 ; X64-NEXT:    retq # encoding: [0xc3]
   1527   %q = load i32, i32* %ptr_b
   1528   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   1529   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   1530   %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 -1)
   1531   ret <32 x i16> %res
   1532 }
   1533 
   1534 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   1535 ; X86-LABEL: test_mask_packus_epi32_rmbk_512:
   1536 ; X86:       # %bb.0:
   1537 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1538 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1539 ; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
   1540 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1541 ; X86-NEXT:    retl # encoding: [0xc3]
   1542 ;
   1543 ; X64-LABEL: test_mask_packus_epi32_rmbk_512:
   1544 ; X64:       # %bb.0:
   1545 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1546 ; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
   1547 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1548 ; X64-NEXT:    retq # encoding: [0xc3]
   1549   %q = load i32, i32* %ptr_b
   1550   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   1551   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   1552   %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask)
   1553   ret <32 x i16> %res
   1554 }
   1555 
   1556 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
   1557 ; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
   1558 ; X86:       # %bb.0:
   1559 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1560 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1561 ; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
   1562 ; X86-NEXT:    retl # encoding: [0xc3]
   1563 ;
   1564 ; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
   1565 ; X64:       # %bb.0:
   1566 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1567 ; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
   1568 ; X64-NEXT:    retq # encoding: [0xc3]
   1569   %q = load i32, i32* %ptr_b
   1570   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
   1571   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
   1572   %res = call <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32> %a, <16 x i32> %b, <32 x i16> zeroinitializer, i32 %mask)
   1573   ret <32 x i16> %res
   1574 }
   1575 
   1576 declare <32 x i16> @llvm.x86.avx512.mask.packusdw.512(<16 x i32>, <16 x i32>, <32 x i16>, i32)
   1577 
   1578 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
   1579 ; CHECK-LABEL: test_mask_packus_epi16_rr_512:
   1580 ; CHECK:       # %bb.0:
   1581 ; CHECK-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
   1582 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1583   %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
   1584   ret <64 x i8> %res
   1585 }
   1586 
   1587 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
   1588 ; X86-LABEL: test_mask_packus_epi16_rrk_512:
   1589 ; X86:       # %bb.0:
   1590 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
   1591 ; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
   1592 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1593 ; X86-NEXT:    retl # encoding: [0xc3]
   1594 ;
   1595 ; X64-LABEL: test_mask_packus_epi16_rrk_512:
   1596 ; X64:       # %bb.0:
   1597 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   1598 ; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
   1599 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1600 ; X64-NEXT:    retq # encoding: [0xc3]
   1601   %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
   1602   ret <64 x i8> %res
   1603 }
   1604 
   1605 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
   1606 ; X86-LABEL: test_mask_packus_epi16_rrkz_512:
   1607 ; X86:       # %bb.0:
   1608 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
   1609 ; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
   1610 ; X86-NEXT:    retl # encoding: [0xc3]
   1611 ;
   1612 ; X64-LABEL: test_mask_packus_epi16_rrkz_512:
   1613 ; X64:       # %bb.0:
   1614 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   1615 ; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
   1616 ; X64-NEXT:    retq # encoding: [0xc3]
   1617   %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
   1618   ret <64 x i8> %res
   1619 }
   1620 
   1621 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
   1622 ; X86-LABEL: test_mask_packus_epi16_rm_512:
   1623 ; X86:       # %bb.0:
   1624 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1625 ; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
   1626 ; X86-NEXT:    retl # encoding: [0xc3]
   1627 ;
   1628 ; X64-LABEL: test_mask_packus_epi16_rm_512:
   1629 ; X64:       # %bb.0:
   1630 ; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
   1631 ; X64-NEXT:    retq # encoding: [0xc3]
   1632   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1633   %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 -1)
   1634   ret <64 x i8> %res
   1635 }
   1636 
   1637 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
   1638 ; X86-LABEL: test_mask_packus_epi16_rmk_512:
   1639 ; X86:       # %bb.0:
   1640 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1641 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
   1642 ; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
   1643 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1644 ; X86-NEXT:    retl # encoding: [0xc3]
   1645 ;
   1646 ; X64-LABEL: test_mask_packus_epi16_rmk_512:
   1647 ; X64:       # %bb.0:
   1648 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
   1649 ; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
   1650 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1651 ; X64-NEXT:    retq # encoding: [0xc3]
   1652   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1653   %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask)
   1654   ret <64 x i8> %res
   1655 }
   1656 
   1657 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
   1658 ; X86-LABEL: test_mask_packus_epi16_rmkz_512:
   1659 ; X86:       # %bb.0:
   1660 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1661 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
   1662 ; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
   1663 ; X86-NEXT:    retl # encoding: [0xc3]
   1664 ;
   1665 ; X64-LABEL: test_mask_packus_epi16_rmkz_512:
   1666 ; X64:       # %bb.0:
   1667 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
   1668 ; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
   1669 ; X64-NEXT:    retq # encoding: [0xc3]
   1670   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1671   %res = call <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16> %a, <32 x i16> %b, <64 x i8> zeroinitializer, i64 %mask)
   1672   ret <64 x i8> %res
   1673 }
   1674 
   1675 declare <64 x i8> @llvm.x86.avx512.mask.packuswb.512(<32 x i16>, <32 x i16>, <64 x i8>, i64)
   1676 
   1677 define i64 @test_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
   1678 ; X86-LABEL: test_cmp_b_512:
   1679 ; X86:       # %bb.0:
   1680 ; X86-NEXT:    pushl %edi # encoding: [0x57]
   1681 ; X86-NEXT:    .cfi_def_cfa_offset 8
   1682 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   1683 ; X86-NEXT:    .cfi_def_cfa_offset 12
   1684 ; X86-NEXT:    .cfi_offset %esi, -12
   1685 ; X86-NEXT:    .cfi_offset %edi, -8
   1686 ; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
   1687 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1688 ; X86-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   1689 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   1690 ; X86-NEXT:    vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
   1691 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1692 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
   1693 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   1694 ; X86-NEXT:    addl %ecx, %esi # encoding: [0x01,0xce]
   1695 ; X86-NEXT:    adcl %eax, %edx # encoding: [0x11,0xc2]
   1696 ; X86-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
   1697 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1698 ; X86-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   1699 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   1700 ; X86-NEXT:    addl %esi, %ecx # encoding: [0x01,0xf1]
   1701 ; X86-NEXT:    adcl %edx, %eax # encoding: [0x11,0xd0]
   1702 ; X86-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
   1703 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1704 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
   1705 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   1706 ; X86-NEXT:    addl %ecx, %esi # encoding: [0x01,0xce]
   1707 ; X86-NEXT:    adcl %eax, %edx # encoding: [0x11,0xc2]
   1708 ; X86-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
   1709 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1710 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   1711 ; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
   1712 ; X86-NEXT:    addl %esi, %edi # encoding: [0x01,0xf7]
   1713 ; X86-NEXT:    adcl %edx, %ecx # encoding: [0x11,0xd1]
   1714 ; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
   1715 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1716 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
   1717 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   1718 ; X86-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   1719 ; X86-NEXT:    adcl %ecx, %edx # encoding: [0x11,0xca]
   1720 ; X86-NEXT:    addl $-1, %eax # encoding: [0x83,0xc0,0xff]
   1721 ; X86-NEXT:    adcl $-1, %edx # encoding: [0x83,0xd2,0xff]
   1722 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   1723 ; X86-NEXT:    .cfi_def_cfa_offset 8
   1724 ; X86-NEXT:    popl %edi # encoding: [0x5f]
   1725 ; X86-NEXT:    .cfi_def_cfa_offset 4
   1726 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   1727 ; X86-NEXT:    retl # encoding: [0xc3]
   1728 ;
   1729 ; X64-LABEL: test_cmp_b_512:
   1730 ; X64:       # %bb.0:
   1731 ; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
   1732 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   1733 ; X64-NEXT:    vpcmpgtb %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xc0]
   1734 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   1735 ; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
   1736 ; X64-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x02]
   1737 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   1738 ; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
   1739 ; X64-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
   1740 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   1741 ; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
   1742 ; X64-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x05]
   1743 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   1744 ; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
   1745 ; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xc1]
   1746 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   1747 ; X64-NEXT:    leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
   1748 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   1749 ; X64-NEXT:    retq # encoding: [0xc3]
   1750   %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
   1751   %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
   1752   %ret1 = add i64 %res0, %res1
   1753   %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
   1754   %ret2 = add i64 %ret1, %res2
   1755   %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
   1756   %ret3 = add i64 %ret2, %res3
   1757   %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
   1758   %ret4 = add i64 %ret3, %res4
   1759   %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
   1760   %ret5 = add i64 %ret4, %res5
   1761   %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
   1762   %ret6 = add i64 %ret5, %res6
   1763   %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
   1764   %ret7 = add i64 %ret6, %res7
   1765   ret i64 %ret7
   1766 }
   1767 
   1768 define i64 @test_mask_cmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
   1769 ; X86-LABEL: test_mask_cmp_b_512:
   1770 ; X86:       # %bb.0:
   1771 ; X86-NEXT:    pushl %ebp # encoding: [0x55]
   1772 ; X86-NEXT:    .cfi_def_cfa_offset 8
   1773 ; X86-NEXT:    pushl %ebx # encoding: [0x53]
   1774 ; X86-NEXT:    .cfi_def_cfa_offset 12
   1775 ; X86-NEXT:    pushl %edi # encoding: [0x57]
   1776 ; X86-NEXT:    .cfi_def_cfa_offset 16
   1777 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   1778 ; X86-NEXT:    .cfi_def_cfa_offset 20
   1779 ; X86-NEXT:    .cfi_offset %esi, -20
   1780 ; X86-NEXT:    .cfi_offset %edi, -16
   1781 ; X86-NEXT:    .cfi_offset %ebx, -12
   1782 ; X86-NEXT:    .cfi_offset %ebp, -8
   1783 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
   1784 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
   1785 ; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
   1786 ; X86-NEXT:    kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
   1787 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   1788 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   1789 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   1790 ; X86-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   1791 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   1792 ; X86-NEXT:    kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
   1793 ; X86-NEXT:    vpcmpgtb %zmm0, %zmm1, %k2 # encoding: [0x62,0xf1,0x75,0x48,0x64,0xd0]
   1794 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   1795 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   1796 ; X86-NEXT:    kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
   1797 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   1798 ; X86-NEXT:    kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
   1799 ; X86-NEXT:    addl %edx, %ebx # encoding: [0x01,0xd3]
   1800 ; X86-NEXT:    adcl %eax, %edi # encoding: [0x11,0xc7]
   1801 ; X86-NEXT:    vpcmpleb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x02]
   1802 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   1803 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   1804 ; X86-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   1805 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   1806 ; X86-NEXT:    kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
   1807 ; X86-NEXT:    addl %ebx, %edx # encoding: [0x01,0xda]
   1808 ; X86-NEXT:    adcl %edi, %eax # encoding: [0x11,0xf8]
   1809 ; X86-NEXT:    vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
   1810 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   1811 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   1812 ; X86-NEXT:    kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
   1813 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   1814 ; X86-NEXT:    kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
   1815 ; X86-NEXT:    addl %edx, %ebx # encoding: [0x01,0xd3]
   1816 ; X86-NEXT:    adcl %eax, %edi # encoding: [0x11,0xc7]
   1817 ; X86-NEXT:    vpcmpnltb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x05]
   1818 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   1819 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   1820 ; X86-NEXT:    kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
   1821 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   1822 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
   1823 ; X86-NEXT:    addl %ebx, %ecx # encoding: [0x01,0xd9]
   1824 ; X86-NEXT:    adcl %edi, %ebp # encoding: [0x11,0xfd]
   1825 ; X86-NEXT:    vpcmpgtb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x64,0xd1]
   1826 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   1827 ; X86-NEXT:    kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
   1828 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
   1829 ; X86-NEXT:    kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
   1830 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   1831 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   1832 ; X86-NEXT:    adcl %ebp, %edx # encoding: [0x11,0xea]
   1833 ; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
   1834 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
   1835 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   1836 ; X86-NEXT:    .cfi_def_cfa_offset 16
   1837 ; X86-NEXT:    popl %edi # encoding: [0x5f]
   1838 ; X86-NEXT:    .cfi_def_cfa_offset 12
   1839 ; X86-NEXT:    popl %ebx # encoding: [0x5b]
   1840 ; X86-NEXT:    .cfi_def_cfa_offset 8
   1841 ; X86-NEXT:    popl %ebp # encoding: [0x5d]
   1842 ; X86-NEXT:    .cfi_def_cfa_offset 4
   1843 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   1844 ; X86-NEXT:    retl # encoding: [0xc3]
   1845 ;
   1846 ; X64-LABEL: test_mask_cmp_b_512:
   1847 ; X64:       # %bb.0:
   1848 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   1849 ; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
   1850 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   1851 ; X64-NEXT:    vpcmpgtb %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x64,0xc0]
   1852 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   1853 ; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
   1854 ; X64-NEXT:    vpcmpleb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x02]
   1855 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   1856 ; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
   1857 ; X64-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
   1858 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   1859 ; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
   1860 ; X64-NEXT:    vpcmpnltb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x05]
   1861 ; X64-NEXT:    kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
   1862 ; X64-NEXT:    addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
   1863 ; X64-NEXT:    vpcmpgtb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x64,0xc1]
   1864 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   1865 ; X64-NEXT:    addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
   1866 ; X64-NEXT:    addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
   1867 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   1868 ; X64-NEXT:    retq # encoding: [0xc3]
   1869   %res0 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
   1870   %res1 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
   1871   %ret1 = add i64 %res0, %res1
   1872   %res2 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
   1873   %ret2 = add i64 %ret1, %res2
   1874   %res3 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
   1875   %ret3 = add i64 %ret2, %res3
   1876   %res4 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
   1877   %ret4 = add i64 %ret3, %res4
   1878   %res5 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
   1879   %ret5 = add i64 %ret4, %res5
   1880   %res6 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
   1881   %ret6 = add i64 %ret5, %res6
   1882   %res7 = call i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
   1883   %ret7 = add i64 %ret6, %res7
   1884   ret i64 %ret7
   1885 }
   1886 
   1887 declare i64 @llvm.x86.avx512.mask.cmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
   1888 
   1889 define i64 @test_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1) {
   1890 ; X86-LABEL: test_ucmp_b_512:
   1891 ; X86:       # %bb.0:
   1892 ; X86-NEXT:    pushl %edi # encoding: [0x57]
   1893 ; X86-NEXT:    .cfi_def_cfa_offset 8
   1894 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   1895 ; X86-NEXT:    .cfi_def_cfa_offset 12
   1896 ; X86-NEXT:    .cfi_offset %esi, -12
   1897 ; X86-NEXT:    .cfi_offset %edi, -8
   1898 ; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
   1899 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1900 ; X86-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   1901 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   1902 ; X86-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
   1903 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1904 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
   1905 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   1906 ; X86-NEXT:    addl %ecx, %esi # encoding: [0x01,0xce]
   1907 ; X86-NEXT:    adcl %eax, %edx # encoding: [0x11,0xc2]
   1908 ; X86-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
   1909 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1910 ; X86-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   1911 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   1912 ; X86-NEXT:    addl %esi, %ecx # encoding: [0x01,0xf1]
   1913 ; X86-NEXT:    adcl %edx, %eax # encoding: [0x11,0xd0]
   1914 ; X86-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
   1915 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1916 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
   1917 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   1918 ; X86-NEXT:    addl %ecx, %esi # encoding: [0x01,0xce]
   1919 ; X86-NEXT:    adcl %eax, %edx # encoding: [0x11,0xc2]
   1920 ; X86-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
   1921 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1922 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   1923 ; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
   1924 ; X86-NEXT:    addl %esi, %edi # encoding: [0x01,0xf7]
   1925 ; X86-NEXT:    adcl %edx, %ecx # encoding: [0x11,0xd1]
   1926 ; X86-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
   1927 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   1928 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
   1929 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   1930 ; X86-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   1931 ; X86-NEXT:    adcl %ecx, %edx # encoding: [0x11,0xca]
   1932 ; X86-NEXT:    addl $-1, %eax # encoding: [0x83,0xc0,0xff]
   1933 ; X86-NEXT:    adcl $-1, %edx # encoding: [0x83,0xd2,0xff]
   1934 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   1935 ; X86-NEXT:    .cfi_def_cfa_offset 8
   1936 ; X86-NEXT:    popl %edi # encoding: [0x5f]
   1937 ; X86-NEXT:    .cfi_def_cfa_offset 4
   1938 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   1939 ; X86-NEXT:    retl # encoding: [0xc3]
   1940 ;
   1941 ; X64-LABEL: test_ucmp_b_512:
   1942 ; X64:       # %bb.0:
   1943 ; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xc1]
   1944 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   1945 ; X64-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x01]
   1946 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   1947 ; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
   1948 ; X64-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x02]
   1949 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   1950 ; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
   1951 ; X64-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xc1,0x04]
   1952 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   1953 ; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
   1954 ; X64-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x05]
   1955 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   1956 ; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
   1957 ; X64-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xc1,0x06]
   1958 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   1959 ; X64-NEXT:    leaq -1(%rcx,%rax), %rax # encoding: [0x48,0x8d,0x44,0x01,0xff]
   1960 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   1961 ; X64-NEXT:    retq # encoding: [0xc3]
   1962   %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 -1)
   1963   %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 -1)
   1964   %ret1 = add i64 %res0, %res1
   1965   %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 -1)
   1966   %ret2 = add i64 %ret1, %res2
   1967   %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 -1)
   1968   %ret3 = add i64 %ret2, %res3
   1969   %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 -1)
   1970   %ret4 = add i64 %ret3, %res4
   1971   %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 -1)
   1972   %ret5 = add i64 %ret4, %res5
   1973   %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 -1)
   1974   %ret6 = add i64 %ret5, %res6
   1975   %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 -1)
   1976   %ret7 = add i64 %ret6, %res7
   1977   ret i64 %ret7
   1978 }
   1979 
   1980 define i64 @test_mask_x86_avx512_ucmp_b_512(<64 x i8> %a0, <64 x i8> %a1, i64 %mask) {
   1981 ; X86-LABEL: test_mask_x86_avx512_ucmp_b_512:
   1982 ; X86:       # %bb.0:
   1983 ; X86-NEXT:    pushl %ebp # encoding: [0x55]
   1984 ; X86-NEXT:    .cfi_def_cfa_offset 8
   1985 ; X86-NEXT:    pushl %ebx # encoding: [0x53]
   1986 ; X86-NEXT:    .cfi_def_cfa_offset 12
   1987 ; X86-NEXT:    pushl %edi # encoding: [0x57]
   1988 ; X86-NEXT:    .cfi_def_cfa_offset 16
   1989 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   1990 ; X86-NEXT:    .cfi_def_cfa_offset 20
   1991 ; X86-NEXT:    .cfi_offset %esi, -20
   1992 ; X86-NEXT:    .cfi_offset %edi, -16
   1993 ; X86-NEXT:    .cfi_offset %ebx, -12
   1994 ; X86-NEXT:    .cfi_offset %ebp, -8
   1995 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %esi # encoding: [0x8b,0x74,0x24,0x14]
   1996 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x18]
   1997 ; X86-NEXT:    vpcmpeqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf1,0x7d,0x48,0x74,0xd1]
   1998 ; X86-NEXT:    kmovd %esi, %k0 # encoding: [0xc5,0xfb,0x92,0xc6]
   1999 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2000 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   2001 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   2002 ; X86-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   2003 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   2004 ; X86-NEXT:    kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
   2005 ; X86-NEXT:    vpcmpltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x01]
   2006 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   2007 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   2008 ; X86-NEXT:    kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
   2009 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   2010 ; X86-NEXT:    kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
   2011 ; X86-NEXT:    addl %edx, %ebx # encoding: [0x01,0xd3]
   2012 ; X86-NEXT:    adcl %eax, %edi # encoding: [0x11,0xc7]
   2013 ; X86-NEXT:    vpcmpleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x02]
   2014 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   2015 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   2016 ; X86-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   2017 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   2018 ; X86-NEXT:    kmovd %k2, %edx # encoding: [0xc5,0xfb,0x93,0xd2]
   2019 ; X86-NEXT:    addl %ebx, %edx # encoding: [0x01,0xda]
   2020 ; X86-NEXT:    adcl %edi, %eax # encoding: [0x11,0xf8]
   2021 ; X86-NEXT:    vpcmpneqb %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3f,0xd1,0x04]
   2022 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   2023 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   2024 ; X86-NEXT:    kmovd %k3, %edi # encoding: [0xc5,0xfb,0x93,0xfb]
   2025 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   2026 ; X86-NEXT:    kmovd %k2, %ebx # encoding: [0xc5,0xfb,0x93,0xda]
   2027 ; X86-NEXT:    addl %edx, %ebx # encoding: [0x01,0xd3]
   2028 ; X86-NEXT:    adcl %eax, %edi # encoding: [0x11,0xc7]
   2029 ; X86-NEXT:    vpcmpnltub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x05]
   2030 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   2031 ; X86-NEXT:    kandd %k1, %k3, %k3 # encoding: [0xc4,0xe1,0xe5,0x41,0xd9]
   2032 ; X86-NEXT:    kmovd %k3, %ebp # encoding: [0xc5,0xfb,0x93,0xeb]
   2033 ; X86-NEXT:    kandd %k0, %k2, %k2 # encoding: [0xc4,0xe1,0xed,0x41,0xd0]
   2034 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
   2035 ; X86-NEXT:    addl %ebx, %ecx # encoding: [0x01,0xd9]
   2036 ; X86-NEXT:    adcl %edi, %ebp # encoding: [0x11,0xfd]
   2037 ; X86-NEXT:    vpcmpnleub %zmm1, %zmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x48,0x3e,0xd1,0x06]
   2038 ; X86-NEXT:    kshiftrq $32, %k2, %k3 # encoding: [0xc4,0xe3,0xf9,0x31,0xda,0x20]
   2039 ; X86-NEXT:    kandd %k1, %k3, %k1 # encoding: [0xc4,0xe1,0xe5,0x41,0xc9]
   2040 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
   2041 ; X86-NEXT:    kandd %k0, %k2, %k0 # encoding: [0xc4,0xe1,0xed,0x41,0xc0]
   2042 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2043 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2044 ; X86-NEXT:    adcl %ebp, %edx # encoding: [0x11,0xea]
   2045 ; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
   2046 ; X86-NEXT:    adcl {{[0-9]+}}(%esp), %edx # encoding: [0x13,0x54,0x24,0x18]
   2047 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   2048 ; X86-NEXT:    .cfi_def_cfa_offset 16
   2049 ; X86-NEXT:    popl %edi # encoding: [0x5f]
   2050 ; X86-NEXT:    .cfi_def_cfa_offset 12
   2051 ; X86-NEXT:    popl %ebx # encoding: [0x5b]
   2052 ; X86-NEXT:    .cfi_def_cfa_offset 8
   2053 ; X86-NEXT:    popl %ebp # encoding: [0x5d]
   2054 ; X86-NEXT:    .cfi_def_cfa_offset 4
   2055 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2056 ; X86-NEXT:    retl # encoding: [0xc3]
   2057 ;
   2058 ; X64-LABEL: test_mask_x86_avx512_ucmp_b_512:
   2059 ; X64:       # %bb.0:
   2060 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   2061 ; X64-NEXT:    vpcmpeqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x74,0xc1]
   2062 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   2063 ; X64-NEXT:    vpcmpltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x01]
   2064 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   2065 ; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
   2066 ; X64-NEXT:    vpcmpleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x02]
   2067 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   2068 ; X64-NEXT:    addq %rcx, %rax # encoding: [0x48,0x01,0xc8]
   2069 ; X64-NEXT:    vpcmpneqb %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3f,0xc1,0x04]
   2070 ; X64-NEXT:    kmovq %k0, %rcx # encoding: [0xc4,0xe1,0xfb,0x93,0xc8]
   2071 ; X64-NEXT:    addq %rax, %rcx # encoding: [0x48,0x01,0xc1]
   2072 ; X64-NEXT:    vpcmpnltub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x05]
   2073 ; X64-NEXT:    kmovq %k0, %rdx # encoding: [0xc4,0xe1,0xfb,0x93,0xd0]
   2074 ; X64-NEXT:    addq %rcx, %rdx # encoding: [0x48,0x01,0xca]
   2075 ; X64-NEXT:    vpcmpnleub %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x3e,0xc1,0x06]
   2076 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   2077 ; X64-NEXT:    addq %rdx, %rax # encoding: [0x48,0x01,0xd0]
   2078 ; X64-NEXT:    addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
   2079 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2080 ; X64-NEXT:    retq # encoding: [0xc3]
   2081   %res0 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 0, i64 %mask)
   2082   %res1 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 1, i64 %mask)
   2083   %ret1 = add i64 %res0, %res1
   2084   %res2 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 2, i64 %mask)
   2085   %ret2 = add i64 %ret1, %res2
   2086   %res3 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 3, i64 %mask)
   2087   %ret3 = add i64 %ret2, %res3
   2088   %res4 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 4, i64 %mask)
   2089   %ret4 = add i64 %ret3, %res4
   2090   %res5 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 5, i64 %mask)
   2091   %ret5 = add i64 %ret4, %res5
   2092   %res6 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 6, i64 %mask)
   2093   %ret6 = add i64 %ret5, %res6
   2094   %res7 = call i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8> %a0, <64 x i8> %a1, i32 7, i64 %mask)
   2095   %ret7 = add i64 %ret6, %res7
   2096   ret i64 %ret7
   2097 }
   2098 
   2099 declare i64 @llvm.x86.avx512.mask.ucmp.b.512(<64 x i8>, <64 x i8>, i32, i64) nounwind readnone
   2100 
   2101 define i32 @test_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
   2102 ; X86-LABEL: test_cmp_w_512:
   2103 ; X86:       # %bb.0:
   2104 ; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
   2105 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2106 ; X86-NEXT:    vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
   2107 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2108 ; X86-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2109 ; X86-NEXT:    vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
   2110 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2111 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2112 ; X86-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
   2113 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2114 ; X86-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2115 ; X86-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
   2116 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2117 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2118 ; X86-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
   2119 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2120 ; X86-NEXT:    leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
   2121 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2122 ; X86-NEXT:    retl # encoding: [0xc3]
   2123 ;
   2124 ; X64-LABEL: test_cmp_w_512:
   2125 ; X64:       # %bb.0:
   2126 ; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
   2127 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2128 ; X64-NEXT:    vpcmpgtw %zmm0, %zmm1, %k0 # encoding: [0x62,0xf1,0x75,0x48,0x65,0xc0]
   2129 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2130 ; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2131 ; X64-NEXT:    vpcmplew %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x02]
   2132 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2133 ; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2134 ; X64-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
   2135 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2136 ; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2137 ; X64-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x05]
   2138 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2139 ; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2140 ; X64-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x65,0xc1]
   2141 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2142 ; X64-NEXT:    leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
   2143 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2144 ; X64-NEXT:    retq # encoding: [0xc3]
   2145   %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
   2146   %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
   2147   %ret1 = add i32 %res0, %res1
   2148   %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
   2149   %ret2 = add i32 %ret1, %res2
   2150   %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
   2151   %ret3 = add i32 %ret2, %res3
   2152   %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
   2153   %ret4 = add i32 %ret3, %res4
   2154   %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
   2155   %ret5 = add i32 %ret4, %res5
   2156   %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
   2157   %ret6 = add i32 %ret5, %res6
   2158   %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
   2159   %ret7 = add i32 %ret6, %res7
   2160   ret i32 %ret7
   2161 }
   2162 
   2163 define i32 @test_mask_cmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
   2164 ; X86-LABEL: test_mask_cmp_w_512:
   2165 ; X86:       # %bb.0:
   2166 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   2167 ; X86-NEXT:    .cfi_def_cfa_offset 8
   2168 ; X86-NEXT:    .cfi_offset %esi, -8
   2169 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
   2170 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   2171 ; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
   2172 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2173 ; X86-NEXT:    vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
   2174 ; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   2175 ; X86-NEXT:    addl %eax, %edx # encoding: [0x01,0xc2]
   2176 ; X86-NEXT:    vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
   2177 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2178 ; X86-NEXT:    addl %edx, %eax # encoding: [0x01,0xd0]
   2179 ; X86-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
   2180 ; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   2181 ; X86-NEXT:    addl %eax, %edx # encoding: [0x01,0xc2]
   2182 ; X86-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
   2183 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   2184 ; X86-NEXT:    addl %edx, %esi # encoding: [0x01,0xd6]
   2185 ; X86-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
   2186 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2187 ; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
   2188 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2189 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   2190 ; X86-NEXT:    .cfi_def_cfa_offset 4
   2191 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2192 ; X86-NEXT:    retl # encoding: [0xc3]
   2193 ;
   2194 ; X64-LABEL: test_mask_cmp_w_512:
   2195 ; X64:       # %bb.0:
   2196 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2197 ; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
   2198 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2199 ; X64-NEXT:    vpcmpgtw %zmm0, %zmm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x65,0xc0]
   2200 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2201 ; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2202 ; X64-NEXT:    vpcmplew %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x02]
   2203 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2204 ; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2205 ; X64-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
   2206 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2207 ; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2208 ; X64-NEXT:    vpcmpnltw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x05]
   2209 ; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   2210 ; X64-NEXT:    addl %ecx, %edx # encoding: [0x01,0xca]
   2211 ; X64-NEXT:    vpcmpgtw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x65,0xc1]
   2212 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2213 ; X64-NEXT:    addl %edx, %eax # encoding: [0x01,0xd0]
   2214 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   2215 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2216 ; X64-NEXT:    retq # encoding: [0xc3]
   2217   %res0 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
   2218   %res1 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
   2219   %ret1 = add i32 %res0, %res1
   2220   %res2 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
   2221   %ret2 = add i32 %ret1, %res2
   2222   %res3 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
   2223   %ret3 = add i32 %ret2, %res3
   2224   %res4 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
   2225   %ret4 = add i32 %ret3, %res4
   2226   %res5 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
   2227   %ret5 = add i32 %ret4, %res5
   2228   %res6 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
   2229   %ret6 = add i32 %ret5, %res6
   2230   %res7 = call i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
   2231   %ret7 = add i32 %ret6, %res7
   2232   ret i32 %ret7
   2233 }
   2234 
   2235 declare i32 @llvm.x86.avx512.mask.cmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
   2236 
   2237 define i32 @test_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1) {
   2238 ; X86-LABEL: test_ucmp_w_512:
   2239 ; X86:       # %bb.0:
   2240 ; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
   2241 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2242 ; X86-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
   2243 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2244 ; X86-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2245 ; X86-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
   2246 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2247 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2248 ; X86-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
   2249 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2250 ; X86-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2251 ; X86-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
   2252 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2253 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2254 ; X86-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
   2255 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2256 ; X86-NEXT:    leal -1(%ecx,%eax), %eax # encoding: [0x8d,0x44,0x01,0xff]
   2257 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2258 ; X86-NEXT:    retl # encoding: [0xc3]
   2259 ;
   2260 ; X64-LABEL: test_ucmp_w_512:
   2261 ; X64:       # %bb.0:
   2262 ; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x48,0x75,0xc1]
   2263 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2264 ; X64-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x01]
   2265 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2266 ; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2267 ; X64-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x02]
   2268 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2269 ; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2270 ; X64-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3f,0xc1,0x04]
   2271 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2272 ; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2273 ; X64-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x05]
   2274 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2275 ; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2276 ; X64-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf3,0xfd,0x48,0x3e,0xc1,0x06]
   2277 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2278 ; X64-NEXT:    leal -1(%rcx,%rax), %eax # encoding: [0x8d,0x44,0x01,0xff]
   2279 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2280 ; X64-NEXT:    retq # encoding: [0xc3]
   2281   %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 -1)
   2282   %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 -1)
   2283   %ret1 = add i32 %res0, %res1
   2284   %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 -1)
   2285   %ret2 = add i32 %ret1, %res2
   2286   %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 -1)
   2287   %ret3 = add i32 %ret2, %res3
   2288   %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 -1)
   2289   %ret4 = add i32 %ret3, %res4
   2290   %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 -1)
   2291   %ret5 = add i32 %ret4, %res5
   2292   %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 -1)
   2293   %ret6 = add i32 %ret5, %res6
   2294   %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 -1)
   2295   %ret7 = add i32 %ret6, %res7
   2296   ret i32 %ret7
   2297 }
   2298 
   2299 define i32 @test_mask_ucmp_w_512(<32 x i16> %a0, <32 x i16> %a1, i32 %mask) {
   2300 ; X86-LABEL: test_mask_ucmp_w_512:
   2301 ; X86:       # %bb.0:
   2302 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   2303 ; X86-NEXT:    .cfi_def_cfa_offset 8
   2304 ; X86-NEXT:    .cfi_offset %esi, -8
   2305 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x08]
   2306 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   2307 ; X86-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
   2308 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2309 ; X86-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
   2310 ; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   2311 ; X86-NEXT:    addl %eax, %edx # encoding: [0x01,0xc2]
   2312 ; X86-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
   2313 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2314 ; X86-NEXT:    addl %edx, %eax # encoding: [0x01,0xd0]
   2315 ; X86-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
   2316 ; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   2317 ; X86-NEXT:    addl %eax, %edx # encoding: [0x01,0xc2]
   2318 ; X86-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
   2319 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   2320 ; X86-NEXT:    addl %edx, %esi # encoding: [0x01,0xd6]
   2321 ; X86-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
   2322 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2323 ; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
   2324 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2325 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   2326 ; X86-NEXT:    .cfi_def_cfa_offset 4
   2327 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2328 ; X86-NEXT:    retl # encoding: [0xc3]
   2329 ;
   2330 ; X64-LABEL: test_mask_ucmp_w_512:
   2331 ; X64:       # %bb.0:
   2332 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2333 ; X64-NEXT:    vpcmpeqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x75,0xc1]
   2334 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2335 ; X64-NEXT:    vpcmpltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x01]
   2336 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2337 ; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2338 ; X64-NEXT:    vpcmpleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x02]
   2339 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2340 ; X64-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2341 ; X64-NEXT:    vpcmpneqw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3f,0xc1,0x04]
   2342 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2343 ; X64-NEXT:    addl %eax, %ecx # encoding: [0x01,0xc1]
   2344 ; X64-NEXT:    vpcmpnltuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x05]
   2345 ; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   2346 ; X64-NEXT:    addl %ecx, %edx # encoding: [0x01,0xca]
   2347 ; X64-NEXT:    vpcmpnleuw %zmm1, %zmm0, %k0 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0x3e,0xc1,0x06]
   2348 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2349 ; X64-NEXT:    addl %edx, %eax # encoding: [0x01,0xd0]
   2350 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   2351 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2352 ; X64-NEXT:    retq # encoding: [0xc3]
   2353   %res0 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 0, i32 %mask)
   2354   %res1 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 1, i32 %mask)
   2355   %ret1 = add i32 %res0, %res1
   2356   %res2 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 2, i32 %mask)
   2357   %ret2 = add i32 %ret1, %res2
   2358   %res3 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 3, i32 %mask)
   2359   %ret3 = add i32 %ret2, %res3
   2360   %res4 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 4, i32 %mask)
   2361   %ret4 = add i32 %ret3, %res4
   2362   %res5 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 5, i32 %mask)
   2363   %ret5 = add i32 %ret4, %res5
   2364   %res6 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 6, i32 %mask)
   2365   %ret6 = add i32 %ret5, %res6
   2366   %res7 = call i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16> %a0, <32 x i16> %a1, i32 7, i32 %mask)
   2367   %ret7 = add i32 %ret6, %res7
   2368   ret i32 %ret7
   2369 }
   2370 
   2371 declare i32 @llvm.x86.avx512.mask.ucmp.w.512(<32 x i16>, <32 x i16>, i32, i32) nounwind readnone
   2372 
   2373 
   2374 declare <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8>, <64 x i8>, <64 x i8>, i64)
   2375 
   2376 define <64 x i8>@mm512_avg_epu8(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3) {
   2377 ; X86-LABEL: mm512_avg_epu8:
   2378 ; X86:       # %bb.0:
   2379 ; X86-NEXT:    vpavgb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xd9]
   2380 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
   2381 ; X86-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
   2382 ; X86-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
   2383 ; X86-NEXT:    retl # encoding: [0xc3]
   2384 ;
   2385 ; X64-LABEL: mm512_avg_epu8:
   2386 ; X64:       # %bb.0:
   2387 ; X64-NEXT:    vpavgb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe0,0xd9]
   2388 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   2389 ; X64-NEXT:    vpavgb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe0,0xd1]
   2390 ; X64-NEXT:    vpaddb %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc3]
   2391 ; X64-NEXT:    retq # encoding: [0xc3]
   2392   %res = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %x3)
   2393   %res1 = call <64 x i8> @llvm.x86.avx512.mask.pavg.b.512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 -1)
   2394   %res2 = add <64 x i8> %res, %res1
   2395   ret <64 x i8> %res2
   2396 }
   2397 
   2398 
   2399 declare <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2400 
   2401 define <32 x i16>@mm512_avg_epu16(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   2402 ; X86-LABEL: mm512_avg_epu16:
   2403 ; X86:       # %bb.0:
   2404 ; X86-NEXT:    vpavgw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xd9]
   2405 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2406 ; X86-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
   2407 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2408 ; X86-NEXT:    retl # encoding: [0xc3]
   2409 ;
   2410 ; X64-LABEL: mm512_avg_epu16:
   2411 ; X64:       # %bb.0:
   2412 ; X64-NEXT:    vpavgw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe3,0xd9]
   2413 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2414 ; X64-NEXT:    vpavgw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe3,0xd1]
   2415 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2416 ; X64-NEXT:    retq # encoding: [0xc3]
   2417   %res = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   2418   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pavg.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   2419   %res2 = add <32 x i16> %res, %res1
   2420   ret <32 x i16> %res2
   2421 }
   2422 
   2423 declare <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16>, <32 x i16>, i32)
   2424 
   2425 define <32 x i16>@test_int_x86_avx512_mask_pabs_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
   2426 ; X86-LABEL: test_int_x86_avx512_mask_pabs_w_512:
   2427 ; X86:       # %bb.0:
   2428 ; X86-NEXT:    vpabsw %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xd0]
   2429 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2430 ; X86-NEXT:    vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
   2431 ; X86-NEXT:    vpaddw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc2]
   2432 ; X86-NEXT:    retl # encoding: [0xc3]
   2433 ;
   2434 ; X64-LABEL: test_int_x86_avx512_mask_pabs_w_512:
   2435 ; X64:       # %bb.0:
   2436 ; X64-NEXT:    vpabsw %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1d,0xd0]
   2437 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2438 ; X64-NEXT:    vpabsw %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1d,0xc8]
   2439 ; X64-NEXT:    vpaddw %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc2]
   2440 ; X64-NEXT:    retq # encoding: [0xc3]
   2441   %res = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
   2442   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pabs.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 -1)
   2443   %res2 = add <32 x i16> %res, %res1
   2444   ret <32 x i16> %res2
   2445 }
   2446 
   2447 declare <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8>, <64 x i8>, i64)
   2448 
   2449 define <64 x i8>@test_int_x86_avx512_mask_pabs_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
   2450 ; X86-LABEL: test_int_x86_avx512_mask_pabs_b_512:
   2451 ; X86:       # %bb.0:
   2452 ; X86-NEXT:    vpabsb %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xd0]
   2453 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
   2454 ; X86-NEXT:    vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8]
   2455 ; X86-NEXT:    vpaddb %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc2]
   2456 ; X86-NEXT:    retl # encoding: [0xc3]
   2457 ;
   2458 ; X64-LABEL: test_int_x86_avx512_mask_pabs_b_512:
   2459 ; X64:       # %bb.0:
   2460 ; X64-NEXT:    vpabsb %zmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x1c,0xd0]
   2461 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   2462 ; X64-NEXT:    vpabsb %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x1c,0xc8]
   2463 ; X64-NEXT:    vpaddb %zmm2, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc2]
   2464 ; X64-NEXT:    retq # encoding: [0xc3]
   2465   %res = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
   2466   %res1 = call <64 x i8> @llvm.x86.avx512.mask.pabs.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 -1)
   2467   %res2 = add <64 x i8> %res, %res1
   2468   ret <64 x i8> %res2
   2469 }
   2470 
   2471 declare i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8>, <64 x i8>, i64)
   2472 
   2473 define i64@test_int_x86_avx512_ptestm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
   2474 ; X86-LABEL: test_int_x86_avx512_ptestm_b_512:
   2475 ; X86:       # %bb.0:
   2476 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   2477 ; X86-NEXT:    .cfi_def_cfa_offset 8
   2478 ; X86-NEXT:    .cfi_offset %esi, -8
   2479 ; X86-NEXT:    vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1]
   2480 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   2481 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   2482 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
   2483 ; X86-NEXT:    andl %ecx, %edx # encoding: [0x21,0xca]
   2484 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   2485 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
   2486 ; X86-NEXT:    andl %esi, %eax # encoding: [0x21,0xf0]
   2487 ; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
   2488 ; X86-NEXT:    adcl %ecx, %edx # encoding: [0x11,0xca]
   2489 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   2490 ; X86-NEXT:    .cfi_def_cfa_offset 4
   2491 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2492 ; X86-NEXT:    retl # encoding: [0xc3]
   2493 ;
   2494 ; X64-LABEL: test_int_x86_avx512_ptestm_b_512:
   2495 ; X64:       # %bb.0:
   2496 ; X64-NEXT:    vptestmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x48,0x26,0xc1]
   2497 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   2498 ; X64-NEXT:    andq %rax, %rdi # encoding: [0x48,0x21,0xc7]
   2499 ; X64-NEXT:    addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
   2500 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2501 ; X64-NEXT:    retq # encoding: [0xc3]
   2502   %res = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
   2503   %res1 = call i64 @llvm.x86.avx512.ptestm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
   2504   %res2 = add i64 %res, %res1
   2505   ret i64 %res2
   2506 }
   2507 
   2508 declare i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16>, <32 x i16>, i32)
   2509 
   2510 define i32@test_int_x86_avx512_ptestm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
   2511 ; X86-LABEL: test_int_x86_avx512_ptestm_w_512:
   2512 ; X86:       # %bb.0:
   2513 ; X86-NEXT:    vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1]
   2514 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2515 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2516 ; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
   2517 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2518 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2519 ; X86-NEXT:    retl # encoding: [0xc3]
   2520 ;
   2521 ; X64-LABEL: test_int_x86_avx512_ptestm_w_512:
   2522 ; X64:       # %bb.0:
   2523 ; X64-NEXT:    vptestmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x48,0x26,0xc1]
   2524 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2525 ; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
   2526 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   2527 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2528 ; X64-NEXT:    retq # encoding: [0xc3]
   2529   %res = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
   2530   %res1 = call i32 @llvm.x86.avx512.ptestm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
   2531   %res2 = add i32 %res, %res1
   2532   ret i32 %res2
   2533 }
   2534 
   2535 declare i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8>, <64 x i8>, i64 %x2)
   2536 
   2537 define i64@test_int_x86_avx512_ptestnm_b_512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2) {
   2538 ; X86-LABEL: test_int_x86_avx512_ptestnm_b_512:
   2539 ; X86:       # %bb.0:
   2540 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   2541 ; X86-NEXT:    .cfi_def_cfa_offset 8
   2542 ; X86-NEXT:    .cfi_offset %esi, -8
   2543 ; X86-NEXT:    vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1]
   2544 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   2545 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   2546 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %edx # encoding: [0x8b,0x54,0x24,0x0c]
   2547 ; X86-NEXT:    andl %ecx, %edx # encoding: [0x21,0xca]
   2548 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   2549 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
   2550 ; X86-NEXT:    andl %esi, %eax # encoding: [0x21,0xf0]
   2551 ; X86-NEXT:    addl %esi, %eax # encoding: [0x01,0xf0]
   2552 ; X86-NEXT:    adcl %ecx, %edx # encoding: [0x11,0xca]
   2553 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   2554 ; X86-NEXT:    .cfi_def_cfa_offset 4
   2555 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2556 ; X86-NEXT:    retl # encoding: [0xc3]
   2557 ;
   2558 ; X64-LABEL: test_int_x86_avx512_ptestnm_b_512:
   2559 ; X64:       # %bb.0:
   2560 ; X64-NEXT:    vptestnmb %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x26,0xc1]
   2561 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   2562 ; X64-NEXT:    andq %rax, %rdi # encoding: [0x48,0x21,0xc7]
   2563 ; X64-NEXT:    addq %rdi, %rax # encoding: [0x48,0x01,0xf8]
   2564 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2565 ; X64-NEXT:    retq # encoding: [0xc3]
   2566   %res = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64 %x2)
   2567   %res1 = call i64 @llvm.x86.avx512.ptestnm.b.512(<64 x i8> %x0, <64 x i8> %x1, i64-1)
   2568   %res2 = add i64 %res, %res1
   2569   ret i64 %res2
   2570 }
   2571 
   2572 declare i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16>, <32 x i16>, i32 %x2)
   2573 
   2574 define i32@test_int_x86_avx512_ptestnm_w_512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2) {
   2575 ; X86-LABEL: test_int_x86_avx512_ptestnm_w_512:
   2576 ; X86:       # %bb.0:
   2577 ; X86-NEXT:    vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1]
   2578 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   2579 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2580 ; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
   2581 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   2582 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2583 ; X86-NEXT:    retl # encoding: [0xc3]
   2584 ;
   2585 ; X64-LABEL: test_int_x86_avx512_ptestnm_w_512:
   2586 ; X64:       # %bb.0:
   2587 ; X64-NEXT:    vptestnmw %zmm1, %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x26,0xc1]
   2588 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2589 ; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
   2590 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   2591 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2592 ; X64-NEXT:    retq # encoding: [0xc3]
   2593   %res = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32 %x2)
   2594   %res1 = call i32 @llvm.x86.avx512.ptestnm.w.512(<32 x i16> %x0, <32 x i16> %x1, i32-1)
   2595   %res2 = add i32 %res, %res1
   2596   ret i32 %res2
   2597 }
   2598 
   2599 declare i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8>)
   2600 
   2601 define i64@test_int_x86_avx512_cvtb2mask_512(<64 x i8> %x0) {
   2602 ; X86-LABEL: test_int_x86_avx512_cvtb2mask_512:
   2603 ; X86:       # %bb.0:
   2604 ; X86-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
   2605 ; X86-NEXT:    kshiftrq $32, %k0, %k1 # encoding: [0xc4,0xe3,0xf9,0x31,0xc8,0x20]
   2606 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2607 ; X86-NEXT:    kmovd %k1, %edx # encoding: [0xc5,0xfb,0x93,0xd1]
   2608 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2609 ; X86-NEXT:    retl # encoding: [0xc3]
   2610 ;
   2611 ; X64-LABEL: test_int_x86_avx512_cvtb2mask_512:
   2612 ; X64:       # %bb.0:
   2613 ; X64-NEXT:    vpmovb2m %zmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x48,0x29,0xc0]
   2614 ; X64-NEXT:    kmovq %k0, %rax # encoding: [0xc4,0xe1,0xfb,0x93,0xc0]
   2615 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2616 ; X64-NEXT:    retq # encoding: [0xc3]
   2617     %res = call i64 @llvm.x86.avx512.cvtb2mask.512(<64 x i8> %x0)
   2618     ret i64 %res
   2619 }
   2620 
   2621 declare i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16>)
   2622 
   2623 define i32@test_int_x86_avx512_cvtw2mask_512(<32 x i16> %x0) {
   2624 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_512:
   2625 ; CHECK:       # %bb.0:
   2626 ; CHECK-NEXT:    vpmovw2m %zmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x48,0x29,0xc0]
   2627 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   2628 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   2629 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   2630     %res = call i32 @llvm.x86.avx512.cvtw2mask.512(<32 x i16> %x0)
   2631     ret i32 %res
   2632 }
   2633 
   2634 declare <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2635 
   2636 define <32 x i16>@test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   2637 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
   2638 ; X86:       # %bb.0:
   2639 ; X86-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
   2640 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2641 ; X86-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
   2642 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2643 ; X86-NEXT:    retl # encoding: [0xc3]
   2644 ;
   2645 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
   2646 ; X64:       # %bb.0:
   2647 ; X64-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
   2648 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2649 ; X64-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
   2650 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2651 ; X64-NEXT:    retq # encoding: [0xc3]
   2652   %res = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   2653   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   2654   %res2 = add <32 x i16> %res, %res1
   2655   ret <32 x i16> %res2
   2656 }
   2657 
   2658 declare <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2659 
   2660 define <32 x i16>@test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   2661 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
   2662 ; X86:       # %bb.0:
   2663 ; X86-NEXT:    vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
   2664 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2665 ; X86-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
   2666 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2667 ; X86-NEXT:    retl # encoding: [0xc3]
   2668 ;
   2669 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
   2670 ; X64:       # %bb.0:
   2671 ; X64-NEXT:    vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
   2672 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2673 ; X64-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
   2674 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2675 ; X64-NEXT:    retq # encoding: [0xc3]
   2676   %res = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   2677   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   2678   %res2 = add <32 x i16> %res, %res1
   2679   ret <32 x i16> %res2
   2680 }
   2681 
   2682 declare <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2683 
   2684 define <32 x i16>@test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   2685 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
   2686 ; X86:       # %bb.0:
   2687 ; X86-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
   2688 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2689 ; X86-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
   2690 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2691 ; X86-NEXT:    retl # encoding: [0xc3]
   2692 ;
   2693 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
   2694 ; X64:       # %bb.0:
   2695 ; X64-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
   2696 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2697 ; X64-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
   2698 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2699 ; X64-NEXT:    retq # encoding: [0xc3]
   2700   %res = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   2701   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   2702   %res2 = add <32 x i16> %res, %res1
   2703   ret <32 x i16> %res2
   2704 }
   2705 
   2706 declare <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8>, <64 x i8>, <32 x i16>, i32)
   2707 
   2708 define <32 x i16>@test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
   2709 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
   2710 ; X86:       # %bb.0:
   2711 ; X86-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
   2712 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2713 ; X86-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
   2714 ; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2715 ; X86-NEXT:    retl # encoding: [0xc3]
   2716 ;
   2717 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
   2718 ; X64:       # %bb.0:
   2719 ; X64-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
   2720 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2721 ; X64-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
   2722 ; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
   2723 ; X64-NEXT:    retq # encoding: [0xc3]
   2724   %res = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3)
   2725   %res1 = call <32 x i16> @llvm.x86.avx512.mask.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 -1)
   2726   %res2 = add <32 x i16> %res, %res1
   2727   ret <32 x i16> %res2
   2728 }
   2729 
   2730 declare <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16>, <32 x i16>, <16 x i32>, i16)
   2731 
   2732 define <16 x i32>@test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
   2733 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
   2734 ; X86:       # %bb.0:
   2735 ; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
   2736 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2737 ; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
   2738 ; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2739 ; X86-NEXT:    retl # encoding: [0xc3]
   2740 ;
   2741 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
   2742 ; X64:       # %bb.0:
   2743 ; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
   2744 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2745 ; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
   2746 ; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
   2747 ; X64-NEXT:    retq # encoding: [0xc3]
   2748   %res = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3)
   2749   %res1 = call <16 x i32> @llvm.x86.avx512.mask.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 -1)
   2750   %res2 = add <16 x i32> %res, %res1
   2751   ret <16 x i32> %res2
   2752 }
   2753 
   2754 declare <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2755 
   2756 define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   2757 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
   2758 ; X86:       # %bb.0:
   2759 ; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
   2760 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2761 ; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
   2762 ; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
   2763 ; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
   2764 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
   2765 ; X86-NEXT:    retl # encoding: [0xc3]
   2766 ;
   2767 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
   2768 ; X64:       # %bb.0:
   2769 ; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
   2770 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2771 ; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
   2772 ; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
   2773 ; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
   2774 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
   2775 ; X64-NEXT:    retq # encoding: [0xc3]
   2776   %res = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   2777   %res1 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
   2778   %res2 = call <32 x i16> @llvm.x86.avx512.mask.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   2779   %res3 = add <32 x i16> %res, %res1
   2780   %res4 = add <32 x i16> %res3, %res2
   2781   ret <32 x i16> %res4
   2782 }
   2783 
   2784 declare <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2785 
   2786 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   2787 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
   2788 ; X86:       # %bb.0:
   2789 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   2790 ; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
   2791 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2792 ; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
   2793 ; X86-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
   2794 ; X86-NEXT:    retl # encoding: [0xc3]
   2795 ;
   2796 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
   2797 ; X64:       # %bb.0:
   2798 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   2799 ; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
   2800 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2801 ; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
   2802 ; X64-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
   2803 ; X64-NEXT:    retq # encoding: [0xc3]
   2804   %res = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   2805   %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   2806   %res2 = add <32 x i16> %res, %res1
   2807   ret <32 x i16> %res2
   2808 }
   2809 
   2810 declare <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2811 
   2812 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   2813 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
   2814 ; X86:       # %bb.0:
   2815 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   2816 ; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
   2817 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2818 ; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
   2819 ; X86-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
   2820 ; X86-NEXT:    retl # encoding: [0xc3]
   2821 ;
   2822 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
   2823 ; X64:       # %bb.0:
   2824 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
   2825 ; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
   2826 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2827 ; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
   2828 ; X64-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
   2829 ; X64-NEXT:    retq # encoding: [0xc3]
   2830   %res = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   2831   %res1 = call <32 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   2832   %res2 = add <32 x i16> %res, %res1
   2833   ret <32 x i16> %res2
   2834 }
   2835 
   2836 declare <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2837 
   2838 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   2839 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
   2840 ; X86:       # %bb.0:
   2841 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   2842 ; X86-NEXT:    vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
   2843 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2844 ; X86-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
   2845 ; X86-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
   2846 ; X86-NEXT:    retl # encoding: [0xc3]
   2847 ;
   2848 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
   2849 ; X64:       # %bb.0:
   2850 ; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   2851 ; X64-NEXT:    vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
   2852 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2853 ; X64-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
   2854 ; X64-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
   2855 ; X64-NEXT:    retq # encoding: [0xc3]
   2856   %res = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
   2857   %res1 = call <32 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
   2858   %res2 = add <32 x i16> %res, %res1
   2859   ret <32 x i16> %res2
   2860 }
   2861 
   2862 declare <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8>, <64 x i8>, i32, <32 x i16>, i32)
   2863 
   2864 define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
   2865 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
   2866 ; X86:       # %bb.0:
   2867 ; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
   2868 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2869 ; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
   2870 ; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
   2871 ; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
   2872 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
   2873 ; X86-NEXT:    retl # encoding: [0xc3]
   2874 ;
   2875 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
   2876 ; X64:       # %bb.0:
   2877 ; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
   2878 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2879 ; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
   2880 ; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
   2881 ; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
   2882 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
   2883 ; X64-NEXT:    retq # encoding: [0xc3]
   2884   %res = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 %x4)
   2885   %res1 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> zeroinitializer, i32 %x4)
   2886   %res2 = call <32 x i16> @llvm.x86.avx512.mask.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2, <32 x i16> %x3, i32 -1)
   2887   %res3 = add <32 x i16> %res, %res1
   2888   %res4 = add <32 x i16> %res3, %res2
   2889   ret <32 x i16> %res4
   2890 }
   2891