      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw,+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
      4 
      5 declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)
      6 
      7 define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) {
      8 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
      9 ; X86:       # %bb.0:
     10 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
     11 ; X86-NEXT:    vpbroadcastb %eax, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xc8]
     12 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
     13 ; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc0]
     14 ; X86-NEXT:    vpbroadcastb %eax, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd0]
     15 ; X86-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2]
     16 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
     17 ; X86-NEXT:    retl # encoding: [0xc3]
     18 ;
     19 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
     20 ; X64:       # %bb.0:
     21 ; X64-NEXT:    vpbroadcastb %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xcf]
     22 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
     23 ; X64-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
     24 ; X64-NEXT:    vpbroadcastb %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xd7]
     25 ; X64-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc2]
     26 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
     27 ; X64-NEXT:    retq # encoding: [0xc3]
     28   %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
     29   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
     30   %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
     31   %res3 = add <16 x i8> %res, %res1
     32   %res4 = add <16 x i8> %res2, %res3
     33   ret <16 x i8> %res4
     34 }
     35 
     36 
     37 declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8)
     38 
     39 define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) {
     40 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
     41 ; X86:       # %bb.0:
     42 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
     43 ; X86-NEXT:    vpbroadcastw %eax, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xc8]
     44 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
     45 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
     46 ; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc0]
     47 ; X86-NEXT:    vpbroadcastw %eax, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd0]
     48 ; X86-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
     49 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
     50 ; X86-NEXT:    retl # encoding: [0xc3]
     51 ;
     52 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
     53 ; X64:       # %bb.0:
     54 ; X64-NEXT:    vpbroadcastw %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xcf]
     55 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
     56 ; X64-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
     57 ; X64-NEXT:    vpbroadcastw %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xd7]
     58 ; X64-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
     59 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
     60 ; X64-NEXT:    retq # encoding: [0xc3]
     61   %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
     62   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
     63   %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
     64   %res3 = add <8 x i16> %res, %res1
     65   %res4 = add <8 x i16> %res2, %res3
     66   ret <8 x i16> %res4
     67 }
     68 
     69 
     70  declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32)
     71 
     72   define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) {
     73 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
     74 ; X86:       # %bb.0:
     75 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
     76 ; X86-NEXT:    vpbroadcastb %eax, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xc8]
     77 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
     78 ; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc0]
     79 ; X86-NEXT:    vpbroadcastb %eax, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd0]
     80 ; X86-NEXT:    vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2]
     81 ; X86-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
     82 ; X86-NEXT:    retl # encoding: [0xc3]
     83 ;
     84 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
     85 ; X64:       # %bb.0:
     86 ; X64-NEXT:    vpbroadcastb %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xcf]
     87 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
     88 ; X64-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
     89 ; X64-NEXT:    vpbroadcastb %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xd7]
     90 ; X64-NEXT:    vpaddb %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc2]
     91 ; X64-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
     92 ; X64-NEXT:    retq # encoding: [0xc3]
     93     %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
     94     %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
     95     %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
     96     %res3 = add <32 x i8> %res, %res1
     97     %res4 = add <32 x i8> %res2, %res3
     98     ret <32 x i8> %res4
     99   }
    103 declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16)
    104 
    105   define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) {
    106 ; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
    107 ; X86:       # %bb.0:
    108 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
    109 ; X86-NEXT:    vpbroadcastw %eax, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xc8]
    110 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
    111 ; X86-NEXT:    vpbroadcastw %eax, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc0]
    112 ; X86-NEXT:    vpbroadcastw %eax, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd0]
    113 ; X86-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
    114 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
    115 ; X86-NEXT:    retl # encoding: [0xc3]
    116 ;
    117 ; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
    118 ; X64:       # %bb.0:
    119 ; X64-NEXT:    vpbroadcastw %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xcf]
    120 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    121 ; X64-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
    122 ; X64-NEXT:    vpbroadcastw %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xd7]
    123 ; X64-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
    124 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
    125 ; X64-NEXT:    retq # encoding: [0xc3]
    126     %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
    127     %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
    128     %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
    129     %res3 = add <16 x i16> %res, %res1
    130     %res4 = add <16 x i16> %res2, %res3
    131     ret <16 x i16> %res4
    132   }
    133 
    134 declare <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8>, <32 x i8>, i32)
    135 
    136 define <32 x i8>@test_int_x86_avx512_pbroadcastb_256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask) {
    137 ; X86-LABEL: test_int_x86_avx512_pbroadcastb_256:
    138 ; X86:       # %bb.0:
    139 ; X86-NEXT:    vpbroadcastb %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xd0]
    140 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    141 ; X86-NEXT:    vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
    142 ; X86-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
    143 ; X86-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
    144 ; X86-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
    145 ; X86-NEXT:    retl # encoding: [0xc3]
    146 ;
    147 ; X64-LABEL: test_int_x86_avx512_pbroadcastb_256:
    148 ; X64:       # %bb.0:
    149 ; X64-NEXT:    vpbroadcastb %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x78,0xd0]
    150 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    151 ; X64-NEXT:    vpbroadcastb %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x78,0xc8]
    152 ; X64-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x78,0xc0]
    153 ; X64-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc0]
    154 ; X64-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
    155 ; X64-NEXT:    retq # encoding: [0xc3]
    156   %res = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 -1)
    157   %res1 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> %x1, i32 %mask)
    158   %res2 = call <32 x i8> @llvm.x86.avx512.pbroadcastb.256(<16 x i8> %x0, <32 x i8> zeroinitializer, i32 %mask)
    159   %res3 = add <32 x i8> %res, %res1
    160   %res4 = add <32 x i8> %res2, %res3
    161   ret <32 x i8> %res4
    162 }
    163 
    164 declare <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8>, <16 x i8>, i16)
    165 
    166 define <16 x i8>@test_int_x86_avx512_pbroadcastb_128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask) {
    167 ; X86-LABEL: test_int_x86_avx512_pbroadcastb_128:
    168 ; X86:       # %bb.0:
    169 ; X86-NEXT:    vpbroadcastb %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xd0]
    170 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
    171 ; X86-NEXT:    vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
    172 ; X86-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
    173 ; X86-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
    174 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
    175 ; X86-NEXT:    retl # encoding: [0xc3]
    176 ;
    177 ; X64-LABEL: test_int_x86_avx512_pbroadcastb_128:
    178 ; X64:       # %bb.0:
    179 ; X64-NEXT:    vpbroadcastb %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x78,0xd0]
    180 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    181 ; X64-NEXT:    vpbroadcastb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x78,0xc8]
    182 ; X64-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x78,0xc0]
    183 ; X64-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc0]
    184 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
    185 ; X64-NEXT:    retq # encoding: [0xc3]
    186   %res = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
    187   %res1 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> %x1, i16 %mask)
    188   %res2 = call <16 x i8> @llvm.x86.avx512.pbroadcastb.128(<16 x i8> %x0, <16 x i8> zeroinitializer, i16 %mask)
    189   %res3 = add <16 x i8> %res, %res1
    190   %res4 = add <16 x i8> %res2, %res3
    191   ret <16 x i8> %res4
    192 }
    193 
    194 declare <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16>, <16 x i16>, i16)
    195 
    196 define <16 x i16>@test_int_x86_avx512_pbroadcastw_256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask) {
    197 ; X86-LABEL: test_int_x86_avx512_pbroadcastw_256:
    198 ; X86:       # %bb.0:
    199 ; X86-NEXT:    vpbroadcastw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xd0]
    200 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
    201 ; X86-NEXT:    vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
    202 ; X86-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
    203 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
    204 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
    205 ; X86-NEXT:    retl # encoding: [0xc3]
    206 ;
    207 ; X64-LABEL: test_int_x86_avx512_pbroadcastw_256:
    208 ; X64:       # %bb.0:
    209 ; X64-NEXT:    vpbroadcastw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x79,0xd0]
    210 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    211 ; X64-NEXT:    vpbroadcastw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x79,0xc8]
    212 ; X64-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x79,0xc0]
    213 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
    214 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
    215 ; X64-NEXT:    retq # encoding: [0xc3]
    216   %res = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 -1)
    217   %res1 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> %x1, i16 %mask)
    218   %res2 = call <16 x i16> @llvm.x86.avx512.pbroadcastw.256(<8 x i16> %x0, <16 x i16> zeroinitializer, i16 %mask)
    219   %res3 = add <16 x i16> %res, %res1
    220   %res4 = add <16 x i16> %res2, %res3
    221   ret <16 x i16> %res4
    222 }
    223 
    224 declare <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16>, <8 x i16>, i8)
    225 
    226 define <8 x i16>@test_int_x86_avx512_pbroadcastw_128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask) {
    227 ; X86-LABEL: test_int_x86_avx512_pbroadcastw_128:
    228 ; X86:       # %bb.0:
    229 ; X86-NEXT:    vpbroadcastw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xd0]
    230 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    231 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
    232 ; X86-NEXT:    vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
    233 ; X86-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
    234 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
    235 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
    236 ; X86-NEXT:    retl # encoding: [0xc3]
    237 ;
    238 ; X64-LABEL: test_int_x86_avx512_pbroadcastw_128:
    239 ; X64:       # %bb.0:
    240 ; X64-NEXT:    vpbroadcastw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x79,0xd0]
    241 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    242 ; X64-NEXT:    vpbroadcastw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x79,0xc8]
    243 ; X64-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x79,0xc0]
    244 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
    245 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
    246 ; X64-NEXT:    retq # encoding: [0xc3]
    247   %res = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
    248   %res1 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> %x1, i8 %mask)
    249   %res2 = call <8 x i16> @llvm.x86.avx512.pbroadcastw.128(<8 x i16> %x0, <8 x i16> zeroinitializer, i8 %mask)
    250   %res3 = add <8 x i16> %res, %res1
    251   %res4 = add <8 x i16> %res2, %res3
    252   ret <8 x i16> %res4
    253 }
    254 
    255 declare <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8>, <64 x i8>, i64)
    256 
    257 define <64 x i8>@test_int_x86_avx512_pbroadcastb_512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask) {
    258 ; X86-LABEL: test_int_x86_avx512_pbroadcastb_512:
    259 ; X86:       # %bb.0:
    260 ; X86-NEXT:    vpbroadcastb %xmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0xd0]
    261 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    262 ; X86-NEXT:    vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
    263 ; X86-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
    264 ; X86-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
    265 ; X86-NEXT:    vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
    266 ; X86-NEXT:    retl # encoding: [0xc3]
    267 ;
    268 ; X64-LABEL: test_int_x86_avx512_pbroadcastb_512:
    269 ; X64:       # %bb.0:
    270 ; X64-NEXT:    vpbroadcastb %xmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x78,0xd0]
    271 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    272 ; X64-NEXT:    vpbroadcastb %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x78,0xc8]
    273 ; X64-NEXT:    vpbroadcastb %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x78,0xc0]
    274 ; X64-NEXT:    vpaddb %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfc,0xc0]
    275 ; X64-NEXT:    vpaddb %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfc,0xc0]
    276 ; X64-NEXT:    retq # encoding: [0xc3]
    277   %res = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 -1)
    278   %res1 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> %x1, i64 %mask)
    279   %res2 = call <64 x i8> @llvm.x86.avx512.pbroadcastb.512(<16 x i8> %x0, <64 x i8> zeroinitializer, i64 %mask)
    280   %res3 = add <64 x i8> %res, %res1
    281   %res4 = add <64 x i8> %res2, %res3
    282   ret <64 x i8> %res4
    283 }
    284 
    285 declare <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16>, <32 x i16>, i32)
    286 
    287 define <32 x i16>@test_int_x86_avx512_pbroadcastw_512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask) {
    288 ; X86-LABEL: test_int_x86_avx512_pbroadcastw_512:
    289 ; X86:       # %bb.0:
    290 ; X86-NEXT:    vpbroadcastw %xmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0xd0]
    291 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    292 ; X86-NEXT:    vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
    293 ; X86-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
    294 ; X86-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    295 ; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
    296 ; X86-NEXT:    retl # encoding: [0xc3]
    297 ;
    298 ; X64-LABEL: test_int_x86_avx512_pbroadcastw_512:
    299 ; X64:       # %bb.0:
    300 ; X64-NEXT:    vpbroadcastw %xmm0, %zmm2 # encoding: [0x62,0xf2,0x7d,0x48,0x79,0xd0]
    301 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    302 ; X64-NEXT:    vpbroadcastw %xmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x79,0xc8]
    303 ; X64-NEXT:    vpbroadcastw %xmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x79,0xc0]
    304 ; X64-NEXT:    vpaddw %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc0]
    305 ; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
    306 ; X64-NEXT:    retq # encoding: [0xc3]
    307   %res = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 -1)
    308   %res1 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> %x1, i32 %mask)
    309   %res2 = call <32 x i16> @llvm.x86.avx512.pbroadcastw.512(<8 x i16> %x0, <32 x i16> zeroinitializer, i32 %mask)
    310   %res3 = add <32 x i16> %res, %res1
    311   %res4 = add <32 x i16> %res2, %res3
    312   ret <32 x i16> %res4
    313 }
    314 
    315 declare void @llvm.x86.avx512.mask.storeu.b.128(i8*, <16 x i8>, i16)
    316 
    317 define void@test_int_x86_avx512_mask_storeu_b_128(i8* %ptr1, i8* %ptr2, <16 x i8> %x1, i16 %x2) {
    318 ; X86-LABEL: test_int_x86_avx512_mask_storeu_b_128:
    319 ; X86:       # %bb.0:
    320 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    321 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    322 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
    323 ; X86-NEXT:    vmovdqu8 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x01]
    324 ; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
    325 ; X86-NEXT:    retl # encoding: [0xc3]
    326 ;
    327 ; X64-LABEL: test_int_x86_avx512_mask_storeu_b_128:
    328 ; X64:       # %bb.0:
    329 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    330 ; X64-NEXT:    vmovdqu8 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7f,0x07]
    331 ; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
    332 ; X64-NEXT:    retq # encoding: [0xc3]
    333   call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr1, <16 x i8> %x1, i16 %x2)
    334   call void @llvm.x86.avx512.mask.storeu.b.128(i8* %ptr2, <16 x i8> %x1, i16 -1)
    335   ret void
    336 }
    337 
    338 declare void @llvm.x86.avx512.mask.storeu.b.256(i8*, <32 x i8>, i32)
    339 
    340 define void@test_int_x86_avx512_mask_storeu_b_256(i8* %ptr1, i8* %ptr2, <32 x i8> %x1, i32 %x2) {
    341 ; X86-LABEL: test_int_x86_avx512_mask_storeu_b_256:
    342 ; X86:       # %bb.0:
    343 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    344 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    345 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
    346 ; X86-NEXT:    vmovdqu8 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x01]
    347 ; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
    348 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    349 ; X86-NEXT:    retl # encoding: [0xc3]
    350 ;
    351 ; X64-LABEL: test_int_x86_avx512_mask_storeu_b_256:
    352 ; X64:       # %bb.0:
    353 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    354 ; X64-NEXT:    vmovdqu8 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7f,0x07]
    355 ; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
    356 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    357 ; X64-NEXT:    retq # encoding: [0xc3]
    358   call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr1, <32 x i8> %x1, i32 %x2)
    359   call void @llvm.x86.avx512.mask.storeu.b.256(i8* %ptr2, <32 x i8> %x1, i32 -1)
    360   ret void
    361 }
    362 
    363 declare void @llvm.x86.avx512.mask.storeu.w.128(i8*, <8 x i16>, i8)
    364 
    365 define void@test_int_x86_avx512_mask_storeu_w_128(i8* %ptr1, i8* %ptr2, <8 x i16> %x1, i8 %x2) {
    366 ; X86-LABEL: test_int_x86_avx512_mask_storeu_w_128:
    367 ; X86:       # %bb.0:
    368 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    369 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    370 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
    371 ; X86-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    372 ; X86-NEXT:    vmovdqu16 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x01]
    373 ; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
    374 ; X86-NEXT:    retl # encoding: [0xc3]
    375 ;
    376 ; X64-LABEL: test_int_x86_avx512_mask_storeu_w_128:
    377 ; X64:       # %bb.0:
    378 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    379 ; X64-NEXT:    vmovdqu16 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x7f,0x07]
    380 ; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
    381 ; X64-NEXT:    retq # encoding: [0xc3]
    382   call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr1, <8 x i16> %x1, i8 %x2)
    383   call void @llvm.x86.avx512.mask.storeu.w.128(i8* %ptr2, <8 x i16> %x1, i8 -1)
    384   ret void
    385 }
    386 
    387 declare void @llvm.x86.avx512.mask.storeu.w.256(i8*, <16 x i16>, i16)
    388 
    389 define void@test_int_x86_avx512_mask_storeu_w_256(i8* %ptr1, i8* %ptr2, <16 x i16> %x1, i16 %x2) {
    390 ; X86-LABEL: test_int_x86_avx512_mask_storeu_w_256:
    391 ; X86:       # %bb.0:
    392 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    393 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    394 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
    395 ; X86-NEXT:    vmovdqu16 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x01]
    396 ; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
    397 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    398 ; X86-NEXT:    retl # encoding: [0xc3]
    399 ;
    400 ; X64-LABEL: test_int_x86_avx512_mask_storeu_w_256:
    401 ; X64:       # %bb.0:
    402 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    403 ; X64-NEXT:    vmovdqu16 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x7f,0x07]
    404 ; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
    405 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    406 ; X64-NEXT:    retq # encoding: [0xc3]
    407   call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr1, <16 x i16> %x1, i16 %x2)
    408   call void @llvm.x86.avx512.mask.storeu.w.256(i8* %ptr2, <16 x i16> %x1, i16 -1)
    409   ret void
    410 }
    411 
    412 declare <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8*, <8 x i16>, i8)
    413 
    414 define <8 x i16>@test_int_x86_avx512_mask_loadu_w_128(i8* %ptr, i8* %ptr2, <8 x i16> %x1, i8 %mask) {
    415 ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_128:
    416 ; X86:       # %bb.0:
    417 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    418 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    419 ; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
    420 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
    421 ; X86-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    422 ; X86-NEXT:    vmovdqu16 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x00]
    423 ; X86-NEXT:    vmovdqu16 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x09]
    424 ; X86-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
    425 ; X86-NEXT:    retl # encoding: [0xc3]
    426 ;
    427 ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_128:
    428 ; X64:       # %bb.0:
    429 ; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
    430 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    431 ; X64-NEXT:    vmovdqu16 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x6f,0x06]
    432 ; X64-NEXT:    vmovdqu16 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x6f,0x0f]
    433 ; X64-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
    434 ; X64-NEXT:    retq # encoding: [0xc3]
    435     %res0 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> %x1, i8 -1)
    436     %res = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr2, <8 x i16> %res0, i8 %mask)
    437     %res1 = call <8 x i16> @llvm.x86.avx512.mask.loadu.w.128(i8* %ptr, <8 x i16> zeroinitializer, i8 %mask)
    438     %res2 = add <8 x i16> %res, %res1
    439     ret <8 x i16> %res2
    440 }
    441 
    442 declare <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8*, <16 x i16>, i16)
    443 
    444 define <16 x i16>@test_int_x86_avx512_mask_loadu_w_256(i8* %ptr, i8* %ptr2, <16 x i16> %x1, i16 %mask) {
    445 ; X86-LABEL: test_int_x86_avx512_mask_loadu_w_256:
    446 ; X86:       # %bb.0:
    447 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    448 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    449 ; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
    450 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
    451 ; X86-NEXT:    vmovdqu16 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x00]
    452 ; X86-NEXT:    vmovdqu16 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x09]
    453 ; X86-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
    454 ; X86-NEXT:    retl # encoding: [0xc3]
    455 ;
    456 ; X64-LABEL: test_int_x86_avx512_mask_loadu_w_256:
    457 ; X64:       # %bb.0:
    458 ; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
    459 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    460 ; X64-NEXT:    vmovdqu16 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x6f,0x06]
    461 ; X64-NEXT:    vmovdqu16 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x6f,0x0f]
    462 ; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
    463 ; X64-NEXT:    retq # encoding: [0xc3]
    464     %res0 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> %x1, i16 -1)
    465     %res = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr2, <16 x i16> %res0, i16 %mask)
    466     %res1 = call <16 x i16> @llvm.x86.avx512.mask.loadu.w.256(i8* %ptr, <16 x i16> zeroinitializer, i16 %mask)
    467     %res2 = add <16 x i16> %res, %res1
    468     ret <16 x i16> %res2
    469 }
    470 
    471 declare <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8*, <16 x i8>, i16)
    472 
    473 define <16 x i8>@test_int_x86_avx512_mask_loadu_b_128(i8* %ptr, i8* %ptr2, <16 x i8> %x1, i16 %mask) {
    474 ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_128:
    475 ; X86:       # %bb.0:
    476 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    477 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    478 ; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
    479 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x0c]
    480 ; X86-NEXT:    vmovdqu8 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x00]
    481 ; X86-NEXT:    vmovdqu8 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x09]
    482 ; X86-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
    483 ; X86-NEXT:    retl # encoding: [0xc3]
    484 ;
    485 ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_128:
    486 ; X64:       # %bb.0:
    487 ; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
    488 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    489 ; X64-NEXT:    vmovdqu8 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x6f,0x06]
    490 ; X64-NEXT:    vmovdqu8 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x6f,0x0f]
    491 ; X64-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc1]
    492 ; X64-NEXT:    retq # encoding: [0xc3]
    493     %res0 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> %x1, i16 -1)
    494     %res = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr2, <16 x i8> %res0, i16 %mask)
    495     %res1 = call <16 x i8> @llvm.x86.avx512.mask.loadu.b.128(i8* %ptr, <16 x i8> zeroinitializer, i16 %mask)
    496     %res2 = add <16 x i8> %res, %res1
    497     ret <16 x i8> %res2
    498 }
    499 
    500 declare <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8*, <32 x i8>, i32)
    501 
    502 define <32 x i8>@test_int_x86_avx512_mask_loadu_b_256(i8* %ptr, i8* %ptr2, <32 x i8> %x1, i32 %mask) {
    503 ; X86-LABEL: test_int_x86_avx512_mask_loadu_b_256:
    504 ; X86:       # %bb.0:
    505 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
    506 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
    507 ; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
    508 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x0c]
    509 ; X86-NEXT:    vmovdqu8 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x00]
    510 ; X86-NEXT:    vmovdqu8 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x09]
    511 ; X86-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1]
    512 ; X86-NEXT:    retl # encoding: [0xc3]
    513 ;
    514 ; X64-LABEL: test_int_x86_avx512_mask_loadu_b_256:
    515 ; X64:       # %bb.0:
    516 ; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
    517 ; X64-NEXT:    kmovd %edx, %k1 # encoding: [0xc5,0xfb,0x92,0xca]
    518 ; X64-NEXT:    vmovdqu8 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x6f,0x06]
    519 ; X64-NEXT:    vmovdqu8 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x6f,0x0f]
    520 ; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc1]
    521 ; X64-NEXT:    retq # encoding: [0xc3]
    522     %res0 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> %x1, i32 -1)
    523     %res = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr2, <32 x i8> %res0, i32 %mask)
    524     %res1 = call <32 x i8> @llvm.x86.avx512.mask.loadu.b.256(i8* %ptr, <32 x i8> zeroinitializer, i32 %mask)
    525     %res2 = add <32 x i8> %res, %res1
    526     ret <32 x i8> %res2
    527 }
    528 
    529 declare <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8>, <16 x i8>, i32, <16 x i8>, i16)
    530 
    531 define <16 x i8>@test_int_x86_avx512_mask_palignr_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x3, i16 %x4) {
    532 ; X86-LABEL: test_int_x86_avx512_mask_palignr_128:
    533 ; X86:       # %bb.0:
    534 ; X86-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x02]
    535 ; X86-NEXT:    # xmm3 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
    536 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
    537 ; X86-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
    538 ; X86-NEXT:    # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
    539 ; X86-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
    540 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
    541 ; X86-NEXT:    vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
    542 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
    543 ; X86-NEXT:    retl # encoding: [0xc3]
    544 ;
    545 ; X64-LABEL: test_int_x86_avx512_mask_palignr_128:
    546 ; X64:       # %bb.0:
    547 ; X64-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x02]
    548 ; X64-NEXT:    # xmm3 = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
    549 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    550 ; X64-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x0f,0xd1,0x02]
    551 ; X64-NEXT:    # xmm2 {%k1} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
    552 ; X64-NEXT:    vpalignr $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x0f,0xc1,0x02]
    553 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm0[0,1]
    554 ; X64-NEXT:    vpaddb %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfc,0xc3]
    555 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
    556 ; X64-NEXT:    retq # encoding: [0xc3]
    557   %res = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 %x4)
    558   %res1 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> zeroinitializer, i16 %x4)
    559   %res2 = call <16 x i8> @llvm.x86.avx512.mask.palignr.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <16 x i8> %x3, i16 -1)
    560   %res3 = add <16 x i8> %res, %res1
    561   %res4 = add <16 x i8> %res3, %res2
    562   ret <16 x i8> %res4
    563 }
    564 
    565 declare <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8>, <32 x i8>, i32, <32 x i8>, i32)
    566 
    567 define <32 x i8>@test_int_x86_avx512_mask_palignr_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x3, i32 %x4) {
    568 ; X86-LABEL: test_int_x86_avx512_mask_palignr_256:
    569 ; X86:       # %bb.0:
    570 ; X86-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xd9,0x02]
    571 ; X86-NEXT:    # ymm3 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
    572 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    573 ; X86-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
    574 ; X86-NEXT:    # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
    575 ; X86-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
    576 ; X86-NEXT:    # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
    577 ; X86-NEXT:    vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
    578 ; X86-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
    579 ; X86-NEXT:    retl # encoding: [0xc3]
    580 ;
    581 ; X64-LABEL: test_int_x86_avx512_mask_palignr_256:
    582 ; X64:       # %bb.0:
    583 ; X64-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x0f,0xd9,0x02]
    584 ; X64-NEXT:    # ymm3 = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
    585 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    586 ; X64-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x0f,0xd1,0x02]
    587 ; X64-NEXT:    # ymm2 {%k1} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
    588 ; X64-NEXT:    vpalignr $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x0f,0xc1,0x02]
    589 ; X64-NEXT:    # ymm0 {%k1} {z} = ymm1[2,3,4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1],ymm1[18,19,20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17]
    590 ; X64-NEXT:    vpaddb %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfc,0xc3]
    591 ; X64-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc0]
    592 ; X64-NEXT:    retq # encoding: [0xc3]
    593   %res = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 %x4)
    594   %res1 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> zeroinitializer, i32 %x4)
    595   %res2 = call <32 x i8> @llvm.x86.avx512.mask.palignr.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <32 x i8> %x3, i32 -1)
    596   %res3 = add <32 x i8> %res, %res1
    597   %res4 = add <32 x i8> %res3, %res2
    598   ret <32 x i8> %res4
    599 }
    600 
    601 declare <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16>, i32, <8 x i16>, i8)
    602 
    603 define <8 x i16>@test_int_x86_avx512_mask_pshufh_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
    604 ; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
    605 ; X86:       # %bb.0:
    606 ; X86-NEXT:    vpshufhw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xd0,0x03]
    607 ; X86-NEXT:    # xmm2 = xmm0[0,1,2,3,7,4,4,4]
    608 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
    609 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
    610 ; X86-NEXT:    vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
    611 ; X86-NEXT:    # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
    612 ; X86-NEXT:    vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
    613 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
    614 ; X86-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
    615 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
    616 ; X86-NEXT:    retl # encoding: [0xc3]
    617 ;
    618 ; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_128:
    619 ; X64:       # %bb.0:
    620 ; X64-NEXT:    vpshufhw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x70,0xd0,0x03]
    621 ; X64-NEXT:    # xmm2 = xmm0[0,1,2,3,7,4,4,4]
    622 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    623 ; X64-NEXT:    vpshufhw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x70,0xc8,0x03]
    624 ; X64-NEXT:    # xmm1 {%k1} = xmm0[0,1,2,3,7,4,4,4]
    625 ; X64-NEXT:    vpshufhw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x70,0xc0,0x03]
    626 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,1,2,3,7,4,4,4]
    627 ; X64-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
    628 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
    629 ; X64-NEXT:    retq # encoding: [0xc3]
    630   %res = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
    631   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
    632   %res2 = call <8 x i16> @llvm.x86.avx512.mask.pshufh.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
    633   %res3 = add <8 x i16> %res, %res1
    634   %res4 = add <8 x i16> %res3, %res2
    635   ret <8 x i16> %res4
    636 }
    637 
    638 declare <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16>, i32, <16 x i16>, i16)
    639 
    640 define <16 x i16>@test_int_x86_avx512_mask_pshufh_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
    641 ; X86-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
    642 ; X86:       # %bb.0:
    643 ; X86-NEXT:    vpshufhw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xd0,0x03]
    644 ; X86-NEXT:    # ymm2 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
    645 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
    646 ; X86-NEXT:    vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
    647 ; X86-NEXT:    # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
    648 ; X86-NEXT:    vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
    649 ; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
    650 ; X86-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
    651 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
    652 ; X86-NEXT:    retl # encoding: [0xc3]
    653 ;
    654 ; X64-LABEL: test_int_x86_avx512_mask_pshufh_w_256:
    655 ; X64:       # %bb.0:
    656 ; X64-NEXT:    vpshufhw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x70,0xd0,0x03]
    657 ; X64-NEXT:    # ymm2 = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
    658 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    659 ; X64-NEXT:    vpshufhw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x70,0xc8,0x03]
    660 ; X64-NEXT:    # ymm1 {%k1} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
    661 ; X64-NEXT:    vpshufhw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x70,0xc0,0x03]
    662 ; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,2,3,7,4,4,4,8,9,10,11,15,12,12,12]
    663 ; X64-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
    664 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
    665 ; X64-NEXT:    retq # encoding: [0xc3]
    666   %res = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
    667   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
    668   %res2 = call <16 x i16> @llvm.x86.avx512.mask.pshufh.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
    669   %res3 = add <16 x i16> %res, %res1
    670   %res4 = add <16 x i16> %res3, %res2
    671   ret <16 x i16> %res4
    672 }
    673 
    674 declare <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16>, i32, <8 x i16>, i8)
    675 
    676 define <8 x i16>@test_int_x86_avx512_mask_pshufl_w_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
    677 ; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
    678 ; X86:       # %bb.0:
    679 ; X86-NEXT:    vpshuflw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xd0,0x03]
    680 ; X86-NEXT:    # xmm2 = xmm0[3,0,0,0,4,5,6,7]
    681 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
    682 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
    683 ; X86-NEXT:    vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
    684 ; X86-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
    685 ; X86-NEXT:    vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
    686 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
    687 ; X86-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
    688 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
    689 ; X86-NEXT:    retl # encoding: [0xc3]
    690 ;
    691 ; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_128:
    692 ; X64:       # %bb.0:
    693 ; X64-NEXT:    vpshuflw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x70,0xd0,0x03]
    694 ; X64-NEXT:    # xmm2 = xmm0[3,0,0,0,4,5,6,7]
    695 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    696 ; X64-NEXT:    vpshuflw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x70,0xc8,0x03]
    697 ; X64-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0,4,5,6,7]
    698 ; X64-NEXT:    vpshuflw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0x89,0x70,0xc0,0x03]
    699 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0,4,5,6,7]
    700 ; X64-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
    701 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
    702 ; X64-NEXT:    retq # encoding: [0xc3]
    703   %res = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
    704   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
    705   %res2 = call <8 x i16> @llvm.x86.avx512.mask.pshufl.w.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
    706   %res3 = add <8 x i16> %res, %res1
    707   %res4 = add <8 x i16> %res3, %res2
    708   ret <8 x i16> %res4
    709 }
    710 
    711 declare <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16>, i32, <16 x i16>, i16)
    712 
    713 define <16 x i16>@test_int_x86_avx512_mask_pshufl_w_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
    714 ; X86-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
    715 ; X86:       # %bb.0:
    716 ; X86-NEXT:    vpshuflw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xd0,0x03]
    717 ; X86-NEXT:    # ymm2 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
    718 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
    719 ; X86-NEXT:    vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
    720 ; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
    721 ; X86-NEXT:    vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
    722 ; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
    723 ; X86-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
    724 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
    725 ; X86-NEXT:    retl # encoding: [0xc3]
    726 ;
    727 ; X64-LABEL: test_int_x86_avx512_mask_pshufl_w_256:
    728 ; X64:       # %bb.0:
    729 ; X64-NEXT:    vpshuflw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x70,0xd0,0x03]
    730 ; X64-NEXT:    # ymm2 = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
    731 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    732 ; X64-NEXT:    vpshuflw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x70,0xc8,0x03]
    733 ; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
    734 ; X64-NEXT:    vpshuflw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7f,0xa9,0x70,0xc0,0x03]
    735 ; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,4,5,6,7,11,8,8,8,12,13,14,15]
    736 ; X64-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
    737 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
    738 ; X64-NEXT:    retq # encoding: [0xc3]
    739   %res = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
    740   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
    741   %res2 = call <16 x i16> @llvm.x86.avx512.mask.pshufl.w.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
    742   %res3 = add <16 x i16> %res, %res1
    743   %res4 = add <16 x i16> %res3, %res2
    744   ret <16 x i16> %res4
    745 }
    746 
    747 define i32 @test_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b) {
    748 ; CHECK-LABEL: test_pcmpeq_b_256:
    749 ; CHECK:       # %bb.0:
    750 ; CHECK-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
    751 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    752 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    753 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    754   %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
    755   ret i32 %res
    756 }
    757 
    758 define i32 @test_mask_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
    759 ; X86-LABEL: test_mask_pcmpeq_b_256:
    760 ; X86:       # %bb.0:
    761 ; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
    762 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    763 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
    764 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    765 ; X86-NEXT:    retl # encoding: [0xc3]
    766 ;
    767 ; X64-LABEL: test_mask_pcmpeq_b_256:
    768 ; X64:       # %bb.0:
    769 ; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
    770 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    771 ; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
    772 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    773 ; X64-NEXT:    retq # encoding: [0xc3]
    774   %res = call i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
    775   ret i32 %res
    776 }
    777 
    778 declare i32 @llvm.x86.avx512.mask.pcmpeq.b.256(<32 x i8>, <32 x i8>, i32)
    779 
    780 define i16 @test_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b) {
    781 ; CHECK-LABEL: test_pcmpeq_w_256:
    782 ; CHECK:       # %bb.0:
    783 ; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
    784 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    785 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
    786 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    787 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    788   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
    789   ret i16 %res
    790 }
    791 
    792 define i16 @test_mask_pcmpeq_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
    793 ; X86-LABEL: test_mask_pcmpeq_w_256:
    794 ; X86:       # %bb.0:
    795 ; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
    796 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    797 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
    798 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
    799 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    800 ; X86-NEXT:    retl # encoding: [0xc3]
    801 ;
    802 ; X64-LABEL: test_mask_pcmpeq_w_256:
    803 ; X64:       # %bb.0:
    804 ; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
    805 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    806 ; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
    807 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
    808 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    809 ; X64-NEXT:    retq # encoding: [0xc3]
    810   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
    811   ret i16 %res
    812 }
    813 
    814 declare i16 @llvm.x86.avx512.mask.pcmpeq.w.256(<16 x i16>, <16 x i16>, i16)
    815 
    816 define i32 @test_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b) {
    817 ; CHECK-LABEL: test_pcmpgt_b_256:
    818 ; CHECK:       # %bb.0:
    819 ; CHECK-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
    820 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    821 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    822 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    823   %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 -1)
    824   ret i32 %res
    825 }
    826 
    827 define i32 @test_mask_pcmpgt_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
    828 ; X86-LABEL: test_mask_pcmpgt_b_256:
    829 ; X86:       # %bb.0:
    830 ; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
    831 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    832 ; X86-NEXT:    andl {{[0-9]+}}(%esp), %eax # encoding: [0x23,0x44,0x24,0x04]
    833 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    834 ; X86-NEXT:    retl # encoding: [0xc3]
    835 ;
    836 ; X64-LABEL: test_mask_pcmpgt_b_256:
    837 ; X64:       # %bb.0:
    838 ; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
    839 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    840 ; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
    841 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    842 ; X64-NEXT:    retq # encoding: [0xc3]
    843   %res = call i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8> %a, <32 x i8> %b, i32 %mask)
    844   ret i32 %res
    845 }
    846 
    847 declare i32 @llvm.x86.avx512.mask.pcmpgt.b.256(<32 x i8>, <32 x i8>, i32)
    848 
    849 define i16 @test_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b) {
    850 ; CHECK-LABEL: test_pcmpgt_w_256:
    851 ; CHECK:       # %bb.0:
    852 ; CHECK-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
    853 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    854 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
    855 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    856 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    857   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 -1)
    858   ret i16 %res
    859 }
    860 
    861 define i16 @test_mask_pcmpgt_w_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
    862 ; X86-LABEL: test_mask_pcmpgt_w_256:
    863 ; X86:       # %bb.0:
    864 ; X86-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
    865 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    866 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
    867 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
    868 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    869 ; X86-NEXT:    retl # encoding: [0xc3]
    870 ;
    871 ; X64-LABEL: test_mask_pcmpgt_w_256:
    872 ; X64:       # %bb.0:
    873 ; X64-NEXT:    vpcmpgtw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xc1]
    874 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    875 ; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
    876 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
    877 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
    878 ; X64-NEXT:    retq # encoding: [0xc3]
    879   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16> %a, <16 x i16> %b, i16 %mask)
    880   ret i16 %res
    881 }
    882 
    883 declare i16 @llvm.x86.avx512.mask.pcmpgt.w.256(<16 x i16>, <16 x i16>, i16)
    884 
    885 define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
    886 ; CHECK-LABEL: test_pcmpeq_b_128:
    887 ; CHECK:       # %bb.0:
    888 ; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
    889 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    890 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
    891 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    892   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
    893   ret i16 %res
    894 }
    895 
    896 define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
    897 ; X86-LABEL: test_mask_pcmpeq_b_128:
    898 ; X86:       # %bb.0:
    899 ; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
    900 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    901 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
    902 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
    903 ; X86-NEXT:    retl # encoding: [0xc3]
    904 ;
    905 ; X64-LABEL: test_mask_pcmpeq_b_128:
    906 ; X64:       # %bb.0:
    907 ; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
    908 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    909 ; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
    910 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
    911 ; X64-NEXT:    retq # encoding: [0xc3]
    912   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
    913   ret i16 %res
    914 }
    915 
    916 declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)
    917 
    918 define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
    919 ; CHECK-LABEL: test_pcmpeq_w_128:
    920 ; CHECK:       # %bb.0:
    921 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
    922 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    923 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
    924 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    925   %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
    926   ret i8 %res
    927 }
    928 
    929 define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
    930 ; X86-LABEL: test_mask_pcmpeq_w_128:
    931 ; X86:       # %bb.0:
    932 ; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
    933 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    934 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
    935 ; X86-NEXT:    # kill: def $al killed $al killed $eax
    936 ; X86-NEXT:    retl # encoding: [0xc3]
    937 ;
    938 ; X64-LABEL: test_mask_pcmpeq_w_128:
    939 ; X64:       # %bb.0:
    940 ; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
    941 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    942 ; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
    943 ; X64-NEXT:    # kill: def $al killed $al killed $eax
    944 ; X64-NEXT:    retq # encoding: [0xc3]
    945   %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
    946   ret i8 %res
    947 }
    948 
    949 declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)
    950 
    951 define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
    952 ; CHECK-LABEL: test_pcmpgt_b_128:
    953 ; CHECK:       # %bb.0:
    954 ; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
    955 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    956 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
    957 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    958   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
    959   ret i16 %res
    960 }
    961 
    962 define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
    963 ; X86-LABEL: test_mask_pcmpgt_b_128:
    964 ; X86:       # %bb.0:
    965 ; X86-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
    966 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    967 ; X86-NEXT:    andw {{[0-9]+}}(%esp), %ax # encoding: [0x66,0x23,0x44,0x24,0x04]
    968 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
    969 ; X86-NEXT:    retl # encoding: [0xc3]
    970 ;
    971 ; X64-LABEL: test_mask_pcmpgt_b_128:
    972 ; X64:       # %bb.0:
    973 ; X64-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
    974 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    975 ; X64-NEXT:    andl %edi, %eax # encoding: [0x21,0xf8]
    976 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
    977 ; X64-NEXT:    retq # encoding: [0xc3]
    978   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
    979   ret i16 %res
    980 }
    981 
    982 declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)
    983 
    984 define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
    985 ; CHECK-LABEL: test_pcmpgt_w_128:
    986 ; CHECK:       # %bb.0:
    987 ; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
    988 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
    989 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
    990 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    991   %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
    992   ret i8 %res
    993 }
    994 
    995 define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
    996 ; X86-LABEL: test_mask_pcmpgt_w_128:
    997 ; X86:       # %bb.0:
    998 ; X86-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
    999 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   1000 ; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
   1001 ; X86-NEXT:    # kill: def $al killed $al killed $eax
   1002 ; X86-NEXT:    retl # encoding: [0xc3]
   1003 ;
   1004 ; X64-LABEL: test_mask_pcmpgt_w_128:
   1005 ; X64:       # %bb.0:
   1006 ; X64-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
   1007 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   1008 ; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
   1009 ; X64-NEXT:    # kill: def $al killed $al killed $eax
   1010 ; X64-NEXT:    retq # encoding: [0xc3]
   1011   %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
   1012   ret i8 %res
   1013 }
   1014 
   1015 declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)
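; The legacy masked-compare intrinsics above return an integer bitmask rather
; than a vector, and the generated code applies the caller's mask with a
; scalar AND (see the andl/andb instructions in the checks above). A minimal
; generic-IR sketch of that semantic, assuming the usual auto-upgrade style
; lowering (the @example_* name is illustrative only, not part of this test):
;
; define i32 @example_masked_pcmpeq_b_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
;   %cmp  = icmp eq <32 x i8> %a, %b            ; lane-wise compare -> <32 x i1>
;   %bits = bitcast <32 x i1> %cmp to i32       ; pack the lanes into a bitmask
;   %res  = and i32 %bits, %mask                ; apply the caller-supplied mask
;   ret i32 %res
; }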
   1016 
   1017 declare <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   1018 
   1019 define <16 x i8>@test_int_x86_avx512_mask_punpckhb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
   1020 ; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
   1021 ; X86:       # %bb.0:
   1022 ; X86-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xd9]
   1023 ; X86-NEXT:    # xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
   1024 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1025 ; X86-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
   1026 ; X86-NEXT:    # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
   1027 ; X86-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
   1028 ; X86-NEXT:    retl # encoding: [0xc3]
   1029 ;
   1030 ; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_128:
   1031 ; X64:       # %bb.0:
   1032 ; X64-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x68,0xd9]
   1033 ; X64-NEXT:    # xmm3 = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
   1034 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1035 ; X64-NEXT:    vpunpckhbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x68,0xd1]
   1036 ; X64-NEXT:    # xmm2 {%k1} = xmm0[8],xmm1[8],xmm0[9],xmm1[9],xmm0[10],xmm1[10],xmm0[11],xmm1[11],xmm0[12],xmm1[12],xmm0[13],xmm1[13],xmm0[14],xmm1[14],xmm0[15],xmm1[15]
   1037 ; X64-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
   1038 ; X64-NEXT:    retq # encoding: [0xc3]
   1039   %res = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
   1040   %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpckhb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
   1041   %res2 = add <16 x i8> %res, %res1
   1042   ret <16 x i8> %res2
   1043 }
   1044 
   1045 declare <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   1046 
   1047 define <16 x i8>@test_int_x86_avx512_mask_punpcklb_w_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
   1048 ; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
   1049 ; X86:       # %bb.0:
   1050 ; X86-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xd9]
   1051 ; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1052 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1053 ; X86-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
   1054 ; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1055 ; X86-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
   1056 ; X86-NEXT:    retl # encoding: [0xc3]
   1057 ;
   1058 ; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_128:
   1059 ; X64:       # %bb.0:
   1060 ; X64-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x60,0xd9]
   1061 ; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1062 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1063 ; X64-NEXT:    vpunpcklbw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x60,0xd1]
   1064 ; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3],xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1065 ; X64-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
   1066 ; X64-NEXT:    retq # encoding: [0xc3]
   1067   %res = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
   1068   %res1 = call <16 x i8> @llvm.x86.avx512.mask.punpcklb.w.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
   1069   %res2 = add <16 x i8> %res, %res1
   1070   ret <16 x i8> %res2
   1071 }
   1072 
   1073 declare <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   1074 
   1075 define <32 x i8>@test_int_x86_avx512_mask_punpckhb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   1076 ; X86-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
   1077 ; X86:       # %bb.0:
   1078 ; X86-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xd9]
   1079 ; X86-NEXT:    # ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
   1080 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1081 ; X86-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
   1082 ; X86-NEXT:    # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
   1083 ; X86-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   1084 ; X86-NEXT:    retl # encoding: [0xc3]
   1085 ;
   1086 ; X64-LABEL: test_int_x86_avx512_mask_punpckhb_w_256:
   1087 ; X64:       # %bb.0:
   1088 ; X64-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x68,0xd9]
   1089 ; X64-NEXT:    # ymm3 = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
   1090 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1091 ; X64-NEXT:    vpunpckhbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x68,0xd1]
   1092 ; X64-NEXT:    # ymm2 {%k1} = ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15],ymm0[24],ymm1[24],ymm0[25],ymm1[25],ymm0[26],ymm1[26],ymm0[27],ymm1[27],ymm0[28],ymm1[28],ymm0[29],ymm1[29],ymm0[30],ymm1[30],ymm0[31],ymm1[31]
   1093 ; X64-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   1094 ; X64-NEXT:    retq # encoding: [0xc3]
   1095   %res = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   1096   %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpckhb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   1097   %res2 = add <32 x i8> %res, %res1
   1098   ret <32 x i8> %res2
   1099 }
   1100 
   1101 declare <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   1102 
   1103 define <32 x i8>@test_int_x86_avx512_mask_punpcklb_w_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   1104 ; X86-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
   1105 ; X86:       # %bb.0:
   1106 ; X86-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xd9]
   1107 ; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
   1108 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1109 ; X86-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
   1110 ; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
   1111 ; X86-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   1112 ; X86-NEXT:    retl # encoding: [0xc3]
   1113 ;
   1114 ; X64-LABEL: test_int_x86_avx512_mask_punpcklb_w_256:
   1115 ; X64:       # %bb.0:
   1116 ; X64-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x60,0xd9]
   1117 ; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
   1118 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1119 ; X64-NEXT:    vpunpcklbw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x60,0xd1]
   1120 ; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[16],ymm1[16],ymm0[17],ymm1[17],ymm0[18],ymm1[18],ymm0[19],ymm1[19],ymm0[20],ymm1[20],ymm0[21],ymm1[21],ymm0[22],ymm1[22],ymm0[23],ymm1[23]
   1121 ; X64-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   1122 ; X64-NEXT:    retq # encoding: [0xc3]
   1123   %res = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   1124   %res1 = call <32 x i8> @llvm.x86.avx512.mask.punpcklb.w.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   1125   %res2 = add <32 x i8> %res, %res1
   1126   ret <32 x i8> %res2
   1127 }
   1128 
   1129 declare <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   1130 
   1131 define <8 x i16>@test_int_x86_avx512_mask_punpcklw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   1132 ; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
   1133 ; X86:       # %bb.0:
   1134 ; X86-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xd9]
   1135 ; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1136 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1137 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   1138 ; X86-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
   1139 ; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1140 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   1141 ; X86-NEXT:    retl # encoding: [0xc3]
   1142 ;
   1143 ; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_128:
   1144 ; X64:       # %bb.0:
   1145 ; X64-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x61,0xd9]
   1146 ; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1147 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1148 ; X64-NEXT:    vpunpcklwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x61,0xd1]
   1149 ; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1],xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1150 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   1151 ; X64-NEXT:    retq # encoding: [0xc3]
   1152   %res = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   1153   %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpcklw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   1154   %res2 = add <8 x i16> %res, %res1
   1155   ret <8 x i16> %res2
   1156 }
   1157 
   1158 declare <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   1159 
   1160 define <8 x i16>@test_int_x86_avx512_mask_punpckhw_d_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   1161 ; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
   1162 ; X86:       # %bb.0:
   1163 ; X86-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xd9]
   1164 ; X86-NEXT:    # xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1165 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1166 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   1167 ; X86-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
   1168 ; X86-NEXT:    # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1169 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   1170 ; X86-NEXT:    retl # encoding: [0xc3]
   1171 ;
   1172 ; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_128:
   1173 ; X64:       # %bb.0:
   1174 ; X64-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x69,0xd9]
   1175 ; X64-NEXT:    # xmm3 = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1176 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1177 ; X64-NEXT:    vpunpckhwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x69,0xd1]
   1178 ; X64-NEXT:    # xmm2 {%k1} = xmm0[4],xmm1[4],xmm0[5],xmm1[5],xmm0[6],xmm1[6],xmm0[7],xmm1[7]
   1179 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   1180 ; X64-NEXT:    retq # encoding: [0xc3]
   1181   %res = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   1182   %res1 = call <8 x i16> @llvm.x86.avx512.mask.punpckhw.d.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   1183   %res2 = add <8 x i16> %res, %res1
   1184   ret <8 x i16> %res2
   1185 }
   1186 
   1187 declare <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   1188 
   1189 define <16 x i16>@test_int_x86_avx512_mask_punpcklw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   1190 ; X86-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
   1191 ; X86:       # %bb.0:
   1192 ; X86-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xd9]
   1193 ; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
   1194 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1195 ; X86-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
   1196 ; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
   1197 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   1198 ; X86-NEXT:    retl # encoding: [0xc3]
   1199 ;
   1200 ; X64-LABEL: test_int_x86_avx512_mask_punpcklw_d_256:
   1201 ; X64:       # %bb.0:
   1202 ; X64-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x61,0xd9]
   1203 ; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
   1204 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1205 ; X64-NEXT:    vpunpcklwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x61,0xd1]
   1206 ; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[8],ymm1[8],ymm0[9],ymm1[9],ymm0[10],ymm1[10],ymm0[11],ymm1[11]
   1207 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   1208 ; X64-NEXT:    retq # encoding: [0xc3]
   1209   %res = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   1210   %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpcklw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   1211   %res2 = add <16 x i16> %res, %res1
   1212   ret <16 x i16> %res2
   1213 }
   1214 
   1215 declare <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   1216 
   1217 define <16 x i16>@test_int_x86_avx512_mask_punpckhw_d_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   1218 ; X86-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
   1219 ; X86:       # %bb.0:
   1220 ; X86-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xd9]
   1221 ; X86-NEXT:    # ymm3 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
   1222 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1223 ; X86-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
   1224 ; X86-NEXT:    # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
   1225 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   1226 ; X86-NEXT:    retl # encoding: [0xc3]
   1227 ;
   1228 ; X64-LABEL: test_int_x86_avx512_mask_punpckhw_d_256:
   1229 ; X64:       # %bb.0:
   1230 ; X64-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x69,0xd9]
   1231 ; X64-NEXT:    # ymm3 = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
   1232 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1233 ; X64-NEXT:    vpunpckhwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x69,0xd1]
   1234 ; X64-NEXT:    # ymm2 {%k1} = ymm0[4],ymm1[4],ymm0[5],ymm1[5],ymm0[6],ymm1[6],ymm0[7],ymm1[7],ymm0[12],ymm1[12],ymm0[13],ymm1[13],ymm0[14],ymm1[14],ymm0[15],ymm1[15]
   1235 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   1236 ; X64-NEXT:    retq # encoding: [0xc3]
   1237   %res = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   1238   %res1 = call <16 x i16> @llvm.x86.avx512.mask.punpckhw.d.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   1239   %res2 = add <16 x i16> %res, %res1
   1240   ret <16 x i16> %res2
   1241 }
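; For the masked unpack intrinsics above, merge masking keeps the destination
; lane wherever the corresponding mask bit is clear; the equivalent generic IR
; is a shufflevector followed by a select on the bitcast mask. A minimal
; sketch for the 128-bit high-byte unpack, assuming that lowering (the
; @example_* name is illustrative only, not part of this test):
;
; define <16 x i8> @example_masked_punpckhbw_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
;   %shuf = shufflevector <16 x i8> %x0, <16 x i8> %x1,
;           <16 x i32> <i32 8, i32 24, i32 9, i32 25, i32 10, i32 26, i32 11, i32 27,
;                       i32 12, i32 28, i32 13, i32 29, i32 14, i32 30, i32 15, i32 31>
;   %bits = bitcast i16 %mask to <16 x i1>      ; one mask bit per byte lane
;   %res  = select <16 x i1> %bits, <16 x i8> %shuf, <16 x i8> %x2
;   ret <16 x i8> %res
; }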
   1242 
   1243 define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   1244 ; CHECK-LABEL: test_mask_add_epi16_rr_128:
   1245 ; CHECK:       # %bb.0:
   1246 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc1]
   1247 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1248   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   1249   ret <8 x i16> %res
   1250 }
   1251 
   1252 define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   1253 ; X86-LABEL: test_mask_add_epi16_rrk_128:
   1254 ; X86:       # %bb.0:
   1255 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1256 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   1257 ; X86-NEXT:    vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
   1258 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   1259 ; X86-NEXT:    retl # encoding: [0xc3]
   1260 ;
   1261 ; X64-LABEL: test_mask_add_epi16_rrk_128:
   1262 ; X64:       # %bb.0:
   1263 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1264 ; X64-NEXT:    vpaddw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
   1265 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   1266 ; X64-NEXT:    retq # encoding: [0xc3]
   1267   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   1268   ret <8 x i16> %res
   1269 }
   1270 
   1271 define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   1272 ; X86-LABEL: test_mask_add_epi16_rrkz_128:
   1273 ; X86:       # %bb.0:
   1274 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1275 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   1276 ; X86-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
   1277 ; X86-NEXT:    retl # encoding: [0xc3]
   1278 ;
   1279 ; X64-LABEL: test_mask_add_epi16_rrkz_128:
   1280 ; X64:       # %bb.0:
   1281 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1282 ; X64-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
   1283 ; X64-NEXT:    retq # encoding: [0xc3]
   1284   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   1285   ret <8 x i16> %res
   1286 }
   1287 
   1288 define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   1289 ; X86-LABEL: test_mask_add_epi16_rm_128:
   1290 ; X86:       # %bb.0:
   1291 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1292 ; X86-NEXT:    vpaddw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x00]
   1293 ; X86-NEXT:    retl # encoding: [0xc3]
   1294 ;
   1295 ; X64-LABEL: test_mask_add_epi16_rm_128:
   1296 ; X64:       # %bb.0:
   1297 ; X64-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0x07]
   1298 ; X64-NEXT:    retq # encoding: [0xc3]
   1299   %b = load <8 x i16>, <8 x i16>* %ptr_b
   1300   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   1301   ret <8 x i16> %res
   1302 }
   1303 
   1304 define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   1305 ; X86-LABEL: test_mask_add_epi16_rmk_128:
   1306 ; X86:       # %bb.0:
   1307 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1308 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   1309 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   1310 ; X86-NEXT:    vpaddw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x08]
   1311 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   1312 ; X86-NEXT:    retl # encoding: [0xc3]
   1313 ;
   1314 ; X64-LABEL: test_mask_add_epi16_rmk_128:
   1315 ; X64:       # %bb.0:
   1316 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1317 ; X64-NEXT:    vpaddw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
   1318 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   1319 ; X64-NEXT:    retq # encoding: [0xc3]
   1320   %b = load <8 x i16>, <8 x i16>* %ptr_b
   1321   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   1322   ret <8 x i16> %res
   1323 }
   1324 
   1325 define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   1326 ; X86-LABEL: test_mask_add_epi16_rmkz_128:
   1327 ; X86:       # %bb.0:
   1328 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1329 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   1330 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   1331 ; X86-NEXT:    vpaddw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x00]
   1332 ; X86-NEXT:    retl # encoding: [0xc3]
   1333 ;
   1334 ; X64-LABEL: test_mask_add_epi16_rmkz_128:
   1335 ; X64:       # %bb.0:
   1336 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1337 ; X64-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
   1338 ; X64-NEXT:    retq # encoding: [0xc3]
   1339   %b = load <8 x i16>, <8 x i16>* %ptr_b
   1340   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   1341   ret <8 x i16> %res
   1342 }
   1343 
   1344 declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
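; The masked add/sub intrinsics in these tests take an explicit pass-through
; operand: {%k1} merges into it and {%k1} {z} zeroes the unselected lanes
; (the zeroinitializer calls). A minimal generic-IR sketch of the merge-masked
; form, assuming the usual select-based lowering (the @example_* name is
; illustrative only, not part of this test):
;
; define <8 x i16> @example_masked_padd_w_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passthru, i8 %mask) {
;   %sum  = add <8 x i16> %a, %b
;   %bits = bitcast i8 %mask to <8 x i1>        ; one mask bit per word lane
;   %res  = select <8 x i1> %bits, <8 x i16> %sum, <8 x i16> %passthru
;   ret <8 x i16> %res
; }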
   1345 
   1346 define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   1347 ; CHECK-LABEL: test_mask_add_epi16_rr_256:
   1348 ; CHECK:       # %bb.0:
   1349 ; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc1]
   1350 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1351   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   1352   ret <16 x i16> %res
   1353 }
   1354 
   1355 define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   1356 ; X86-LABEL: test_mask_add_epi16_rrk_256:
   1357 ; X86:       # %bb.0:
   1358 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1359 ; X86-NEXT:    vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
   1360 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   1361 ; X86-NEXT:    retl # encoding: [0xc3]
   1362 ;
   1363 ; X64-LABEL: test_mask_add_epi16_rrk_256:
   1364 ; X64:       # %bb.0:
   1365 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1366 ; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
   1367 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   1368 ; X64-NEXT:    retq # encoding: [0xc3]
   1369   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   1370   ret <16 x i16> %res
   1371 }
   1372 
   1373 define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   1374 ; X86-LABEL: test_mask_add_epi16_rrkz_256:
   1375 ; X86:       # %bb.0:
   1376 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1377 ; X86-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
   1378 ; X86-NEXT:    retl # encoding: [0xc3]
   1379 ;
   1380 ; X64-LABEL: test_mask_add_epi16_rrkz_256:
   1381 ; X64:       # %bb.0:
   1382 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1383 ; X64-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
   1384 ; X64-NEXT:    retq # encoding: [0xc3]
   1385   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   1386   ret <16 x i16> %res
   1387 }
   1388 
   1389 define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   1390 ; X86-LABEL: test_mask_add_epi16_rm_256:
   1391 ; X86:       # %bb.0:
   1392 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1393 ; X86-NEXT:    vpaddw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x00]
   1394 ; X86-NEXT:    retl # encoding: [0xc3]
   1395 ;
   1396 ; X64-LABEL: test_mask_add_epi16_rm_256:
   1397 ; X64:       # %bb.0:
   1398 ; X64-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0x07]
   1399 ; X64-NEXT:    retq # encoding: [0xc3]
   1400   %b = load <16 x i16>, <16 x i16>* %ptr_b
   1401   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   1402   ret <16 x i16> %res
   1403 }
   1404 
   1405 define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   1406 ; X86-LABEL: test_mask_add_epi16_rmk_256:
   1407 ; X86:       # %bb.0:
   1408 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1409 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1410 ; X86-NEXT:    vpaddw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x08]
   1411 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   1412 ; X86-NEXT:    retl # encoding: [0xc3]
   1413 ;
   1414 ; X64-LABEL: test_mask_add_epi16_rmk_256:
   1415 ; X64:       # %bb.0:
   1416 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1417 ; X64-NEXT:    vpaddw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
   1418 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   1419 ; X64-NEXT:    retq # encoding: [0xc3]
   1420   %b = load <16 x i16>, <16 x i16>* %ptr_b
   1421   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   1422   ret <16 x i16> %res
   1423 }
   1424 
   1425 define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   1426 ; X86-LABEL: test_mask_add_epi16_rmkz_256:
   1427 ; X86:       # %bb.0:
   1428 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1429 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1430 ; X86-NEXT:    vpaddw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x00]
   1431 ; X86-NEXT:    retl # encoding: [0xc3]
   1432 ;
   1433 ; X64-LABEL: test_mask_add_epi16_rmkz_256:
   1434 ; X64:       # %bb.0:
   1435 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1436 ; X64-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
   1437 ; X64-NEXT:    retq # encoding: [0xc3]
   1438   %b = load <16 x i16>, <16 x i16>* %ptr_b
   1439   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   1440   ret <16 x i16> %res
   1441 }
   1442 
   1443 declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   1444 
   1445 define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   1446 ; CHECK-LABEL: test_mask_sub_epi16_rr_128:
   1447 ; CHECK:       # %bb.0:
   1448 ; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0xc1]
   1449 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1450   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   1451   ret <8 x i16> %res
   1452 }
   1453 
   1454 define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   1455 ; X86-LABEL: test_mask_sub_epi16_rrk_128:
   1456 ; X86:       # %bb.0:
   1457 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1458 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   1459 ; X86-NEXT:    vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
   1460 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   1461 ; X86-NEXT:    retl # encoding: [0xc3]
   1462 ;
   1463 ; X64-LABEL: test_mask_sub_epi16_rrk_128:
   1464 ; X64:       # %bb.0:
   1465 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1466 ; X64-NEXT:    vpsubw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
   1467 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   1468 ; X64-NEXT:    retq # encoding: [0xc3]
   1469   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   1470   ret <8 x i16> %res
   1471 }
   1472 
   1473 define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   1474 ; X86-LABEL: test_mask_sub_epi16_rrkz_128:
   1475 ; X86:       # %bb.0:
   1476 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1477 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   1478 ; X86-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
   1479 ; X86-NEXT:    retl # encoding: [0xc3]
   1480 ;
   1481 ; X64-LABEL: test_mask_sub_epi16_rrkz_128:
   1482 ; X64:       # %bb.0:
   1483 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1484 ; X64-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
   1485 ; X64-NEXT:    retq # encoding: [0xc3]
   1486   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   1487   ret <8 x i16> %res
   1488 }
   1489 
   1490 define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   1491 ; X86-LABEL: test_mask_sub_epi16_rm_128:
   1492 ; X86:       # %bb.0:
   1493 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1494 ; X86-NEXT:    vpsubw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x00]
   1495 ; X86-NEXT:    retl # encoding: [0xc3]
   1496 ;
   1497 ; X64-LABEL: test_mask_sub_epi16_rm_128:
   1498 ; X64:       # %bb.0:
   1499 ; X64-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf9,0x07]
   1500 ; X64-NEXT:    retq # encoding: [0xc3]
   1501   %b = load <8 x i16>, <8 x i16>* %ptr_b
   1502   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   1503   ret <8 x i16> %res
   1504 }
   1505 
   1506 define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   1507 ; X86-LABEL: test_mask_sub_epi16_rmk_128:
   1508 ; X86:       # %bb.0:
   1509 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1510 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   1511 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   1512 ; X86-NEXT:    vpsubw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x08]
   1513 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   1514 ; X86-NEXT:    retl # encoding: [0xc3]
   1515 ;
   1516 ; X64-LABEL: test_mask_sub_epi16_rmk_128:
   1517 ; X64:       # %bb.0:
   1518 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1519 ; X64-NEXT:    vpsubw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
   1520 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   1521 ; X64-NEXT:    retq # encoding: [0xc3]
   1522   %b = load <8 x i16>, <8 x i16>* %ptr_b
   1523   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   1524   ret <8 x i16> %res
   1525 }
   1526 
   1527 define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   1528 ; X86-LABEL: test_mask_sub_epi16_rmkz_128:
   1529 ; X86:       # %bb.0:
   1530 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1531 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   1532 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   1533 ; X86-NEXT:    vpsubw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x00]
   1534 ; X86-NEXT:    retl # encoding: [0xc3]
   1535 ;
   1536 ; X64-LABEL: test_mask_sub_epi16_rmkz_128:
   1537 ; X64:       # %bb.0:
   1538 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1539 ; X64-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
   1540 ; X64-NEXT:    retq # encoding: [0xc3]
   1541   %b = load <8 x i16>, <8 x i16>* %ptr_b
   1542   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   1543   ret <8 x i16> %res
   1544 }
   1545 
   1546 declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   1547 
   1548 define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   1549 ; CHECK-LABEL: test_mask_sub_epi16_rr_256:
   1550 ; CHECK:       # %bb.0:
   1551 ; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0xc1]
   1552 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1553   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   1554   ret <16 x i16> %res
   1555 }
   1556 
   1557 define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   1558 ; X86-LABEL: test_mask_sub_epi16_rrk_256:
   1559 ; X86:       # %bb.0:
   1560 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1561 ; X86-NEXT:    vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
   1562 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   1563 ; X86-NEXT:    retl # encoding: [0xc3]
   1564 ;
   1565 ; X64-LABEL: test_mask_sub_epi16_rrk_256:
   1566 ; X64:       # %bb.0:
   1567 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1568 ; X64-NEXT:    vpsubw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
   1569 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   1570 ; X64-NEXT:    retq # encoding: [0xc3]
   1571   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   1572   ret <16 x i16> %res
   1573 }
   1574 
   1575 define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   1576 ; X86-LABEL: test_mask_sub_epi16_rrkz_256:
   1577 ; X86:       # %bb.0:
   1578 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   1579 ; X86-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
   1580 ; X86-NEXT:    retl # encoding: [0xc3]
   1581 ;
   1582 ; X64-LABEL: test_mask_sub_epi16_rrkz_256:
   1583 ; X64:       # %bb.0:
   1584 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1585 ; X64-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
   1586 ; X64-NEXT:    retq # encoding: [0xc3]
   1587   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   1588   ret <16 x i16> %res
   1589 }
   1590 
   1591 define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   1592 ; X86-LABEL: test_mask_sub_epi16_rm_256:
   1593 ; X86:       # %bb.0:
   1594 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1595 ; X86-NEXT:    vpsubw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x00]
   1596 ; X86-NEXT:    retl # encoding: [0xc3]
   1597 ;
   1598 ; X64-LABEL: test_mask_sub_epi16_rm_256:
   1599 ; X64:       # %bb.0:
   1600 ; X64-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf9,0x07]
   1601 ; X64-NEXT:    retq # encoding: [0xc3]
   1602   %b = load <16 x i16>, <16 x i16>* %ptr_b
   1603   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   1604   ret <16 x i16> %res
   1605 }
   1606 
   1607 define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   1608 ; X86-LABEL: test_mask_sub_epi16_rmk_256:
   1609 ; X86:       # %bb.0:
   1610 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1611 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1612 ; X86-NEXT:    vpsubw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x08]
   1613 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   1614 ; X86-NEXT:    retl # encoding: [0xc3]
   1615 ;
   1616 ; X64-LABEL: test_mask_sub_epi16_rmk_256:
   1617 ; X64:       # %bb.0:
   1618 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1619 ; X64-NEXT:    vpsubw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
   1620 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   1621 ; X64-NEXT:    retq # encoding: [0xc3]
   1622   %b = load <16 x i16>, <16 x i16>* %ptr_b
   1623   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   1624   ret <16 x i16> %res
   1625 }
   1626 
   1627 define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   1628 ; X86-LABEL: test_mask_sub_epi16_rmkz_256:
   1629 ; X86:       # %bb.0:
   1630 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1631 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   1632 ; X86-NEXT:    vpsubw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x00]
   1633 ; X86-NEXT:    retl # encoding: [0xc3]
   1634 ;
   1635 ; X64-LABEL: test_mask_sub_epi16_rmkz_256:
   1636 ; X64:       # %bb.0:
   1637 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1638 ; X64-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
   1639 ; X64-NEXT:    retq # encoding: [0xc3]
   1640   %b = load <16 x i16>, <16 x i16>* %ptr_b
   1641   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   1642   ret <16 x i16> %res
   1643 }
   1644 
   1645 declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   1646 
   1647 define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
   1648 ; CHECK-LABEL: test_mask_add_epi16_rr_512:
   1649 ; CHECK:       # %bb.0:
   1650 ; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
   1651 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1652   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   1653   ret <32 x i16> %res
   1654 }
   1655 
   1656 define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
   1657 ; X86-LABEL: test_mask_add_epi16_rrk_512:
   1658 ; X86:       # %bb.0:
   1659 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1660 ; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
   1661 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1662 ; X86-NEXT:    retl # encoding: [0xc3]
   1663 ;
   1664 ; X64-LABEL: test_mask_add_epi16_rrk_512:
   1665 ; X64:       # %bb.0:
   1666 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1667 ; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
   1668 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1669 ; X64-NEXT:    retq # encoding: [0xc3]
   1670   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   1671   ret <32 x i16> %res
   1672 }
   1673 
   1674 define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
   1675 ; X86-LABEL: test_mask_add_epi16_rrkz_512:
   1676 ; X86:       # %bb.0:
   1677 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1678 ; X86-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
   1679 ; X86-NEXT:    retl # encoding: [0xc3]
   1680 ;
   1681 ; X64-LABEL: test_mask_add_epi16_rrkz_512:
   1682 ; X64:       # %bb.0:
   1683 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1684 ; X64-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
   1685 ; X64-NEXT:    retq # encoding: [0xc3]
   1686   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   1687   ret <32 x i16> %res
   1688 }
   1689 
   1690 define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
   1691 ; X86-LABEL: test_mask_add_epi16_rm_512:
   1692 ; X86:       # %bb.0:
   1693 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1694 ; X86-NEXT:    vpaddw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x00]
   1695 ; X86-NEXT:    retl # encoding: [0xc3]
   1696 ;
   1697 ; X64-LABEL: test_mask_add_epi16_rm_512:
   1698 ; X64:       # %bb.0:
   1699 ; X64-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
   1700 ; X64-NEXT:    retq # encoding: [0xc3]
   1701   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1702   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   1703   ret <32 x i16> %res
   1704 }
   1705 
   1706 define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   1707 ; X86-LABEL: test_mask_add_epi16_rmk_512:
   1708 ; X86:       # %bb.0:
   1709 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1710 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1711 ; X86-NEXT:    vpaddw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x08]
   1712 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1713 ; X86-NEXT:    retl # encoding: [0xc3]
   1714 ;
   1715 ; X64-LABEL: test_mask_add_epi16_rmk_512:
   1716 ; X64:       # %bb.0:
   1717 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1718 ; X64-NEXT:    vpaddw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
   1719 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1720 ; X64-NEXT:    retq # encoding: [0xc3]
   1721   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1722   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   1723   ret <32 x i16> %res
   1724 }
   1725 
   1726 define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
   1727 ; X86-LABEL: test_mask_add_epi16_rmkz_512:
   1728 ; X86:       # %bb.0:
   1729 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1730 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1731 ; X86-NEXT:    vpaddw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x00]
   1732 ; X86-NEXT:    retl # encoding: [0xc3]
   1733 ;
   1734 ; X64-LABEL: test_mask_add_epi16_rmkz_512:
   1735 ; X64:       # %bb.0:
   1736 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1737 ; X64-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
   1738 ; X64-NEXT:    retq # encoding: [0xc3]
   1739   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1740   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   1741   ret <32 x i16> %res
   1742 }
   1743 
   1744 declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   1745 
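; Tests for the masked 512-bit word subtract intrinsic (llvm.x86.avx512.mask.psub.w.512), covering the same register and memory variants as the add tests above.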
   1746 define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
   1747 ; CHECK-LABEL: test_mask_sub_epi16_rr_512:
   1748 ; CHECK:       # %bb.0:
   1749 ; CHECK-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
   1750 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1751   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   1752   ret <32 x i16> %res
   1753 }
   1754 
   1755 define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
   1756 ; X86-LABEL: test_mask_sub_epi16_rrk_512:
   1757 ; X86:       # %bb.0:
   1758 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1759 ; X86-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
   1760 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1761 ; X86-NEXT:    retl # encoding: [0xc3]
   1762 ;
   1763 ; X64-LABEL: test_mask_sub_epi16_rrk_512:
   1764 ; X64:       # %bb.0:
   1765 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1766 ; X64-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
   1767 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1768 ; X64-NEXT:    retq # encoding: [0xc3]
   1769   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   1770   ret <32 x i16> %res
   1771 }
   1772 
   1773 define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
   1774 ; X86-LABEL: test_mask_sub_epi16_rrkz_512:
   1775 ; X86:       # %bb.0:
   1776 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1777 ; X86-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
   1778 ; X86-NEXT:    retl # encoding: [0xc3]
   1779 ;
   1780 ; X64-LABEL: test_mask_sub_epi16_rrkz_512:
   1781 ; X64:       # %bb.0:
   1782 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1783 ; X64-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
   1784 ; X64-NEXT:    retq # encoding: [0xc3]
   1785   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   1786   ret <32 x i16> %res
   1787 }
   1788 
   1789 define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
   1790 ; X86-LABEL: test_mask_sub_epi16_rm_512:
   1791 ; X86:       # %bb.0:
   1792 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1793 ; X86-NEXT:    vpsubw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x00]
   1794 ; X86-NEXT:    retl # encoding: [0xc3]
   1795 ;
   1796 ; X64-LABEL: test_mask_sub_epi16_rm_512:
   1797 ; X64:       # %bb.0:
   1798 ; X64-NEXT:    vpsubw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
   1799 ; X64-NEXT:    retq # encoding: [0xc3]
   1800   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1801   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   1802   ret <32 x i16> %res
   1803 }
   1804 
   1805 define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   1806 ; X86-LABEL: test_mask_sub_epi16_rmk_512:
   1807 ; X86:       # %bb.0:
   1808 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1809 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1810 ; X86-NEXT:    vpsubw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x08]
   1811 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1812 ; X86-NEXT:    retl # encoding: [0xc3]
   1813 ;
   1814 ; X64-LABEL: test_mask_sub_epi16_rmk_512:
   1815 ; X64:       # %bb.0:
   1816 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1817 ; X64-NEXT:    vpsubw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
   1818 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1819 ; X64-NEXT:    retq # encoding: [0xc3]
   1820   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1821   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   1822   ret <32 x i16> %res
   1823 }
   1824 
   1825 define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
   1826 ; X86-LABEL: test_mask_sub_epi16_rmkz_512:
   1827 ; X86:       # %bb.0:
   1828 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1829 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1830 ; X86-NEXT:    vpsubw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x00]
   1831 ; X86-NEXT:    retl # encoding: [0xc3]
   1832 ;
   1833 ; X64-LABEL: test_mask_sub_epi16_rmkz_512:
   1834 ; X64:       # %bb.0:
   1835 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1836 ; X64-NEXT:    vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
   1837 ; X64-NEXT:    retq # encoding: [0xc3]
   1838   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1839   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   1840   ret <32 x i16> %res
   1841 }
   1842 
   1843 declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   1844 
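; Tests for the masked word multiply-low intrinsic (llvm.x86.avx512.mask.pmull.w.*) at 512-, 128-, and 256-bit widths.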
   1845 define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
   1846 ; CHECK-LABEL: test_mask_mullo_epi16_rr_512:
   1847 ; CHECK:       # %bb.0:
   1848 ; CHECK-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
   1849 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1850   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   1851   ret <32 x i16> %res
   1852 }
   1853 
   1854 define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
   1855 ; X86-LABEL: test_mask_mullo_epi16_rrk_512:
   1856 ; X86:       # %bb.0:
   1857 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1858 ; X86-NEXT:    vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
   1859 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1860 ; X86-NEXT:    retl # encoding: [0xc3]
   1861 ;
   1862 ; X64-LABEL: test_mask_mullo_epi16_rrk_512:
   1863 ; X64:       # %bb.0:
   1864 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1865 ; X64-NEXT:    vpmullw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
   1866 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1867 ; X64-NEXT:    retq # encoding: [0xc3]
   1868   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   1869   ret <32 x i16> %res
   1870 }
   1871 
   1872 define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
   1873 ; X86-LABEL: test_mask_mullo_epi16_rrkz_512:
   1874 ; X86:       # %bb.0:
   1875 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1876 ; X86-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
   1877 ; X86-NEXT:    retl # encoding: [0xc3]
   1878 ;
   1879 ; X64-LABEL: test_mask_mullo_epi16_rrkz_512:
   1880 ; X64:       # %bb.0:
   1881 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1882 ; X64-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
   1883 ; X64-NEXT:    retq # encoding: [0xc3]
   1884   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   1885   ret <32 x i16> %res
   1886 }
   1887 
   1888 define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
   1889 ; X86-LABEL: test_mask_mullo_epi16_rm_512:
   1890 ; X86:       # %bb.0:
   1891 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1892 ; X86-NEXT:    vpmullw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x00]
   1893 ; X86-NEXT:    retl # encoding: [0xc3]
   1894 ;
   1895 ; X64-LABEL: test_mask_mullo_epi16_rm_512:
   1896 ; X64:       # %bb.0:
   1897 ; X64-NEXT:    vpmullw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
   1898 ; X64-NEXT:    retq # encoding: [0xc3]
   1899   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1900   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   1901   ret <32 x i16> %res
   1902 }
   1903 
   1904 define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   1905 ; X86-LABEL: test_mask_mullo_epi16_rmk_512:
   1906 ; X86:       # %bb.0:
   1907 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1908 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1909 ; X86-NEXT:    vpmullw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x08]
   1910 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1911 ; X86-NEXT:    retl # encoding: [0xc3]
   1912 ;
   1913 ; X64-LABEL: test_mask_mullo_epi16_rmk_512:
   1914 ; X64:       # %bb.0:
   1915 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1916 ; X64-NEXT:    vpmullw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
   1917 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
   1918 ; X64-NEXT:    retq # encoding: [0xc3]
   1919   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1920   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   1921   ret <32 x i16> %res
   1922 }
   1923 
   1924 define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
   1925 ; X86-LABEL: test_mask_mullo_epi16_rmkz_512:
   1926 ; X86:       # %bb.0:
   1927 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1928 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   1929 ; X86-NEXT:    vpmullw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x00]
   1930 ; X86-NEXT:    retl # encoding: [0xc3]
   1931 ;
   1932 ; X64-LABEL: test_mask_mullo_epi16_rmkz_512:
   1933 ; X64:       # %bb.0:
   1934 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   1935 ; X64-NEXT:    vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
   1936 ; X64-NEXT:    retq # encoding: [0xc3]
   1937   %b = load <32 x i16>, <32 x i16>* %ptr_b
   1938   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   1939   ret <32 x i16> %res
   1940 }
   1941 
   1942 declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   1943 
   1944 define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   1945 ; CHECK-LABEL: test_mask_mullo_epi16_rr_128:
   1946 ; CHECK:       # %bb.0:
   1947 ; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0xc1]
   1948 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1949   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   1950   ret <8 x i16> %res
   1951 }
   1952 
   1953 define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   1954 ; X86-LABEL: test_mask_mullo_epi16_rrk_128:
   1955 ; X86:       # %bb.0:
   1956 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1957 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   1958 ; X86-NEXT:    vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
   1959 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   1960 ; X86-NEXT:    retl # encoding: [0xc3]
   1961 ;
   1962 ; X64-LABEL: test_mask_mullo_epi16_rrk_128:
   1963 ; X64:       # %bb.0:
   1964 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1965 ; X64-NEXT:    vpmullw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
   1966 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   1967 ; X64-NEXT:    retq # encoding: [0xc3]
   1968   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   1969   ret <8 x i16> %res
   1970 }
   1971 
   1972 define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   1973 ; X86-LABEL: test_mask_mullo_epi16_rrkz_128:
   1974 ; X86:       # %bb.0:
   1975 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   1976 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   1977 ; X86-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
   1978 ; X86-NEXT:    retl # encoding: [0xc3]
   1979 ;
   1980 ; X64-LABEL: test_mask_mullo_epi16_rrkz_128:
   1981 ; X64:       # %bb.0:
   1982 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1983 ; X64-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
   1984 ; X64-NEXT:    retq # encoding: [0xc3]
   1985   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   1986   ret <8 x i16> %res
   1987 }
   1988 
   1989 define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   1990 ; X86-LABEL: test_mask_mullo_epi16_rm_128:
   1991 ; X86:       # %bb.0:
   1992 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1993 ; X86-NEXT:    vpmullw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x00]
   1994 ; X86-NEXT:    retl # encoding: [0xc3]
   1995 ;
   1996 ; X64-LABEL: test_mask_mullo_epi16_rm_128:
   1997 ; X64:       # %bb.0:
   1998 ; X64-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd5,0x07]
   1999 ; X64-NEXT:    retq # encoding: [0xc3]
   2000   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2001   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   2002   ret <8 x i16> %res
   2003 }
   2004 
   2005 define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   2006 ; X86-LABEL: test_mask_mullo_epi16_rmk_128:
   2007 ; X86:       # %bb.0:
   2008 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2009 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   2010 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   2011 ; X86-NEXT:    vpmullw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x08]
   2012 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   2013 ; X86-NEXT:    retl # encoding: [0xc3]
   2014 ;
   2015 ; X64-LABEL: test_mask_mullo_epi16_rmk_128:
   2016 ; X64:       # %bb.0:
   2017 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2018 ; X64-NEXT:    vpmullw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
   2019 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   2020 ; X64-NEXT:    retq # encoding: [0xc3]
   2021   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2022   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   2023   ret <8 x i16> %res
   2024 }
   2025 
   2026 define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   2027 ; X86-LABEL: test_mask_mullo_epi16_rmkz_128:
   2028 ; X86:       # %bb.0:
   2029 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2030 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   2031 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   2032 ; X86-NEXT:    vpmullw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x00]
   2033 ; X86-NEXT:    retl # encoding: [0xc3]
   2034 ;
   2035 ; X64-LABEL: test_mask_mullo_epi16_rmkz_128:
   2036 ; X64:       # %bb.0:
   2037 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2038 ; X64-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
   2039 ; X64-NEXT:    retq # encoding: [0xc3]
   2040   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2041   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   2042   ret <8 x i16> %res
   2043 }
   2044 
   2045 declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2046 
   2047 define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   2048 ; CHECK-LABEL: test_mask_mullo_epi16_rr_256:
   2049 ; CHECK:       # %bb.0:
   2050 ; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0xc1]
   2051 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   2052   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   2053   ret <16 x i16> %res
   2054 }
   2055 
   2056 define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   2057 ; X86-LABEL: test_mask_mullo_epi16_rrk_256:
   2058 ; X86:       # %bb.0:
   2059 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2060 ; X86-NEXT:    vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
   2061 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   2062 ; X86-NEXT:    retl # encoding: [0xc3]
   2063 ;
   2064 ; X64-LABEL: test_mask_mullo_epi16_rrk_256:
   2065 ; X64:       # %bb.0:
   2066 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2067 ; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
   2068 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   2069 ; X64-NEXT:    retq # encoding: [0xc3]
   2070   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   2071   ret <16 x i16> %res
   2072 }
   2073 
   2074 define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   2075 ; X86-LABEL: test_mask_mullo_epi16_rrkz_256:
   2076 ; X86:       # %bb.0:
   2077 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2078 ; X86-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
   2079 ; X86-NEXT:    retl # encoding: [0xc3]
   2080 ;
   2081 ; X64-LABEL: test_mask_mullo_epi16_rrkz_256:
   2082 ; X64:       # %bb.0:
   2083 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2084 ; X64-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
   2085 ; X64-NEXT:    retq # encoding: [0xc3]
   2086   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   2087   ret <16 x i16> %res
   2088 }
   2089 
   2090 define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   2091 ; X86-LABEL: test_mask_mullo_epi16_rm_256:
   2092 ; X86:       # %bb.0:
   2093 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2094 ; X86-NEXT:    vpmullw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x00]
   2095 ; X86-NEXT:    retl # encoding: [0xc3]
   2096 ;
   2097 ; X64-LABEL: test_mask_mullo_epi16_rm_256:
   2098 ; X64:       # %bb.0:
   2099 ; X64-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd5,0x07]
   2100 ; X64-NEXT:    retq # encoding: [0xc3]
   2101   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2102   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   2103   ret <16 x i16> %res
   2104 }
   2105 
   2106 define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   2107 ; X86-LABEL: test_mask_mullo_epi16_rmk_256:
   2108 ; X86:       # %bb.0:
   2109 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2110 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2111 ; X86-NEXT:    vpmullw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x08]
   2112 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   2113 ; X86-NEXT:    retl # encoding: [0xc3]
   2114 ;
   2115 ; X64-LABEL: test_mask_mullo_epi16_rmk_256:
   2116 ; X64:       # %bb.0:
   2117 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2118 ; X64-NEXT:    vpmullw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
   2119 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   2120 ; X64-NEXT:    retq # encoding: [0xc3]
   2121   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2122   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   2123   ret <16 x i16> %res
   2124 }
   2125 
   2126 define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   2127 ; X86-LABEL: test_mask_mullo_epi16_rmkz_256:
   2128 ; X86:       # %bb.0:
   2129 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   2130 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2131 ; X86-NEXT:    vpmullw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x00]
   2132 ; X86-NEXT:    retl # encoding: [0xc3]
   2133 ;
   2134 ; X64-LABEL: test_mask_mullo_epi16_rmkz_256:
   2135 ; X64:       # %bb.0:
   2136 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2137 ; X64-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
   2138 ; X64-NEXT:    retq # encoding: [0xc3]
   2139   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2140   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   2141   ret <16 x i16> %res
   2142 }
   2143 
   2144 declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   2145 
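; Tests for the masked signed/unsigned byte and word max/min intrinsics (pmaxs, pmaxu, pmins, pminu) at 128- and 256-bit widths.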
   2146 declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   2147 
   2148 define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
   2149 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
   2150 ; X86:       # %bb.0:
   2151 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2152 ; X86-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
   2153 ; X86-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
   2154 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
   2155 ; X86-NEXT:    retl # encoding: [0xc3]
   2156 ;
   2157 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
   2158 ; X64:       # %bb.0:
   2159 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2160 ; X64-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
   2161 ; X64-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
   2162 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
   2163 ; X64-NEXT:    retq # encoding: [0xc3]
    2164   %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
   2165   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
   2166   %res2 = add <16 x i8> %res, %res1
   2167   ret <16 x i8> %res2
   2168 }
   2169 
   2170 declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   2171 
   2172 define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   2173 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_b_256:
   2174 ; X86:       # %bb.0:
   2175 ; X86-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xd9]
   2176 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2177 ; X86-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1]
   2178 ; X86-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2179 ; X86-NEXT:    retl # encoding: [0xc3]
   2180 ;
   2181 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_b_256:
   2182 ; X64:       # %bb.0:
   2183 ; X64-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3c,0xd9]
   2184 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2185 ; X64-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1]
   2186 ; X64-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2187 ; X64-NEXT:    retq # encoding: [0xc3]
   2188   %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   2189   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   2190   %res2 = add <32 x i8> %res, %res1
   2191   ret <32 x i8> %res2
   2192 }
   2193 
   2194 declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2195 
   2196 define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   2197 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
   2198 ; X86:       # %bb.0:
   2199 ; X86-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xd9]
   2200 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2201 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2202 ; X86-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
   2203 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   2204 ; X86-NEXT:    retl # encoding: [0xc3]
   2205 ;
   2206 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
   2207 ; X64:       # %bb.0:
   2208 ; X64-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xee,0xd9]
   2209 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2210 ; X64-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
   2211 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   2212 ; X64-NEXT:    retq # encoding: [0xc3]
   2213   %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   2214   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   2215   %res2 = add <8 x i16> %res, %res1
   2216   ret <8 x i16> %res2
   2217 }
   2218 
   2219 declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   2220 
   2221 define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
   2222 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
   2223 ; X86:       # %bb.0:
   2224 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2225 ; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1]
   2226 ; X86-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1]
   2227 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2228 ; X86-NEXT:    retl # encoding: [0xc3]
   2229 ;
   2230 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
   2231 ; X64:       # %bb.0:
   2232 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2233 ; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1]
   2234 ; X64-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1]
   2235 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2236 ; X64-NEXT:    retq # encoding: [0xc3]
   2237   %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
   2238   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
   2239   %res2 = add <16 x i16> %res, %res1
   2240   ret <16 x i16> %res2
   2241 }
   2242 
   2243 declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   2244 
    2245 define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
   2246 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_128:
   2247 ; X86:       # %bb.0:
   2248 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2249 ; X86-NEXT:    vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1]
   2250 ; X86-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1]
   2251 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
   2252 ; X86-NEXT:    retl # encoding: [0xc3]
   2253 ;
   2254 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_128:
   2255 ; X64:       # %bb.0:
   2256 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2257 ; X64-NEXT:    vpmaxub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1]
   2258 ; X64-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1]
   2259 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
   2260 ; X64-NEXT:    retq # encoding: [0xc3]
   2261   %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
   2262   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
   2263   %res2 = add <16 x i8> %res, %res1
   2264   ret <16 x i8> %res2
   2265 }
   2266 
   2267 declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   2268 
   2269 define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   2270 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_b_256:
   2271 ; X86:       # %bb.0:
   2272 ; X86-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xd9]
   2273 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2274 ; X86-NEXT:    vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1]
   2275 ; X86-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2276 ; X86-NEXT:    retl # encoding: [0xc3]
   2277 ;
   2278 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_b_256:
   2279 ; X64:       # %bb.0:
   2280 ; X64-NEXT:    vpmaxub %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xde,0xd9]
   2281 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2282 ; X64-NEXT:    vpmaxub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1]
   2283 ; X64-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2284 ; X64-NEXT:    retq # encoding: [0xc3]
   2285   %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   2286   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   2287   %res2 = add <32 x i8> %res, %res1
   2288   ret <32 x i8> %res2
   2289 }
   2290 
   2291 declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2292 
   2293 define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   2294 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_128:
   2295 ; X86:       # %bb.0:
   2296 ; X86-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xd9]
   2297 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2298 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2299 ; X86-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1]
   2300 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   2301 ; X86-NEXT:    retl # encoding: [0xc3]
   2302 ;
   2303 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_128:
   2304 ; X64:       # %bb.0:
   2305 ; X64-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3e,0xd9]
   2306 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2307 ; X64-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1]
   2308 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   2309 ; X64-NEXT:    retq # encoding: [0xc3]
   2310   %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   2311   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   2312   %res2 = add <8 x i16> %res, %res1
   2313   ret <8 x i16> %res2
   2314 }
   2315 
   2316 declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   2317 
   2318 define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
   2319 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_w_256:
   2320 ; X86:       # %bb.0:
   2321 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2322 ; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1]
   2323 ; X86-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1]
   2324 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2325 ; X86-NEXT:    retl # encoding: [0xc3]
   2326 ;
   2327 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_w_256:
   2328 ; X64:       # %bb.0:
   2329 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2330 ; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1]
   2331 ; X64-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1]
   2332 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2333 ; X64-NEXT:    retq # encoding: [0xc3]
   2334   %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
   2335   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
   2336   %res2 = add <16 x i16> %res, %res1
   2337   ret <16 x i16> %res2
   2338 }
   2339 
   2340 declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   2341 
   2342 define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
   2343 ; X86-LABEL: test_int_x86_avx512_mask_pmins_b_128:
   2344 ; X86:       # %bb.0:
   2345 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2346 ; X86-NEXT:    vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1]
   2347 ; X86-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1]
   2348 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
   2349 ; X86-NEXT:    retl # encoding: [0xc3]
   2350 ;
   2351 ; X64-LABEL: test_int_x86_avx512_mask_pmins_b_128:
   2352 ; X64:       # %bb.0:
   2353 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2354 ; X64-NEXT:    vpminsb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1]
   2355 ; X64-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1]
   2356 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
   2357 ; X64-NEXT:    retq # encoding: [0xc3]
   2358   %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
   2359   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
   2360   %res2 = add <16 x i8> %res, %res1
   2361   ret <16 x i8> %res2
   2362 }
   2363 
   2364 declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   2365 
   2366 define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   2367 ; X86-LABEL: test_int_x86_avx512_mask_pmins_b_256:
   2368 ; X86:       # %bb.0:
   2369 ; X86-NEXT:    vpminsb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xd9]
   2370 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2371 ; X86-NEXT:    vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1]
   2372 ; X86-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2373 ; X86-NEXT:    retl # encoding: [0xc3]
   2374 ;
   2375 ; X64-LABEL: test_int_x86_avx512_mask_pmins_b_256:
   2376 ; X64:       # %bb.0:
   2377 ; X64-NEXT:    vpminsb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x38,0xd9]
   2378 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2379 ; X64-NEXT:    vpminsb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1]
   2380 ; X64-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2381 ; X64-NEXT:    retq # encoding: [0xc3]
   2382   %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   2383   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   2384   %res2 = add <32 x i8> %res, %res1
   2385   ret <32 x i8> %res2
   2386 }
   2387 
   2388 declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2389 
   2390 define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   2391 ; X86-LABEL: test_int_x86_avx512_mask_pmins_w_128:
   2392 ; X86:       # %bb.0:
   2393 ; X86-NEXT:    vpminsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xd9]
   2394 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2395 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2396 ; X86-NEXT:    vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1]
   2397 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   2398 ; X86-NEXT:    retl # encoding: [0xc3]
   2399 ;
   2400 ; X64-LABEL: test_int_x86_avx512_mask_pmins_w_128:
   2401 ; X64:       # %bb.0:
   2402 ; X64-NEXT:    vpminsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xea,0xd9]
   2403 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2404 ; X64-NEXT:    vpminsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1]
   2405 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   2406 ; X64-NEXT:    retq # encoding: [0xc3]
   2407   %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   2408   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   2409   %res2 = add <8 x i16> %res, %res1
   2410   ret <8 x i16> %res2
   2411 }
   2412 
   2413 declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   2414 
   2415 define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
   2416 ; X86-LABEL: test_int_x86_avx512_mask_pmins_w_256:
   2417 ; X86:       # %bb.0:
   2418 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2419 ; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1]
   2420 ; X86-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1]
   2421 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2422 ; X86-NEXT:    retl # encoding: [0xc3]
   2423 ;
   2424 ; X64-LABEL: test_int_x86_avx512_mask_pmins_w_256:
   2425 ; X64:       # %bb.0:
   2426 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2427 ; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1]
   2428 ; X64-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1]
   2429 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2430 ; X64-NEXT:    retq # encoding: [0xc3]
   2431   %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
   2432   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
   2433   %res2 = add <16 x i16> %res, %res1
   2434   ret <16 x i16> %res2
   2435 }
   2436 
   2437 declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   2438 
   2439 define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
   2440 ; X86-LABEL: test_int_x86_avx512_mask_pminu_b_128:
   2441 ; X86:       # %bb.0:
   2442 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2443 ; X86-NEXT:    vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1]
   2444 ; X86-NEXT:    vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1]
   2445 ; X86-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
   2446 ; X86-NEXT:    retl # encoding: [0xc3]
   2447 ;
   2448 ; X64-LABEL: test_int_x86_avx512_mask_pminu_b_128:
   2449 ; X64:       # %bb.0:
   2450 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2451 ; X64-NEXT:    vpminub %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1]
   2452 ; X64-NEXT:    vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1]
   2453 ; X64-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc0]
   2454 ; X64-NEXT:    retq # encoding: [0xc3]
   2455   %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
   2456   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
   2457   %res2 = add <16 x i8> %res, %res1
   2458   ret <16 x i8> %res2
   2459 }
   2460 
   2461 declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   2462 
   2463 define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   2464 ; X86-LABEL: test_int_x86_avx512_mask_pminu_b_256:
   2465 ; X86:       # %bb.0:
   2466 ; X86-NEXT:    vpminub %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xd9]
   2467 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2468 ; X86-NEXT:    vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1]
   2469 ; X86-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2470 ; X86-NEXT:    retl # encoding: [0xc3]
   2471 ;
   2472 ; X64-LABEL: test_int_x86_avx512_mask_pminu_b_256:
   2473 ; X64:       # %bb.0:
   2474 ; X64-NEXT:    vpminub %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xda,0xd9]
   2475 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2476 ; X64-NEXT:    vpminub %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1]
   2477 ; X64-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2478 ; X64-NEXT:    retq # encoding: [0xc3]
   2479   %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   2480   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   2481   %res2 = add <32 x i8> %res, %res1
   2482   ret <32 x i8> %res2
   2483 }
   2484 
   2485 declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2486 
   2487 define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   2488 ; X86-LABEL: test_int_x86_avx512_mask_pminu_w_128:
   2489 ; X86:       # %bb.0:
   2490 ; X86-NEXT:    vpminuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xd9]
   2491 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2492 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2493 ; X86-NEXT:    vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1]
   2494 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   2495 ; X86-NEXT:    retl # encoding: [0xc3]
   2496 ;
   2497 ; X64-LABEL: test_int_x86_avx512_mask_pminu_w_128:
   2498 ; X64:       # %bb.0:
   2499 ; X64-NEXT:    vpminuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x3a,0xd9]
   2500 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2501 ; X64-NEXT:    vpminuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1]
   2502 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   2503 ; X64-NEXT:    retq # encoding: [0xc3]
   2504   %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   2505   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   2506   %res2 = add <8 x i16> %res, %res1
   2507   ret <8 x i16> %res2
   2508 }
   2509 
   2510 declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   2511 
   2512 define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
   2513 ; X86-LABEL: test_int_x86_avx512_mask_pminu_w_256:
   2514 ; X86:       # %bb.0:
   2515 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2516 ; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1]
   2517 ; X86-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1]
   2518 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2519 ; X86-NEXT:    retl # encoding: [0xc3]
   2520 ;
   2521 ; X64-LABEL: test_int_x86_avx512_mask_pminu_w_256:
   2522 ; X64:       # %bb.0:
   2523 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2524 ; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1]
   2525 ; X64-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1]
   2526 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2527 ; X64-NEXT:    retq # encoding: [0xc3]
   2528   %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
   2529   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
   2530   %res2 = add <16 x i16> %res, %res1
   2531   ret <16 x i16> %res2
   2532 }
   2533 
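; Tests for the masked word shift intrinsics (psrl.w, psra.w) with an XMM shift-count operand.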
   2534 declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2535 
   2536 define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   2537 ; X86-LABEL: test_int_x86_avx512_mask_psrl_w_128:
   2538 ; X86:       # %bb.0:
   2539 ; X86-NEXT:    vpsrlw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xd9]
   2540 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2541 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2542 ; X86-NEXT:    vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
   2543 ; X86-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1]
   2544 ; X86-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
   2545 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   2546 ; X86-NEXT:    retl # encoding: [0xc3]
   2547 ;
   2548 ; X64-LABEL: test_int_x86_avx512_mask_psrl_w_128:
   2549 ; X64:       # %bb.0:
   2550 ; X64-NEXT:    vpsrlw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd1,0xd9]
   2551 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2552 ; X64-NEXT:    vpsrlw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
   2553 ; X64-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xc1]
   2554 ; X64-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfd,0xc0]
   2555 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   2556 ; X64-NEXT:    retq # encoding: [0xc3]
   2557   %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   2558   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   2559   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   2560   %res3 = add <8 x i16> %res, %res1
   2561   %res4 = add <8 x i16> %res2, %res3
   2562   ret <8 x i16> %res4
   2563 }
   2564 
   2565 declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
   2566 
   2567 define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   2568 ; X86-LABEL: test_int_x86_avx512_mask_psrl_w_256:
   2569 ; X86:       # %bb.0:
   2570 ; X86-NEXT:    vpsrlw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xd9]
   2571 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2572 ; X86-NEXT:    vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
   2573 ; X86-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1]
   2574 ; X86-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
   2575 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2576 ; X86-NEXT:    retl # encoding: [0xc3]
   2577 ;
   2578 ; X64-LABEL: test_int_x86_avx512_mask_psrl_w_256:
   2579 ; X64:       # %bb.0:
   2580 ; X64-NEXT:    vpsrlw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd1,0xd9]
   2581 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2582 ; X64-NEXT:    vpsrlw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
   2583 ; X64-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xc1]
   2584 ; X64-NEXT:    vpaddw %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfd,0xc0]
   2585 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2586 ; X64-NEXT:    retq # encoding: [0xc3]
   2587   %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
   2588   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
   2589   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   2590   %res3 = add <16 x i16> %res, %res1
   2591   %res4 = add <16 x i16> %res3, %res2
   2592   ret <16 x i16> %res4
   2593 }
   2594 
   2595 declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2596 
   2597 define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   2598 ; X86-LABEL: test_int_x86_avx512_mask_psra_w_128:
   2599 ; X86:       # %bb.0:
   2600 ; X86-NEXT:    vpsraw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xd9]
   2601 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2602 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2603 ; X86-NEXT:    vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
   2604 ; X86-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1]
   2605 ; X86-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
   2606 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   2607 ; X86-NEXT:    retl # encoding: [0xc3]
   2608 ;
   2609 ; X64-LABEL: test_int_x86_avx512_mask_psra_w_128:
   2610 ; X64:       # %bb.0:
   2611 ; X64-NEXT:    vpsraw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe1,0xd9]
   2612 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2613 ; X64-NEXT:    vpsraw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
   2614 ; X64-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xc1]
   2615 ; X64-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
   2616 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   2617 ; X64-NEXT:    retq # encoding: [0xc3]
   2618   %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   2619   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   2620   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   2621   %res3 = add <8 x i16> %res, %res1
   2622   %res4 = add <8 x i16> %res3, %res2
   2623   ret <8 x i16> %res4
   2624 }
   2625 
   2626 declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
   2627 
   2628 define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   2629 ; X86-LABEL: test_int_x86_avx512_mask_psra_w_256:
   2630 ; X86:       # %bb.0:
   2631 ; X86-NEXT:    vpsraw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xd9]
   2632 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2633 ; X86-NEXT:    vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
   2634 ; X86-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1]
   2635 ; X86-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
   2636 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2637 ; X86-NEXT:    retl # encoding: [0xc3]
   2638 ;
   2639 ; X64-LABEL: test_int_x86_avx512_mask_psra_w_256:
   2640 ; X64:       # %bb.0:
   2641 ; X64-NEXT:    vpsraw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe1,0xd9]
   2642 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2643 ; X64-NEXT:    vpsraw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
   2644 ; X64-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xc1]
   2645 ; X64-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
   2646 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2647 ; X64-NEXT:    retq # encoding: [0xc3]
   2648   %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
   2649   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   2650   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
   2651   %res3 = add <16 x i16> %res, %res1
   2652   %res4 = add <16 x i16> %res3, %res2
   2653   ret <16 x i16> %res4
   2654 }
   2655 
   2656 declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2657 
   2658 define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   2659 ; X86-LABEL: test_int_x86_avx512_mask_psll_w_128:
   2660 ; X86:       # %bb.0:
   2661 ; X86-NEXT:    vpsllw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xd9]
   2662 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2663 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2664 ; X86-NEXT:    vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
   2665 ; X86-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1]
   2666 ; X86-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
   2667 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   2668 ; X86-NEXT:    retl # encoding: [0xc3]
   2669 ;
   2670 ; X64-LABEL: test_int_x86_avx512_mask_psll_w_128:
   2671 ; X64:       # %bb.0:
   2672 ; X64-NEXT:    vpsllw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf1,0xd9]
   2673 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2674 ; X64-NEXT:    vpsllw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
   2675 ; X64-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xc1]
   2676 ; X64-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
   2677 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   2678 ; X64-NEXT:    retq # encoding: [0xc3]
   2679   %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   2680   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   2681   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   2682   %res3 = add <8 x i16> %res, %res1
   2683   %res4 = add <8 x i16> %res3, %res2
   2684   ret <8 x i16> %res4
   2685 }
   2686 
   2687 declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
   2688 
   2689 define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   2690 ; X86-LABEL: test_int_x86_avx512_mask_psll_w_256:
   2691 ; X86:       # %bb.0:
   2692 ; X86-NEXT:    vpsllw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xd9]
   2693 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2694 ; X86-NEXT:    vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
   2695 ; X86-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1]
   2696 ; X86-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
   2697 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2698 ; X86-NEXT:    retl # encoding: [0xc3]
   2699 ;
   2700 ; X64-LABEL: test_int_x86_avx512_mask_psll_w_256:
   2701 ; X64:       # %bb.0:
   2702 ; X64-NEXT:    vpsllw %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf1,0xd9]
   2703 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2704 ; X64-NEXT:    vpsllw %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
   2705 ; X64-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xc1]
   2706 ; X64-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
   2707 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2708 ; X64-NEXT:    retq # encoding: [0xc3]
   2709   %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
   2710   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   2711   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
   2712   %res3 = add <16 x i16> %res, %res1
   2713   %res4 = add <16 x i16> %res3, %res2
   2714   ret <16 x i16> %res4
   2715 }
   2716 
   2717 declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8)
   2718 
   2719 define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
   2720 ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
   2721 ; X86:       # %bb.0:
   2722 ; X86-NEXT:    vpsrlw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x03]
   2723 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
   2724 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2725 ; X86-NEXT:    vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03]
   2726 ; X86-NEXT:    vpsrlw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x03]
   2727 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   2728 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   2729 ; X86-NEXT:    retl # encoding: [0xc3]
   2730 ;
   2731 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
   2732 ; X64:       # %bb.0:
   2733 ; X64-NEXT:    vpsrlw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xd0,0x03]
   2734 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2735 ; X64-NEXT:    vpsrlw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03]
   2736 ; X64-NEXT:    vpsrlw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xd0,0x03]
   2737 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   2738 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   2739 ; X64-NEXT:    retq # encoding: [0xc3]
   2740   %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
   2741   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
   2742   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
   2743   %res3 = add <8 x i16> %res, %res1
   2744   %res4 = add <8 x i16> %res2, %res3
   2745   ret <8 x i16> %res4
   2746 }
   2747 
   2748 declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16)
   2749 
   2750 define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
   2751 ; X86-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
   2752 ; X86:       # %bb.0:
   2753 ; X86-NEXT:    vpsrlw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x03]
   2754 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2755 ; X86-NEXT:    vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03]
   2756 ; X86-NEXT:    vpsrlw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x03]
   2757 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2758 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   2759 ; X86-NEXT:    retl # encoding: [0xc3]
   2760 ;
   2761 ; X64-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
   2762 ; X64:       # %bb.0:
   2763 ; X64-NEXT:    vpsrlw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xd0,0x03]
   2764 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2765 ; X64-NEXT:    vpsrlw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03]
   2766 ; X64-NEXT:    vpsrlw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xd0,0x03]
   2767 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   2768 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   2769 ; X64-NEXT:    retq # encoding: [0xc3]
   2770   %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
   2771   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
   2772   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
   2773   %res3 = add <16 x i16> %res, %res1
   2774   %res4 = add <16 x i16> %res3, %res2
   2775   ret <16 x i16> %res4
   2776 }
   2777 
   2778 declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8)
   2779 
   2780 define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
   2781 ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_128:
   2782 ; X86:       # %bb.0:
   2783 ; X86-NEXT:    vpsraw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x03]
   2784 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
   2785 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2786 ; X86-NEXT:    vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03]
   2787 ; X86-NEXT:    vpsraw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xe0,0x03]
   2788 ; X86-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
   2789 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   2790 ; X86-NEXT:    retl # encoding: [0xc3]
   2791 ;
   2792 ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_128:
   2793 ; X64:       # %bb.0:
   2794 ; X64-NEXT:    vpsraw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xe0,0x03]
   2795 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2796 ; X64-NEXT:    vpsraw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03]
   2797 ; X64-NEXT:    vpsraw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xe0,0x03]
   2798 ; X64-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
   2799 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   2800 ; X64-NEXT:    retq # encoding: [0xc3]
   2801   %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
   2802   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
   2803   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
   2804   %res3 = add <8 x i16> %res, %res1
   2805   %res4 = add <8 x i16> %res3, %res2
   2806   ret <8 x i16> %res4
   2807 }
   2808 
   2809 declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16)
   2810 
   2811 define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
   2812 ; X86-LABEL: test_int_x86_avx512_mask_psra_wi_256:
   2813 ; X86:       # %bb.0:
   2814 ; X86-NEXT:    vpsraw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x03]
   2815 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2816 ; X86-NEXT:    vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03]
   2817 ; X86-NEXT:    vpsraw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xe0,0x03]
   2818 ; X86-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
   2819 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   2820 ; X86-NEXT:    retl # encoding: [0xc3]
   2821 ;
   2822 ; X64-LABEL: test_int_x86_avx512_mask_psra_wi_256:
   2823 ; X64:       # %bb.0:
   2824 ; X64-NEXT:    vpsraw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xe0,0x03]
   2825 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2826 ; X64-NEXT:    vpsraw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03]
   2827 ; X64-NEXT:    vpsraw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xe0,0x03]
   2828 ; X64-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
   2829 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   2830 ; X64-NEXT:    retq # encoding: [0xc3]
   2831   %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
   2832   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
   2833   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
   2834   %res3 = add <16 x i16> %res, %res1
   2835   %res4 = add <16 x i16> %res3, %res2
   2836   ret <16 x i16> %res4
   2837 }
   2838 
   2839 declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8)
   2840 
   2841 define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
   2842 ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_128:
   2843 ; X86:       # %bb.0:
   2844 ; X86-NEXT:    vpsllw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x03]
   2845 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
   2846 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2847 ; X86-NEXT:    vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03]
   2848 ; X86-NEXT:    vpsllw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xf0,0x03]
   2849 ; X86-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
   2850 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   2851 ; X86-NEXT:    retl # encoding: [0xc3]
   2852 ;
   2853 ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_128:
   2854 ; X64:       # %bb.0:
   2855 ; X64-NEXT:    vpsllw $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x71,0xf0,0x03]
   2856 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2857 ; X64-NEXT:    vpsllw $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03]
   2858 ; X64-NEXT:    vpsllw $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x71,0xf0,0x03]
   2859 ; X64-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
   2860 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   2861 ; X64-NEXT:    retq # encoding: [0xc3]
   2862   %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
   2863   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
   2864   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
   2865   %res3 = add <8 x i16> %res, %res1
   2866   %res4 = add <8 x i16> %res3, %res2
   2867   ret <8 x i16> %res4
   2868 }
   2869 
   2870 declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16)
   2871 
   2872 define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
   2873 ; X86-LABEL: test_int_x86_avx512_mask_psll_wi_256:
   2874 ; X86:       # %bb.0:
   2875 ; X86-NEXT:    vpsllw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x03]
   2876 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   2877 ; X86-NEXT:    vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03]
   2878 ; X86-NEXT:    vpsllw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xf0,0x03]
   2879 ; X86-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
   2880 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   2881 ; X86-NEXT:    retl # encoding: [0xc3]
   2882 ;
   2883 ; X64-LABEL: test_int_x86_avx512_mask_psll_wi_256:
   2884 ; X64:       # %bb.0:
   2885 ; X64-NEXT:    vpsllw $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x71,0xf0,0x03]
   2886 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   2887 ; X64-NEXT:    vpsllw $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03]
   2888 ; X64-NEXT:    vpsllw $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x71,0xf0,0x03]
   2889 ; X64-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
   2890 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   2891 ; X64-NEXT:    retq # encoding: [0xc3]
   2892   %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
   2893   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
   2894   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
   2895   %res3 = add <16 x i16> %res, %res1
   2896   %res4 = add <16 x i16> %res3, %res2
   2897   ret <16 x i16> %res4
   2898 }
   2899 
   2900 declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   2901 
   2902 define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
   2903 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
   2904 ; X86:       # %bb.0:
   2905 ; X86-NEXT:    vpshufb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xd9]
   2906 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2907 ; X86-NEXT:    vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
   2908 ; X86-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
   2909 ; X86-NEXT:    retl # encoding: [0xc3]
   2910 ;
   2911 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
   2912 ; X64:       # %bb.0:
   2913 ; X64-NEXT:    vpshufb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x00,0xd9]
   2914 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2915 ; X64-NEXT:    vpshufb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
   2916 ; X64-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
   2917 ; X64-NEXT:    retq # encoding: [0xc3]
   2918   %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
   2919   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
   2920   %res2 = add <16 x i8> %res, %res1
   2921   ret <16 x i8> %res2
   2922 }
   2923 
   2924 declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   2925 
   2926 define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   2927 ; X86-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
   2928 ; X86:       # %bb.0:
   2929 ; X86-NEXT:    vpshufb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xd9]
   2930 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   2931 ; X86-NEXT:    vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
   2932 ; X86-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2933 ; X86-NEXT:    retl # encoding: [0xc3]
   2934 ;
   2935 ; X64-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
   2936 ; X64:       # %bb.0:
   2937 ; X64-NEXT:    vpshufb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x00,0xd9]
   2938 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2939 ; X64-NEXT:    vpshufb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
   2940 ; X64-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   2941 ; X64-NEXT:    retq # encoding: [0xc3]
   2942   %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   2943   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   2944   %res2 = add <32 x i8> %res, %res1
   2945   ret <32 x i8> %res2
   2946 }
   2947 
   2948 declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8)
   2949 
   2950 define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
   2951 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
   2952 ; X86:       # %bb.0:
   2953 ; X86-NEXT:    vpmovzxbw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xd0]
   2954 ; X86-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   2955 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   2956 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   2957 ; X86-NEXT:    vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
   2958 ; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   2959 ; X86-NEXT:    vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
   2960 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   2961 ; X86-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
   2962 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   2963 ; X86-NEXT:    retl # encoding: [0xc3]
   2964 ;
   2965 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
   2966 ; X64:       # %bb.0:
   2967 ; X64-NEXT:    vpmovzxbw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x30,0xd0]
   2968 ; X64-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   2969 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   2970 ; X64-NEXT:    vpmovzxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
   2971 ; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   2972 ; X64-NEXT:    vpmovzxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x30,0xc0]
   2973 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   2974 ; X64-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
   2975 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   2976 ; X64-NEXT:    retq # encoding: [0xc3]
   2977   %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
   2978   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
   2979   %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
   2980   %res3 = add <8 x i16> %res, %res1
   2981   %res4 = add <8 x i16> %res3, %res2
   2982   ret <8 x i16> %res4
   2983 }
   2984 
   2985 declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16)
   2986 
   2987 define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
   2988 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
   2989 ; X86:       # %bb.0:
   2990 ; X86-NEXT:    vpmovzxbw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xd0]
   2991 ; X86-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   2992 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   2993 ; X86-NEXT:    vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
   2994 ; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   2995 ; X86-NEXT:    vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
   2996 ; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   2997 ; X86-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
   2998 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   2999 ; X86-NEXT:    retl # encoding: [0xc3]
   3000 ;
   3001 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
   3002 ; X64:       # %bb.0:
   3003 ; X64-NEXT:    vpmovzxbw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x30,0xd0]
   3004 ; X64-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   3005 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3006 ; X64-NEXT:    vpmovzxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
   3007 ; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   3008 ; X64-NEXT:    vpmovzxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xc0]
   3009 ; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   3010 ; X64-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
   3011 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   3012 ; X64-NEXT:    retq # encoding: [0xc3]
   3013   %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
   3014   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
   3015   %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
   3016   %res3 = add <16 x i16> %res, %res1
   3017   %res4 = add <16 x i16> %res3, %res2
   3018   ret <16 x i16> %res4
   3019 }
   3020 
   3021 
   3022 declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8)
   3023 
   3024 define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
   3025 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
   3026 ; X86:       # %bb.0:
   3027 ; X86-NEXT:    vpmovsxbw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xd0]
   3028 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3029 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   3030 ; X86-NEXT:    vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
   3031 ; X86-NEXT:    vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
   3032 ; X86-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
   3033 ; X86-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   3034 ; X86-NEXT:    retl # encoding: [0xc3]
   3035 ;
   3036 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
   3037 ; X64:       # %bb.0:
   3038 ; X64-NEXT:    vpmovsxbw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x20,0xd0]
   3039 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3040 ; X64-NEXT:    vpmovsxbw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
   3041 ; X64-NEXT:    vpmovsxbw %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x20,0xc0]
   3042 ; X64-NEXT:    vpaddw %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc2]
   3043 ; X64-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc0]
   3044 ; X64-NEXT:    retq # encoding: [0xc3]
   3045   %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
   3046   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
   3047   %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
   3048   %res3 = add <8 x i16> %res, %res1
   3049   %res4 = add <8 x i16> %res3, %res2
   3050   ret <8 x i16> %res4
   3051 }
   3052 
   3053 declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16)
   3054 
   3055 define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
   3056 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
   3057 ; X86:       # %bb.0:
   3058 ; X86-NEXT:    vpmovsxbw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xd0]
   3059 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3060 ; X86-NEXT:    vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
   3061 ; X86-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
   3062 ; X86-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
   3063 ; X86-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   3064 ; X86-NEXT:    retl # encoding: [0xc3]
   3065 ;
   3066 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
   3067 ; X64:       # %bb.0:
   3068 ; X64-NEXT:    vpmovsxbw %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x20,0xd0]
   3069 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3070 ; X64-NEXT:    vpmovsxbw %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
   3071 ; X64-NEXT:    vpmovsxbw %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xc0]
   3072 ; X64-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc2]
   3073 ; X64-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc0]
   3074 ; X64-NEXT:    retq # encoding: [0xc3]
   3075   %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
   3076   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
   3077   %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
   3078   %res3 = add <16 x i16> %res, %res1
   3079   %res4 = add <16 x i16> %res3, %res2
   3080   ret <16 x i16> %res4
   3081 }
   3082 
   3083 declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8)
   3084 
   3085 define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
   3086 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
   3087 ; X86:       # %bb.0:
   3088 ; X86-NEXT:    vpmovsxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xd0]
   3089 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3090 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   3091 ; X86-NEXT:    vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
   3092 ; X86-NEXT:    vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
   3093 ; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
   3094 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
   3095 ; X86-NEXT:    retl # encoding: [0xc3]
   3096 ;
   3097 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
   3098 ; X64:       # %bb.0:
   3099 ; X64-NEXT:    vpmovsxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x25,0xd0]
   3100 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3101 ; X64-NEXT:    vpmovsxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
   3102 ; X64-NEXT:    vpmovsxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x25,0xc0]
   3103 ; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
   3104 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
   3105 ; X64-NEXT:    retq # encoding: [0xc3]
   3106   %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
   3107   %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
   3108   %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
   3109   %res3 = add <2 x i64> %res, %res1
   3110   %res4 = add <2 x i64> %res3, %res2
   3111   ret <2 x i64> %res4
   3112 }
   3113 
   3114 declare <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8)
   3115 
   3116 define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
   3117 ; X86-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
   3118 ; X86:       # %bb.0:
   3119 ; X86-NEXT:    vpmovsxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xd0]
   3120 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3121 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   3122 ; X86-NEXT:    vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
   3123 ; X86-NEXT:    vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
   3124 ; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
   3125 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   3126 ; X86-NEXT:    retl # encoding: [0xc3]
   3127 ;
   3128 ; X64-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
   3129 ; X64:       # %bb.0:
   3130 ; X64-NEXT:    vpmovsxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x25,0xd0]
   3131 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3132 ; X64-NEXT:    vpmovsxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
   3133 ; X64-NEXT:    vpmovsxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xc0]
   3134 ; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
   3135 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   3136 ; X64-NEXT:    retq # encoding: [0xc3]
   3137   %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
   3138   %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
   3139   %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
   3140   %res3 = add <4 x i64> %res, %res1
   3141   %res4 = add <4 x i64> %res3, %res2
   3142   ret <4 x i64> %res4
   3143 }
   3144 
   3145 
   3146 declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16)
   3147 
   3148 define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) {
   3149 ; X86-LABEL: test_int_x86_avx512_cvtmask2b_128:
   3150 ; X86:       # %bb.0:
   3151 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
   3152 ; X86-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
   3153 ; X86-NEXT:    retl # encoding: [0xc3]
   3154 ;
   3155 ; X64-LABEL: test_int_x86_avx512_cvtmask2b_128:
   3156 ; X64:       # %bb.0:
   3157 ; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
   3158 ; X64-NEXT:    vpmovm2b %k0, %xmm0 # encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
   3159 ; X64-NEXT:    retq # encoding: [0xc3]
   3160   %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0)
   3161   ret <16 x i8> %res
   3162 }
   3163 
   3164 declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32)
   3165 
   3166 define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) {
   3167 ; X86-LABEL: test_int_x86_avx512_cvtmask2b_256:
   3168 ; X86:       # %bb.0:
   3169 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k0 # encoding: [0xc4,0xe1,0xf9,0x90,0x44,0x24,0x04]
   3170 ; X86-NEXT:    vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
   3171 ; X86-NEXT:    retl # encoding: [0xc3]
   3172 ;
   3173 ; X64-LABEL: test_int_x86_avx512_cvtmask2b_256:
   3174 ; X64:       # %bb.0:
   3175 ; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
   3176 ; X64-NEXT:    vpmovm2b %k0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
   3177 ; X64-NEXT:    retq # encoding: [0xc3]
   3178   %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0)
   3179   ret <32 x i8> %res
   3180 }
   3181 
   3182 declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8)
   3183 
   3184 define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) {
   3185 ; X86-LABEL: test_int_x86_avx512_cvtmask2w_128:
   3186 ; X86:       # %bb.0:
   3187 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3188 ; X86-NEXT:    kmovd %eax, %k0 # encoding: [0xc5,0xfb,0x92,0xc0]
   3189 ; X86-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
   3190 ; X86-NEXT:    retl # encoding: [0xc3]
   3191 ;
   3192 ; X64-LABEL: test_int_x86_avx512_cvtmask2w_128:
   3193 ; X64:       # %bb.0:
   3194 ; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
   3195 ; X64-NEXT:    vpmovm2w %k0, %xmm0 # encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
   3196 ; X64-NEXT:    retq # encoding: [0xc3]
   3197   %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0)
   3198   ret <8 x i16> %res
   3199 }
   3200 
   3201 declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16)
   3202 
   3203 define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) {
   3204 ; X86-LABEL: test_int_x86_avx512_cvtmask2w_256:
   3205 ; X86:       # %bb.0:
   3206 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k0 # encoding: [0xc5,0xf8,0x90,0x44,0x24,0x04]
   3207 ; X86-NEXT:    vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
   3208 ; X86-NEXT:    retl # encoding: [0xc3]
   3209 ;
   3210 ; X64-LABEL: test_int_x86_avx512_cvtmask2w_256:
   3211 ; X64:       # %bb.0:
   3212 ; X64-NEXT:    kmovd %edi, %k0 # encoding: [0xc5,0xfb,0x92,0xc7]
   3213 ; X64-NEXT:    vpmovm2w %k0, %ymm0 # encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
   3214 ; X64-NEXT:    retq # encoding: [0xc3]
   3215   %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0)
   3216   ret <16 x i16> %res
   3217 }

    3218 define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
   3219 ; CHECK-LABEL: test_mask_packs_epi32_rr_128:
   3220 ; CHECK:       # %bb.0:
   3221 ; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0xc1]
   3222 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   3223   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   3224   ret <8 x i16> %res
   3225 }
   3226 
   3227 define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
   3228 ; X86-LABEL: test_mask_packs_epi32_rrk_128:
   3229 ; X86:       # %bb.0:
   3230 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3231 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   3232 ; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
   3233 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   3234 ; X86-NEXT:    retl # encoding: [0xc3]
   3235 ;
   3236 ; X64-LABEL: test_mask_packs_epi32_rrk_128:
   3237 ; X64:       # %bb.0:
   3238 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3239 ; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
   3240 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   3241 ; X64-NEXT:    retq # encoding: [0xc3]
   3242   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   3243   ret <8 x i16> %res
   3244 }
   3245 
   3246 define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
   3247 ; X86-LABEL: test_mask_packs_epi32_rrkz_128:
   3248 ; X86:       # %bb.0:
   3249 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3250 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   3251 ; X86-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
   3252 ; X86-NEXT:    retl # encoding: [0xc3]
   3253 ;
   3254 ; X64-LABEL: test_mask_packs_epi32_rrkz_128:
   3255 ; X64:       # %bb.0:
   3256 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3257 ; X64-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
   3258 ; X64-NEXT:    retq # encoding: [0xc3]
   3259   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   3260   ret <8 x i16> %res
   3261 }
   3262 
   3263 define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
   3264 ; X86-LABEL: test_mask_packs_epi32_rm_128:
   3265 ; X86:       # %bb.0:
   3266 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3267 ; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x00]
   3268 ; X86-NEXT:    retl # encoding: [0xc3]
   3269 ;
   3270 ; X64-LABEL: test_mask_packs_epi32_rm_128:
   3271 ; X64:       # %bb.0:
   3272 ; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6b,0x07]
   3273 ; X64-NEXT:    retq # encoding: [0xc3]
   3274   %b = load <4 x i32>, <4 x i32>* %ptr_b
   3275   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   3276   ret <8 x i16> %res
   3277 }
   3278 
   3279 define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   3280 ; X86-LABEL: test_mask_packs_epi32_rmk_128:
   3281 ; X86:       # %bb.0:
   3282 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3283 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3284 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   3285 ; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x08]
   3286 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3287 ; X86-NEXT:    retl # encoding: [0xc3]
   3288 ;
   3289 ; X64-LABEL: test_mask_packs_epi32_rmk_128:
   3290 ; X64:       # %bb.0:
   3291 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3292 ; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
   3293 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3294 ; X64-NEXT:    retq # encoding: [0xc3]
   3295   %b = load <4 x i32>, <4 x i32>* %ptr_b
   3296   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   3297   ret <8 x i16> %res
   3298 }
   3299 
   3300 define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
   3301 ; X86-LABEL: test_mask_packs_epi32_rmkz_128:
   3302 ; X86:       # %bb.0:
   3303 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3304 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3305 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   3306 ; X86-NEXT:    vpackssdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x00]
   3307 ; X86-NEXT:    retl # encoding: [0xc3]
   3308 ;
   3309 ; X64-LABEL: test_mask_packs_epi32_rmkz_128:
   3310 ; X64:       # %bb.0:
   3311 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3312 ; X64-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
   3313 ; X64-NEXT:    retq # encoding: [0xc3]
   3314   %b = load <4 x i32>, <4 x i32>* %ptr_b
   3315   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   3316   ret <8 x i16> %res
   3317 }
   3318 
   3319 define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
   3320 ; X86-LABEL: test_mask_packs_epi32_rmb_128:
   3321 ; X86:       # %bb.0:
   3322 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3323 ; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x00]
   3324 ; X86-NEXT:    retl # encoding: [0xc3]
   3325 ;
   3326 ; X64-LABEL: test_mask_packs_epi32_rmb_128:
   3327 ; X64:       # %bb.0:
   3328 ; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
   3329 ; X64-NEXT:    retq # encoding: [0xc3]
   3330   %q = load i32, i32* %ptr_b
   3331   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3332   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3333   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   3334   ret <8 x i16> %res
   3335 }
   3336 
   3337 define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   3338 ; X86-LABEL: test_mask_packs_epi32_rmbk_128:
   3339 ; X86:       # %bb.0:
   3340 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3341 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3342 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   3343 ; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x08]
   3344 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3345 ; X86-NEXT:    retl # encoding: [0xc3]
   3346 ;
   3347 ; X64-LABEL: test_mask_packs_epi32_rmbk_128:
   3348 ; X64:       # %bb.0:
   3349 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3350 ; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
   3351 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3352 ; X64-NEXT:    retq # encoding: [0xc3]
   3353   %q = load i32, i32* %ptr_b
   3354   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3355   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3356   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   3357   ret <8 x i16> %res
   3358 }
   3359 
   3360 define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
   3361 ; X86-LABEL: test_mask_packs_epi32_rmbkz_128:
   3362 ; X86:       # %bb.0:
   3363 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3364 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3365 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   3366 ; X86-NEXT:    vpackssdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x00]
   3367 ; X86-NEXT:    retl # encoding: [0xc3]
   3368 ;
   3369 ; X64-LABEL: test_mask_packs_epi32_rmbkz_128:
   3370 ; X64:       # %bb.0:
   3371 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3372 ; X64-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
   3373 ; X64-NEXT:    retq # encoding: [0xc3]
   3374   %q = load i32, i32* %ptr_b
   3375   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3376   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3377   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   3378   ret <8 x i16> %res
   3379 }
   3380 
   3381 declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
   3382 
   3383 define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
   3384 ; CHECK-LABEL: test_mask_packs_epi32_rr_256:
   3385 ; CHECK:       # %bb.0:
   3386 ; CHECK-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0xc1]
   3387 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   3388   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   3389   ret <16 x i16> %res
   3390 }
   3391 
   3392 define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
   3393 ; X86-LABEL: test_mask_packs_epi32_rrk_256:
   3394 ; X86:       # %bb.0:
   3395 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3396 ; X86-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
   3397 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3398 ; X86-NEXT:    retl # encoding: [0xc3]
   3399 ;
   3400 ; X64-LABEL: test_mask_packs_epi32_rrk_256:
   3401 ; X64:       # %bb.0:
   3402 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3403 ; X64-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
   3404 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3405 ; X64-NEXT:    retq # encoding: [0xc3]
   3406   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   3407   ret <16 x i16> %res
   3408 }
   3409 
   3410 define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
   3411 ; X86-LABEL: test_mask_packs_epi32_rrkz_256:
   3412 ; X86:       # %bb.0:
   3413 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3414 ; X86-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
   3415 ; X86-NEXT:    retl # encoding: [0xc3]
   3416 ;
   3417 ; X64-LABEL: test_mask_packs_epi32_rrkz_256:
   3418 ; X64:       # %bb.0:
   3419 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3420 ; X64-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
   3421 ; X64-NEXT:    retq # encoding: [0xc3]
   3422   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   3423   ret <16 x i16> %res
   3424 }
   3425 
   3426 define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
   3427 ; X86-LABEL: test_mask_packs_epi32_rm_256:
   3428 ; X86:       # %bb.0:
   3429 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3430 ; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x00]
   3431 ; X86-NEXT:    retl # encoding: [0xc3]
   3432 ;
   3433 ; X64-LABEL: test_mask_packs_epi32_rm_256:
   3434 ; X64:       # %bb.0:
   3435 ; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6b,0x07]
   3436 ; X64-NEXT:    retq # encoding: [0xc3]
   3437   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3438   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   3439   ret <16 x i16> %res
   3440 }
   3441 
   3442 define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   3443 ; X86-LABEL: test_mask_packs_epi32_rmk_256:
   3444 ; X86:       # %bb.0:
   3445 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3446 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3447 ; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x08]
   3448 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3449 ; X86-NEXT:    retl # encoding: [0xc3]
   3450 ;
   3451 ; X64-LABEL: test_mask_packs_epi32_rmk_256:
   3452 ; X64:       # %bb.0:
   3453 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3454 ; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
   3455 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3456 ; X64-NEXT:    retq # encoding: [0xc3]
   3457   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3458   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   3459   ret <16 x i16> %res
   3460 }
   3461 
   3462 define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
   3463 ; X86-LABEL: test_mask_packs_epi32_rmkz_256:
   3464 ; X86:       # %bb.0:
   3465 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3466 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3467 ; X86-NEXT:    vpackssdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x00]
   3468 ; X86-NEXT:    retl # encoding: [0xc3]
   3469 ;
   3470 ; X64-LABEL: test_mask_packs_epi32_rmkz_256:
   3471 ; X64:       # %bb.0:
   3472 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3473 ; X64-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
   3474 ; X64-NEXT:    retq # encoding: [0xc3]
   3475   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3476   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   3477   ret <16 x i16> %res
   3478 }
   3479 
   3480 define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
   3481 ; X86-LABEL: test_mask_packs_epi32_rmb_256:
   3482 ; X86:       # %bb.0:
   3483 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3484 ; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x00]
   3485 ; X86-NEXT:    retl # encoding: [0xc3]
   3486 ;
   3487 ; X64-LABEL: test_mask_packs_epi32_rmb_256:
   3488 ; X64:       # %bb.0:
   3489 ; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
   3490 ; X64-NEXT:    retq # encoding: [0xc3]
   3491   %q = load i32, i32* %ptr_b
   3492   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   3493   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   3494   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   3495   ret <16 x i16> %res
   3496 }
   3497 
   3498 define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   3499 ; X86-LABEL: test_mask_packs_epi32_rmbk_256:
   3500 ; X86:       # %bb.0:
   3501 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3502 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3503 ; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x08]
   3504 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3505 ; X86-NEXT:    retl # encoding: [0xc3]
   3506 ;
   3507 ; X64-LABEL: test_mask_packs_epi32_rmbk_256:
   3508 ; X64:       # %bb.0:
   3509 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3510 ; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
   3511 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3512 ; X64-NEXT:    retq # encoding: [0xc3]
   3513   %q = load i32, i32* %ptr_b
   3514   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   3515   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   3516   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   3517   ret <16 x i16> %res
   3518 }
   3519 
   3520 define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
   3521 ; X86-LABEL: test_mask_packs_epi32_rmbkz_256:
   3522 ; X86:       # %bb.0:
   3523 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3524 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3525 ; X86-NEXT:    vpackssdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x00]
   3526 ; X86-NEXT:    retl # encoding: [0xc3]
   3527 ;
   3528 ; X64-LABEL: test_mask_packs_epi32_rmbkz_256:
   3529 ; X64:       # %bb.0:
   3530 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3531 ; X64-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
   3532 ; X64-NEXT:    retq # encoding: [0xc3]
   3533   %q = load i32, i32* %ptr_b
   3534   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   3535   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   3536   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   3537   ret <16 x i16> %res
   3538 }
   3539 
   3540 declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
   3541 
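; Editor's note: the tests below switch from vpackssdw (dword -> word saturation)
; to vpacksswb (word -> byte saturation); the mask type widens accordingly, from
; i8/i16 to i16/i32 for the 128-bit and 256-bit element counts.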
   3542 define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   3543 ; CHECK-LABEL: test_mask_packs_epi16_rr_128:
   3544 ; CHECK:       # %bb.0:
   3545 ; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0xc1]
   3546 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   3547   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   3548   ret <16 x i8> %res
   3549 }
   3550 
   3551 define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
   3552 ; X86-LABEL: test_mask_packs_epi16_rrk_128:
   3553 ; X86:       # %bb.0:
   3554 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3555 ; X86-NEXT:    vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
   3556 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   3557 ; X86-NEXT:    retl # encoding: [0xc3]
   3558 ;
   3559 ; X64-LABEL: test_mask_packs_epi16_rrk_128:
   3560 ; X64:       # %bb.0:
   3561 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3562 ; X64-NEXT:    vpacksswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
   3563 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   3564 ; X64-NEXT:    retq # encoding: [0xc3]
   3565   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
   3566   ret <16 x i8> %res
   3567 }
   3568 
   3569 define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
   3570 ; X86-LABEL: test_mask_packs_epi16_rrkz_128:
   3571 ; X86:       # %bb.0:
   3572 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3573 ; X86-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
   3574 ; X86-NEXT:    retl # encoding: [0xc3]
   3575 ;
   3576 ; X64-LABEL: test_mask_packs_epi16_rrkz_128:
   3577 ; X64:       # %bb.0:
   3578 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3579 ; X64-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
   3580 ; X64-NEXT:    retq # encoding: [0xc3]
   3581   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
   3582   ret <16 x i8> %res
   3583 }
   3584 
   3585 define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   3586 ; X86-LABEL: test_mask_packs_epi16_rm_128:
   3587 ; X86:       # %bb.0:
   3588 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3589 ; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x00]
   3590 ; X86-NEXT:    retl # encoding: [0xc3]
   3591 ;
   3592 ; X64-LABEL: test_mask_packs_epi16_rm_128:
   3593 ; X64:       # %bb.0:
   3594 ; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x63,0x07]
   3595 ; X64-NEXT:    retq # encoding: [0xc3]
   3596   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3597   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   3598   ret <16 x i8> %res
   3599 }
   3600 
   3601 define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
   3602 ; X86-LABEL: test_mask_packs_epi16_rmk_128:
   3603 ; X86:       # %bb.0:
   3604 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3605 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3606 ; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x08]
   3607 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3608 ; X86-NEXT:    retl # encoding: [0xc3]
   3609 ;
   3610 ; X64-LABEL: test_mask_packs_epi16_rmk_128:
   3611 ; X64:       # %bb.0:
   3612 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3613 ; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
   3614 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3615 ; X64-NEXT:    retq # encoding: [0xc3]
   3616   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3617   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
   3618   ret <16 x i8> %res
   3619 }
   3620 
   3621 define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
   3622 ; X86-LABEL: test_mask_packs_epi16_rmkz_128:
   3623 ; X86:       # %bb.0:
   3624 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3625 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3626 ; X86-NEXT:    vpacksswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x00]
   3627 ; X86-NEXT:    retl # encoding: [0xc3]
   3628 ;
   3629 ; X64-LABEL: test_mask_packs_epi16_rmkz_128:
   3630 ; X64:       # %bb.0:
   3631 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3632 ; X64-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
   3633 ; X64-NEXT:    retq # encoding: [0xc3]
   3634   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3635   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
   3636   ret <16 x i8> %res
   3637 }
   3638 
   3639 declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
   3640 
   3641 define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   3642 ; CHECK-LABEL: test_mask_packs_epi16_rr_256:
   3643 ; CHECK:       # %bb.0:
   3644 ; CHECK-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0xc1]
   3645 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   3646   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   3647   ret <32 x i8> %res
   3648 }
   3649 
   3650 define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
   3651 ; X86-LABEL: test_mask_packs_epi16_rrk_256:
   3652 ; X86:       # %bb.0:
   3653 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   3654 ; X86-NEXT:    vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
   3655 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3656 ; X86-NEXT:    retl # encoding: [0xc3]
   3657 ;
   3658 ; X64-LABEL: test_mask_packs_epi16_rrk_256:
   3659 ; X64:       # %bb.0:
   3660 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3661 ; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
   3662 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3663 ; X64-NEXT:    retq # encoding: [0xc3]
   3664   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
   3665   ret <32 x i8> %res
   3666 }
   3667 
   3668 define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
   3669 ; X86-LABEL: test_mask_packs_epi16_rrkz_256:
   3670 ; X86:       # %bb.0:
   3671 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   3672 ; X86-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
   3673 ; X86-NEXT:    retl # encoding: [0xc3]
   3674 ;
   3675 ; X64-LABEL: test_mask_packs_epi16_rrkz_256:
   3676 ; X64:       # %bb.0:
   3677 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3678 ; X64-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
   3679 ; X64-NEXT:    retq # encoding: [0xc3]
   3680   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
   3681   ret <32 x i8> %res
   3682 }
   3683 
   3684 define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   3685 ; X86-LABEL: test_mask_packs_epi16_rm_256:
   3686 ; X86:       # %bb.0:
   3687 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3688 ; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x00]
   3689 ; X86-NEXT:    retl # encoding: [0xc3]
   3690 ;
   3691 ; X64-LABEL: test_mask_packs_epi16_rm_256:
   3692 ; X64:       # %bb.0:
   3693 ; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x63,0x07]
   3694 ; X64-NEXT:    retq # encoding: [0xc3]
   3695   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3696   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   3697   ret <32 x i8> %res
   3698 }
   3699 
   3700 define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
   3701 ; X86-LABEL: test_mask_packs_epi16_rmk_256:
   3702 ; X86:       # %bb.0:
   3703 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3704 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   3705 ; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x08]
   3706 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3707 ; X86-NEXT:    retl # encoding: [0xc3]
   3708 ;
   3709 ; X64-LABEL: test_mask_packs_epi16_rmk_256:
   3710 ; X64:       # %bb.0:
   3711 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3712 ; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
   3713 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3714 ; X64-NEXT:    retq # encoding: [0xc3]
   3715   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3716   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
   3717   ret <32 x i8> %res
   3718 }
   3719 
   3720 define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
   3721 ; X86-LABEL: test_mask_packs_epi16_rmkz_256:
   3722 ; X86:       # %bb.0:
   3723 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3724 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   3725 ; X86-NEXT:    vpacksswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x00]
   3726 ; X86-NEXT:    retl # encoding: [0xc3]
   3727 ;
   3728 ; X64-LABEL: test_mask_packs_epi16_rmkz_256:
   3729 ; X64:       # %bb.0:
   3730 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3731 ; X64-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
   3732 ; X64-NEXT:    retq # encoding: [0xc3]
   3733   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3734   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
   3735   ret <32 x i8> %res
   3736 }
   3737 
   3738 declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
   3739 
   3740 
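; Editor's note: the test_mask_packus_* functions mirror the test_mask_packs_*
; coverage above for the unsigned-saturation pack intrinsics (vpackusdw and
; vpackuswb), using the same rr/rm/rmb and k/kz mask variants.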
   3741 define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
   3742 ; CHECK-LABEL: test_mask_packus_epi32_rr_128:
   3743 ; CHECK:       # %bb.0:
   3744 ; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0xc1]
   3745 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   3746   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   3747   ret <8 x i16> %res
   3748 }
   3749 
   3750 define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
   3751 ; X86-LABEL: test_mask_packus_epi32_rrk_128:
   3752 ; X86:       # %bb.0:
   3753 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3754 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   3755 ; X86-NEXT:    vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
   3756 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   3757 ; X86-NEXT:    retl # encoding: [0xc3]
   3758 ;
   3759 ; X64-LABEL: test_mask_packus_epi32_rrk_128:
   3760 ; X64:       # %bb.0:
   3761 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3762 ; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
   3763 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   3764 ; X64-NEXT:    retq # encoding: [0xc3]
   3765   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   3766   ret <8 x i16> %res
   3767 }
   3768 
   3769 define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
   3770 ; X86-LABEL: test_mask_packus_epi32_rrkz_128:
   3771 ; X86:       # %bb.0:
   3772 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3773 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   3774 ; X86-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
   3775 ; X86-NEXT:    retl # encoding: [0xc3]
   3776 ;
   3777 ; X64-LABEL: test_mask_packus_epi32_rrkz_128:
   3778 ; X64:       # %bb.0:
   3779 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3780 ; X64-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
   3781 ; X64-NEXT:    retq # encoding: [0xc3]
   3782   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   3783   ret <8 x i16> %res
   3784 }
   3785 
   3786 define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
   3787 ; X86-LABEL: test_mask_packus_epi32_rm_128:
   3788 ; X86:       # %bb.0:
   3789 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3790 ; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x00]
   3791 ; X86-NEXT:    retl # encoding: [0xc3]
   3792 ;
   3793 ; X64-LABEL: test_mask_packus_epi32_rm_128:
   3794 ; X64:       # %bb.0:
   3795 ; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x2b,0x07]
   3796 ; X64-NEXT:    retq # encoding: [0xc3]
   3797   %b = load <4 x i32>, <4 x i32>* %ptr_b
   3798   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   3799   ret <8 x i16> %res
   3800 }
   3801 
   3802 define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   3803 ; X86-LABEL: test_mask_packus_epi32_rmk_128:
   3804 ; X86:       # %bb.0:
   3805 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3806 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3807 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   3808 ; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x08]
   3809 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3810 ; X86-NEXT:    retl # encoding: [0xc3]
   3811 ;
   3812 ; X64-LABEL: test_mask_packus_epi32_rmk_128:
   3813 ; X64:       # %bb.0:
   3814 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3815 ; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
   3816 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3817 ; X64-NEXT:    retq # encoding: [0xc3]
   3818   %b = load <4 x i32>, <4 x i32>* %ptr_b
   3819   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   3820   ret <8 x i16> %res
   3821 }
   3822 
   3823 define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
   3824 ; X86-LABEL: test_mask_packus_epi32_rmkz_128:
   3825 ; X86:       # %bb.0:
   3826 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3827 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3828 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   3829 ; X86-NEXT:    vpackusdw (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x00]
   3830 ; X86-NEXT:    retl # encoding: [0xc3]
   3831 ;
   3832 ; X64-LABEL: test_mask_packus_epi32_rmkz_128:
   3833 ; X64:       # %bb.0:
   3834 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3835 ; X64-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
   3836 ; X64-NEXT:    retq # encoding: [0xc3]
   3837   %b = load <4 x i32>, <4 x i32>* %ptr_b
   3838   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   3839   ret <8 x i16> %res
   3840 }
   3841 
   3842 define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
   3843 ; X86-LABEL: test_mask_packus_epi32_rmb_128:
   3844 ; X86:       # %bb.0:
   3845 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3846 ; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x00]
   3847 ; X86-NEXT:    retl # encoding: [0xc3]
   3848 ;
   3849 ; X64-LABEL: test_mask_packus_epi32_rmb_128:
   3850 ; X64:       # %bb.0:
   3851 ; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
   3852 ; X64-NEXT:    retq # encoding: [0xc3]
   3853   %q = load i32, i32* %ptr_b
   3854   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3855   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3856   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   3857   ret <8 x i16> %res
   3858 }
   3859 
   3860 define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   3861 ; X86-LABEL: test_mask_packus_epi32_rmbk_128:
   3862 ; X86:       # %bb.0:
   3863 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3864 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3865 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   3866 ; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x08]
   3867 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3868 ; X86-NEXT:    retl # encoding: [0xc3]
   3869 ;
   3870 ; X64-LABEL: test_mask_packus_epi32_rmbk_128:
   3871 ; X64:       # %bb.0:
   3872 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3873 ; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
   3874 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3875 ; X64-NEXT:    retq # encoding: [0xc3]
   3876   %q = load i32, i32* %ptr_b
   3877   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3878   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3879   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   3880   ret <8 x i16> %res
   3881 }
   3882 
   3883 define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
   3884 ; X86-LABEL: test_mask_packus_epi32_rmbkz_128:
   3885 ; X86:       # %bb.0:
   3886 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3887 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3888 ; X86-NEXT:    kmovd %ecx, %k1 # encoding: [0xc5,0xfb,0x92,0xc9]
   3889 ; X86-NEXT:    vpackusdw (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x00]
   3890 ; X86-NEXT:    retl # encoding: [0xc3]
   3891 ;
   3892 ; X64-LABEL: test_mask_packus_epi32_rmbkz_128:
   3893 ; X64:       # %bb.0:
   3894 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3895 ; X64-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
   3896 ; X64-NEXT:    retq # encoding: [0xc3]
   3897   %q = load i32, i32* %ptr_b
   3898   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3899   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3900   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   3901   ret <8 x i16> %res
   3902 }
   3903 
   3904 declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
   3905 
   3906 define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
   3907 ; CHECK-LABEL: test_mask_packus_epi32_rr_256:
   3908 ; CHECK:       # %bb.0:
   3909 ; CHECK-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0xc1]
   3910 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   3911   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   3912   ret <16 x i16> %res
   3913 }
   3914 
   3915 define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
   3916 ; X86-LABEL: test_mask_packus_epi32_rrk_256:
   3917 ; X86:       # %bb.0:
   3918 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3919 ; X86-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
   3920 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3921 ; X86-NEXT:    retl # encoding: [0xc3]
   3922 ;
   3923 ; X64-LABEL: test_mask_packus_epi32_rrk_256:
   3924 ; X64:       # %bb.0:
   3925 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3926 ; X64-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
   3927 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3928 ; X64-NEXT:    retq # encoding: [0xc3]
   3929   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   3930   ret <16 x i16> %res
   3931 }
   3932 
   3933 define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
   3934 ; X86-LABEL: test_mask_packus_epi32_rrkz_256:
   3935 ; X86:       # %bb.0:
   3936 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   3937 ; X86-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
   3938 ; X86-NEXT:    retl # encoding: [0xc3]
   3939 ;
   3940 ; X64-LABEL: test_mask_packus_epi32_rrkz_256:
   3941 ; X64:       # %bb.0:
   3942 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   3943 ; X64-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
   3944 ; X64-NEXT:    retq # encoding: [0xc3]
   3945   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   3946   ret <16 x i16> %res
   3947 }
   3948 
   3949 define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
   3950 ; X86-LABEL: test_mask_packus_epi32_rm_256:
   3951 ; X86:       # %bb.0:
   3952 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3953 ; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x00]
   3954 ; X86-NEXT:    retl # encoding: [0xc3]
   3955 ;
   3956 ; X64-LABEL: test_mask_packus_epi32_rm_256:
   3957 ; X64:       # %bb.0:
   3958 ; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x2b,0x07]
   3959 ; X64-NEXT:    retq # encoding: [0xc3]
   3960   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3961   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   3962   ret <16 x i16> %res
   3963 }
   3964 
   3965 define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   3966 ; X86-LABEL: test_mask_packus_epi32_rmk_256:
   3967 ; X86:       # %bb.0:
   3968 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3969 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3970 ; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x08]
   3971 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3972 ; X86-NEXT:    retl # encoding: [0xc3]
   3973 ;
   3974 ; X64-LABEL: test_mask_packus_epi32_rmk_256:
   3975 ; X64:       # %bb.0:
   3976 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3977 ; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
   3978 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3979 ; X64-NEXT:    retq # encoding: [0xc3]
   3980   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3981   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   3982   ret <16 x i16> %res
   3983 }
   3984 
   3985 define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
   3986 ; X86-LABEL: test_mask_packus_epi32_rmkz_256:
   3987 ; X86:       # %bb.0:
   3988 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3989 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   3990 ; X86-NEXT:    vpackusdw (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x00]
   3991 ; X86-NEXT:    retl # encoding: [0xc3]
   3992 ;
   3993 ; X64-LABEL: test_mask_packus_epi32_rmkz_256:
   3994 ; X64:       # %bb.0:
   3995 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   3996 ; X64-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
   3997 ; X64-NEXT:    retq # encoding: [0xc3]
   3998   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3999   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   4000   ret <16 x i16> %res
   4001 }
   4002 
   4003 define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
   4004 ; X86-LABEL: test_mask_packus_epi32_rmb_256:
   4005 ; X86:       # %bb.0:
   4006 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4007 ; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x00]
   4008 ; X86-NEXT:    retl # encoding: [0xc3]
   4009 ;
   4010 ; X64-LABEL: test_mask_packus_epi32_rmb_256:
   4011 ; X64:       # %bb.0:
   4012 ; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
   4013 ; X64-NEXT:    retq # encoding: [0xc3]
   4014   %q = load i32, i32* %ptr_b
   4015   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   4016   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   4017   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   4018   ret <16 x i16> %res
   4019 }
   4020 
   4021 define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   4022 ; X86-LABEL: test_mask_packus_epi32_rmbk_256:
   4023 ; X86:       # %bb.0:
   4024 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4025 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   4026 ; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x08]
   4027 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   4028 ; X86-NEXT:    retl # encoding: [0xc3]
   4029 ;
   4030 ; X64-LABEL: test_mask_packus_epi32_rmbk_256:
   4031 ; X64:       # %bb.0:
   4032 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   4033 ; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
   4034 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   4035 ; X64-NEXT:    retq # encoding: [0xc3]
   4036   %q = load i32, i32* %ptr_b
   4037   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   4038   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   4039   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   4040   ret <16 x i16> %res
   4041 }
   4042 
   4043 define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
   4044 ; X86-LABEL: test_mask_packus_epi32_rmbkz_256:
   4045 ; X86:       # %bb.0:
   4046 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4047 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   4048 ; X86-NEXT:    vpackusdw (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x00]
   4049 ; X86-NEXT:    retl # encoding: [0xc3]
   4050 ;
   4051 ; X64-LABEL: test_mask_packus_epi32_rmbkz_256:
   4052 ; X64:       # %bb.0:
   4053 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   4054 ; X64-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
   4055 ; X64-NEXT:    retq # encoding: [0xc3]
   4056   %q = load i32, i32* %ptr_b
   4057   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   4058   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   4059   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   4060   ret <16 x i16> %res
   4061 }
   4062 
   4063 declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
   4064 
   4065 define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   4066 ; CHECK-LABEL: test_mask_packus_epi16_rr_128:
   4067 ; CHECK:       # %bb.0:
   4068 ; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0xc1]
   4069 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4070   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   4071   ret <16 x i8> %res
   4072 }
   4073 
   4074 define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
   4075 ; X86-LABEL: test_mask_packus_epi16_rrk_128:
   4076 ; X86:       # %bb.0:
   4077 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4078 ; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
   4079 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   4080 ; X86-NEXT:    retl # encoding: [0xc3]
   4081 ;
   4082 ; X64-LABEL: test_mask_packus_epi16_rrk_128:
   4083 ; X64:       # %bb.0:
   4084 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   4085 ; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
   4086 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   4087 ; X64-NEXT:    retq # encoding: [0xc3]
   4088   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
   4089   ret <16 x i8> %res
   4090 }
   4091 
   4092 define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
   4093 ; X86-LABEL: test_mask_packus_epi16_rrkz_128:
   4094 ; X86:       # %bb.0:
   4095 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   4096 ; X86-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
   4097 ; X86-NEXT:    retl # encoding: [0xc3]
   4098 ;
   4099 ; X64-LABEL: test_mask_packus_epi16_rrkz_128:
   4100 ; X64:       # %bb.0:
   4101 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   4102 ; X64-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
   4103 ; X64-NEXT:    retq # encoding: [0xc3]
   4104   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
   4105   ret <16 x i8> %res
   4106 }
   4107 
   4108 define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   4109 ; X86-LABEL: test_mask_packus_epi16_rm_128:
   4110 ; X86:       # %bb.0:
   4111 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4112 ; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x00]
   4113 ; X86-NEXT:    retl # encoding: [0xc3]
   4114 ;
   4115 ; X64-LABEL: test_mask_packus_epi16_rm_128:
   4116 ; X64:       # %bb.0:
   4117 ; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x67,0x07]
   4118 ; X64-NEXT:    retq # encoding: [0xc3]
   4119   %b = load <8 x i16>, <8 x i16>* %ptr_b
   4120   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   4121   ret <16 x i8> %res
   4122 }
   4123 
   4124 define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
   4125 ; X86-LABEL: test_mask_packus_epi16_rmk_128:
   4126 ; X86:       # %bb.0:
   4127 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4128 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   4129 ; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x08]
   4130 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   4131 ; X86-NEXT:    retl # encoding: [0xc3]
   4132 ;
   4133 ; X64-LABEL: test_mask_packus_epi16_rmk_128:
   4134 ; X64:       # %bb.0:
   4135 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   4136 ; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
   4137 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   4138 ; X64-NEXT:    retq # encoding: [0xc3]
   4139   %b = load <8 x i16>, <8 x i16>* %ptr_b
   4140   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
   4141   ret <16 x i8> %res
   4142 }
   4143 
   4144 define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
   4145 ; X86-LABEL: test_mask_packus_epi16_rmkz_128:
   4146 ; X86:       # %bb.0:
   4147 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4148 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x08]
   4149 ; X86-NEXT:    vpackuswb (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x00]
   4150 ; X86-NEXT:    retl # encoding: [0xc3]
   4151 ;
   4152 ; X64-LABEL: test_mask_packus_epi16_rmkz_128:
   4153 ; X64:       # %bb.0:
   4154 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   4155 ; X64-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
   4156 ; X64-NEXT:    retq # encoding: [0xc3]
   4157   %b = load <8 x i16>, <8 x i16>* %ptr_b
   4158   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
   4159   ret <16 x i8> %res
   4160 }
   4161 
   4162 declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
   4163 
   4164 define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   4165 ; CHECK-LABEL: test_mask_packus_epi16_rr_256:
   4166 ; CHECK:       # %bb.0:
   4167 ; CHECK-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0xc1]
   4168 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4169   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   4170   ret <32 x i8> %res
   4171 }
   4172 
   4173 define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
   4174 ; X86-LABEL: test_mask_packus_epi16_rrk_256:
   4175 ; X86:       # %bb.0:
   4176 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   4177 ; X86-NEXT:    vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
   4178 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   4179 ; X86-NEXT:    retl # encoding: [0xc3]
   4180 ;
   4181 ; X64-LABEL: test_mask_packus_epi16_rrk_256:
   4182 ; X64:       # %bb.0:
   4183 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   4184 ; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
   4185 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   4186 ; X64-NEXT:    retq # encoding: [0xc3]
   4187   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
   4188   ret <32 x i8> %res
   4189 }
   4190 
   4191 define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
   4192 ; X86-LABEL: test_mask_packus_epi16_rrkz_256:
   4193 ; X86:       # %bb.0:
   4194 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   4195 ; X86-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
   4196 ; X86-NEXT:    retl # encoding: [0xc3]
   4197 ;
   4198 ; X64-LABEL: test_mask_packus_epi16_rrkz_256:
   4199 ; X64:       # %bb.0:
   4200 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   4201 ; X64-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
   4202 ; X64-NEXT:    retq # encoding: [0xc3]
   4203   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
   4204   ret <32 x i8> %res
   4205 }
   4206 
   4207 define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   4208 ; X86-LABEL: test_mask_packus_epi16_rm_256:
   4209 ; X86:       # %bb.0:
   4210 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4211 ; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x00]
   4212 ; X86-NEXT:    retl # encoding: [0xc3]
   4213 ;
   4214 ; X64-LABEL: test_mask_packus_epi16_rm_256:
   4215 ; X64:       # %bb.0:
   4216 ; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x67,0x07]
   4217 ; X64-NEXT:    retq # encoding: [0xc3]
   4218   %b = load <16 x i16>, <16 x i16>* %ptr_b
   4219   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   4220   ret <32 x i8> %res
   4221 }
   4222 
   4223 define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
   4224 ; X86-LABEL: test_mask_packus_epi16_rmk_256:
   4225 ; X86:       # %bb.0:
   4226 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4227 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   4228 ; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x08]
   4229 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   4230 ; X86-NEXT:    retl # encoding: [0xc3]
   4231 ;
   4232 ; X64-LABEL: test_mask_packus_epi16_rmk_256:
   4233 ; X64:       # %bb.0:
   4234 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   4235 ; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
   4236 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   4237 ; X64-NEXT:    retq # encoding: [0xc3]
   4238   %b = load <16 x i16>, <16 x i16>* %ptr_b
   4239   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
   4240   ret <32 x i8> %res
   4241 }
   4242 
   4243 define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
   4244 ; X86-LABEL: test_mask_packus_epi16_rmkz_256:
   4245 ; X86:       # %bb.0:
   4246 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4247 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
   4248 ; X86-NEXT:    vpackuswb (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x00]
   4249 ; X86-NEXT:    retl # encoding: [0xc3]
   4250 ;
   4251 ; X64-LABEL: test_mask_packus_epi16_rmkz_256:
   4252 ; X64:       # %bb.0:
   4253 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
   4254 ; X64-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
   4255 ; X64-NEXT:    retq # encoding: [0xc3]
   4256   %b = load <16 x i16>, <16 x i16>* %ptr_b
   4257   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
   4258   ret <32 x i8> %res
   4259 }
   4260 
   4261 declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
   4262 
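; Editor's note: test_cmp_b_256 exercises llvm.x86.avx512.mask.cmp.b.256 with a
; series of immediate comparison predicates under an all-ones mask and inserts
; each 32-bit result into the <8 x i32> return value, so the checks cover the
; vpcmp*b predicate encodings.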
   4263 define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
   4264 ; X86-LABEL: test_cmp_b_256:
   4265 ; X86:       # %bb.0:
   4266 ; X86-NEXT:    pushl %ebx # encoding: [0x53]
   4267 ; X86-NEXT:    .cfi_def_cfa_offset 8
   4268 ; X86-NEXT:    pushl %edi # encoding: [0x57]
   4269 ; X86-NEXT:    .cfi_def_cfa_offset 12
   4270 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   4271 ; X86-NEXT:    .cfi_def_cfa_offset 16
   4272 ; X86-NEXT:    .cfi_offset %esi, -16
   4273 ; X86-NEXT:    .cfi_offset %edi, -12
   4274 ; X86-NEXT:    .cfi_offset %ebx, -8
   4275 ; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
   4276 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4277 ; X86-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
   4278 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4279 ; X86-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
   4280 ; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   4281 ; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
   4282 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   4283 ; X86-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
   4284 ; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
   4285 ; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
   4286 ; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
   4287 ; X86-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
   4288 ; X86-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
   4289 ; X86-NEXT:    vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
   4290 ; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
   4291 ; X86-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
   4292 ; X86-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
   4293 ; X86-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
   4294 ; X86-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
   4295 ; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
   4296 ; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
   4297 ; X86-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
   4298 ; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
   4299 ; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
   4300 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
   4301 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   4302 ; X86-NEXT:    .cfi_def_cfa_offset 12
   4303 ; X86-NEXT:    popl %edi # encoding: [0x5f]
   4304 ; X86-NEXT:    .cfi_def_cfa_offset 8
   4305 ; X86-NEXT:    popl %ebx # encoding: [0x5b]
   4306 ; X86-NEXT:    .cfi_def_cfa_offset 4
   4307 ; X86-NEXT:    retl # encoding: [0xc3]
   4308 ;
   4309 ; X64-LABEL: test_cmp_b_256:
   4310 ; X64:       # %bb.0:
   4311 ; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
   4312 ; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
   4313 ; X64-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 # encoding: [0x62,0xf1,0x75,0x28,0x64,0xc0]
   4314 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4315 ; X64-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
   4316 ; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   4317 ; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
   4318 ; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   4319 ; X64-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
   4320 ; X64-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
   4321 ; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x64,0xc1]
   4322 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4323 ; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
   4324 ; X64-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
   4325 ; X64-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
   4326 ; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
   4327 ; X64-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
   4328 ; X64-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
   4329 ; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
   4330 ; X64-NEXT:    vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
   4331 ; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
   4332 ; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
   4333 ; X64-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
   4334 ; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
   4335 ; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
   4336 ; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
   4337 ; X64-NEXT:    retq # encoding: [0xc3]
   4338   %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
   4339   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
   4340   %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
   4341   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
   4342   %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
   4343   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
   4344   %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
   4345   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
   4346   %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
   4347   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
   4348   %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
   4349   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
   4350   %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
   4351   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
   4352   %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
   4353   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   4354   ret <8 x i32> %vec7
   4355 }
   4356 
   4357 define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
   4358 ; X86-LABEL: test_mask_cmp_b_256:
   4359 ; X86:       # %bb.0:
   4360 ; X86-NEXT:    pushl %ebp # encoding: [0x55]
   4361 ; X86-NEXT:    .cfi_def_cfa_offset 8
   4362 ; X86-NEXT:    pushl %ebx # encoding: [0x53]
   4363 ; X86-NEXT:    .cfi_def_cfa_offset 12
   4364 ; X86-NEXT:    pushl %edi # encoding: [0x57]
   4365 ; X86-NEXT:    .cfi_def_cfa_offset 16
   4366 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   4367 ; X86-NEXT:    .cfi_def_cfa_offset 20
   4368 ; X86-NEXT:    .cfi_offset %esi, -20
   4369 ; X86-NEXT:    .cfi_offset %edi, -16
   4370 ; X86-NEXT:    .cfi_offset %ebx, -12
   4371 ; X86-NEXT:    .cfi_offset %ebp, -8
   4372 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
   4373 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   4374 ; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
   4375 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4376 ; X86-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
   4377 ; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   4378 ; X86-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
   4379 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   4380 ; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
   4381 ; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
   4382 ; X86-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
   4383 ; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
   4384 ; X86-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
   4385 ; X86-NEXT:    kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
   4386 ; X86-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
   4387 ; X86-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
   4388 ; X86-NEXT:    vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
   4389 ; X86-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
   4390 ; X86-NEXT:    vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
   4391 ; X86-NEXT:    vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
   4392 ; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
   4393 ; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
   4394 ; X86-NEXT:    vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
   4395 ; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
   4396 ; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
   4397 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
   4398 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   4399 ; X86-NEXT:    .cfi_def_cfa_offset 16
   4400 ; X86-NEXT:    popl %edi # encoding: [0x5f]
   4401 ; X86-NEXT:    .cfi_def_cfa_offset 12
   4402 ; X86-NEXT:    popl %ebx # encoding: [0x5b]
   4403 ; X86-NEXT:    .cfi_def_cfa_offset 8
   4404 ; X86-NEXT:    popl %ebp # encoding: [0x5d]
   4405 ; X86-NEXT:    .cfi_def_cfa_offset 4
   4406 ; X86-NEXT:    retl # encoding: [0xc3]
   4407 ;
   4408 ; X64-LABEL: test_mask_cmp_b_256:
   4409 ; X64:       # %bb.0:
   4410 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   4411 ; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
   4412 ; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
   4413 ; X64-NEXT:    vpcmpgtb %ymm0, %ymm1, %k0 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x64,0xc0]
   4414 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4415 ; X64-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
   4416 ; X64-NEXT:    kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
   4417 ; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
   4418 ; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   4419 ; X64-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
   4420 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4421 ; X64-NEXT:    vpcmpgtb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x64,0xc1]
   4422 ; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   4423 ; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
   4424 ; X64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
   4425 ; X64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
   4426 ; X64-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
   4427 ; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
   4428 ; X64-NEXT:    vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
   4429 ; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
   4430 ; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
   4431 ; X64-NEXT:    vmovd %r9d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1]
   4432 ; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
   4433 ; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
   4434 ; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
   4435 ; X64-NEXT:    retq # encoding: [0xc3]
   4436   %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
   4437   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
   4438   %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
   4439   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
   4440   %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
   4441   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
   4442   %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
   4443   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
   4444   %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
   4445   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
   4446   %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
   4447   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
   4448   %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
   4449   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
   4450   %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
   4451   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   4452   ret <8 x i32> %vec7
   4453 }
   4454 
   4455 declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
   4456 
   4457 define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
   4458 ; X86-LABEL: test_ucmp_b_256:
   4459 ; X86:       # %bb.0:
   4460 ; X86-NEXT:    pushl %ebx # encoding: [0x53]
   4461 ; X86-NEXT:    .cfi_def_cfa_offset 8
   4462 ; X86-NEXT:    pushl %edi # encoding: [0x57]
   4463 ; X86-NEXT:    .cfi_def_cfa_offset 12
   4464 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   4465 ; X86-NEXT:    .cfi_def_cfa_offset 16
   4466 ; X86-NEXT:    .cfi_offset %esi, -16
   4467 ; X86-NEXT:    .cfi_offset %edi, -12
   4468 ; X86-NEXT:    .cfi_offset %ebx, -8
   4469 ; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
   4470 ; X86-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4471 ; X86-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
   4472 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4473 ; X86-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
   4474 ; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   4475 ; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
   4476 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   4477 ; X86-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
   4478 ; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
   4479 ; X86-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
   4480 ; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
   4481 ; X86-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
   4482 ; X86-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
   4483 ; X86-NEXT:    vpinsrd $2, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x02]
   4484 ; X86-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
   4485 ; X86-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
   4486 ; X86-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
   4487 ; X86-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
   4488 ; X86-NEXT:    vmovd %eax, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd0]
   4489 ; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
   4490 ; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
   4491 ; X86-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
   4492 ; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
   4493 ; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
   4494 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
   4495 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   4496 ; X86-NEXT:    .cfi_def_cfa_offset 12
   4497 ; X86-NEXT:    popl %edi # encoding: [0x5f]
   4498 ; X86-NEXT:    .cfi_def_cfa_offset 8
   4499 ; X86-NEXT:    popl %ebx # encoding: [0x5b]
   4500 ; X86-NEXT:    .cfi_def_cfa_offset 4
   4501 ; X86-NEXT:    retl # encoding: [0xc3]
   4502 ;
   4503 ; X64-LABEL: test_ucmp_b_256:
   4504 ; X64:       # %bb.0:
   4505 ; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x74,0xc1]
   4506 ; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
   4507 ; X64-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
   4508 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4509 ; X64-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
   4510 ; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   4511 ; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
   4512 ; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   4513 ; X64-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
   4514 ; X64-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
   4515 ; X64-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 # encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
   4516 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4517 ; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
   4518 ; X64-NEXT:    vpinsrd $1, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x01]
   4519 ; X64-NEXT:    vpinsrd $2, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x02]
   4520 ; X64-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
   4521 ; X64-NEXT:    vpblendd $8, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x02,0xc1,0x08]
   4522 ; X64-NEXT:    # xmm0 = xmm0[0,1,2],xmm1[3]
   4523 ; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
   4524 ; X64-NEXT:    vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
   4525 ; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
   4526 ; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
   4527 ; X64-NEXT:    vmovd %edx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd2]
   4528 ; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
   4529 ; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
   4530 ; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
   4531 ; X64-NEXT:    retq # encoding: [0xc3]
   4532   %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
   4533   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
   4534   %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
   4535   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
   4536   %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
   4537   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
   4538   %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
   4539   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
   4540   %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
   4541   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
   4542   %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
   4543   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
   4544   %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
   4545   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
   4546   %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
   4547   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   4548   ret <8 x i32> %vec7
   4549 }
   4550 
   4551 define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
   4552 ; X86-LABEL: test_mask_ucmp_b_256:
   4553 ; X86:       # %bb.0:
   4554 ; X86-NEXT:    pushl %ebp # encoding: [0x55]
   4555 ; X86-NEXT:    .cfi_def_cfa_offset 8
   4556 ; X86-NEXT:    pushl %ebx # encoding: [0x53]
   4557 ; X86-NEXT:    .cfi_def_cfa_offset 12
   4558 ; X86-NEXT:    pushl %edi # encoding: [0x57]
   4559 ; X86-NEXT:    .cfi_def_cfa_offset 16
   4560 ; X86-NEXT:    pushl %esi # encoding: [0x56]
   4561 ; X86-NEXT:    .cfi_def_cfa_offset 20
   4562 ; X86-NEXT:    .cfi_offset %esi, -20
   4563 ; X86-NEXT:    .cfi_offset %edi, -16
   4564 ; X86-NEXT:    .cfi_offset %ebx, -12
   4565 ; X86-NEXT:    .cfi_offset %ebp, -8
   4566 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x14]
   4567 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   4568 ; X86-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
   4569 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4570 ; X86-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
   4571 ; X86-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   4572 ; X86-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
   4573 ; X86-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   4574 ; X86-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
   4575 ; X86-NEXT:    kmovd %k0, %edi # encoding: [0xc5,0xfb,0x93,0xf8]
   4576 ; X86-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
   4577 ; X86-NEXT:    kmovd %k0, %ebx # encoding: [0xc5,0xfb,0x93,0xd8]
   4578 ; X86-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
   4579 ; X86-NEXT:    kmovd %k0, %ebp # encoding: [0xc5,0xfb,0x93,0xe8]
   4580 ; X86-NEXT:    vmovd %edi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc7]
   4581 ; X86-NEXT:    vpinsrd $1, %ebx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc3,0x01]
   4582 ; X86-NEXT:    vpinsrd $2, %ebp, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc5,0x02]
   4583 ; X86-NEXT:    vpinsrd $3, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x03]
   4584 ; X86-NEXT:    vmovd %edx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xca]
   4585 ; X86-NEXT:    vmovd %ecx, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd1]
   4586 ; X86-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
   4587 ; X86-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
   4588 ; X86-NEXT:    vmovd %esi, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xd6]
   4589 ; X86-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
   4590 ; X86-NEXT:    # xmm1 = xmm1[0],xmm2[0]
   4591 ; X86-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
   4592 ; X86-NEXT:    popl %esi # encoding: [0x5e]
   4593 ; X86-NEXT:    .cfi_def_cfa_offset 16
   4594 ; X86-NEXT:    popl %edi # encoding: [0x5f]
   4595 ; X86-NEXT:    .cfi_def_cfa_offset 12
   4596 ; X86-NEXT:    popl %ebx # encoding: [0x5b]
   4597 ; X86-NEXT:    .cfi_def_cfa_offset 8
   4598 ; X86-NEXT:    popl %ebp # encoding: [0x5d]
   4599 ; X86-NEXT:    .cfi_def_cfa_offset 4
   4600 ; X86-NEXT:    retl # encoding: [0xc3]
   4601 ;
   4602 ; X64-LABEL: test_mask_ucmp_b_256:
   4603 ; X64:       # %bb.0:
   4604 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   4605 ; X64-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x74,0xc1]
   4606 ; X64-NEXT:    kmovd %k0, %r8d # encoding: [0xc5,0x7b,0x93,0xc0]
   4607 ; X64-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
   4608 ; X64-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4609 ; X64-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
   4610 ; X64-NEXT:    kmovd %k0, %r9d # encoding: [0xc5,0x7b,0x93,0xc8]
   4611 ; X64-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
   4612 ; X64-NEXT:    kmovd %k0, %esi # encoding: [0xc5,0xfb,0x93,0xf0]
   4613 ; X64-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
   4614 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4615 ; X64-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
   4616 ; X64-NEXT:    kmovd %k0, %edx # encoding: [0xc5,0xfb,0x93,0xd0]
   4617 ; X64-NEXT:    vmovd %esi, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc6]
   4618 ; X64-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
   4619 ; X64-NEXT:    vpinsrd $2, %edx, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x02]
   4620 ; X64-NEXT:    vpinsrd $3, %edi, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x22,0xc7,0x03]
   4621 ; X64-NEXT:    vmovd %ecx, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6e,0xc9]
   4622 ; X64-NEXT:    vmovd %r8d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd0]
   4623 ; X64-NEXT:    vpunpckldq %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x62,0xc9]
   4624 ; X64-NEXT:    # xmm1 = xmm2[0],xmm1[0],xmm2[1],xmm1[1]
   4625 ; X64-NEXT:    vmovd %r9d, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xc1,0x79,0x6e,0xd1]
   4626 ; X64-NEXT:    vpunpcklqdq %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x6c,0xca]
   4627 ; X64-NEXT:    # xmm1 = xmm1[0],xmm2[0]
   4628 ; X64-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x75,0x38,0xc0,0x01]
   4629 ; X64-NEXT:    retq # encoding: [0xc3]
   4630   %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
   4631   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
   4632   %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
   4633   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
   4634   %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
   4635   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
   4636   %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
   4637   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
   4638   %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
   4639   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
   4640   %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
   4641   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
   4642   %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
   4643   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
   4644   %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
   4645   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
   4646   ret <8 x i32> %vec7
   4647 }
   4648 
   4649 declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
   4650 
   4651 define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
   4652 ; CHECK-LABEL: test_cmp_w_256:
   4653 ; CHECK:       # %bb.0:
   4654 ; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
   4655 ; CHECK-NEXT:    vpcmpgtw %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x65,0xc8]
   4656 ; CHECK-NEXT:    vpcmplew %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02]
   4657 ; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
   4658 ; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x05]
   4659 ; CHECK-NEXT:    vpcmpgtw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x65,0xe9]
   4660 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4661 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   4662 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   4663 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   4664 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   4665 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   4666 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   4667 ; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   4668 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   4669 ; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   4670 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   4671 ; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   4672 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   4673 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
   4674 ; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
   4675 ; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   4676 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   4677 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4678   %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
   4679   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   4680   %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
   4681   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   4682   %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
   4683   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   4684   %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
   4685   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   4686   %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
   4687   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   4688   %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
   4689   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   4690   %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
   4691   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   4692   %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
   4693   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   4694   ret <8 x i16> %vec7
   4695 }
   4696 
   4697 define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
   4698 ; X86-LABEL: test_mask_cmp_w_256:
   4699 ; X86:       # %bb.0:
   4700 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4701 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   4702 ; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
   4703 ; X86-NEXT:    vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
   4704 ; X86-NEXT:    vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
   4705 ; X86-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
   4706 ; X86-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
   4707 ; X86-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
   4708 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4709 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   4710 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   4711 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
   4712 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   4713 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
   4714 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   4715 ; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
   4716 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   4717 ; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
   4718 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   4719 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   4720 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   4721 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   4722 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   4723 ; X86-NEXT:    retl # encoding: [0xc3]
   4724 ;
   4725 ; X64-LABEL: test_mask_cmp_w_256:
   4726 ; X64:       # %bb.0:
   4727 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   4728 ; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
   4729 ; X64-NEXT:    vpcmpgtw %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x65,0xd0]
   4730 ; X64-NEXT:    vpcmplew %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
   4731 ; X64-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
   4732 ; X64-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x05]
   4733 ; X64-NEXT:    vpcmpgtw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x65,0xc9]
   4734 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4735 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   4736 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   4737 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   4738 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   4739 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   4740 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   4741 ; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   4742 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   4743 ; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   4744 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   4745 ; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   4746 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   4747 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   4748 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   4749 ; X64-NEXT:    retq # encoding: [0xc3]
   4750   %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
   4751   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   4752   %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
   4753   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   4754   %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
   4755   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   4756   %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
   4757   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   4758   %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
   4759   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   4760   %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
   4761   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   4762   %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
   4763   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   4764   %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
   4765   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   4766   ret <8 x i16> %vec7
   4767 }
   4768 
   4769 declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
   4770 
   4771 define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
   4772 ; CHECK-LABEL: test_ucmp_w_256:
   4773 ; CHECK:       # %bb.0:
   4774 ; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x75,0xc1]
   4775 ; CHECK-NEXT:    vpcmpltuw %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01]
   4776 ; CHECK-NEXT:    vpcmpleuw %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02]
   4777 ; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x04]
   4778 ; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x05]
   4779 ; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x06]
   4780 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4781 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   4782 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   4783 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   4784 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   4785 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   4786 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   4787 ; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   4788 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   4789 ; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   4790 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   4791 ; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   4792 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   4793 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
   4794 ; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
   4795 ; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   4796 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   4797 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4798   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
   4799   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   4800   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
   4801   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   4802   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
   4803   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   4804   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
   4805   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   4806   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
   4807   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   4808   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
   4809   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   4810   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
   4811   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   4812   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
   4813   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   4814   ret <8 x i16> %vec7
   4815 }
   4816 
   4817 define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
   4818 ; X86-LABEL: test_mask_ucmp_w_256:
   4819 ; X86:       # %bb.0:
   4820 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4821 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   4822 ; X86-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
   4823 ; X86-NEXT:    vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
   4824 ; X86-NEXT:    vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
   4825 ; X86-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
   4826 ; X86-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
   4827 ; X86-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
   4828 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4829 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   4830 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   4831 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
   4832 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   4833 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
   4834 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   4835 ; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
   4836 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   4837 ; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
   4838 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   4839 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   4840 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   4841 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   4842 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   4843 ; X86-NEXT:    retl # encoding: [0xc3]
   4844 ;
   4845 ; X64-LABEL: test_mask_ucmp_w_256:
   4846 ; X64:       # %bb.0:
   4847 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   4848 ; X64-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x75,0xc1]
   4849 ; X64-NEXT:    vpcmpltuw %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
   4850 ; X64-NEXT:    vpcmpleuw %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
   4851 ; X64-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x04]
   4852 ; X64-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x05]
   4853 ; X64-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x06]
   4854 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4855 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   4856 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   4857 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   4858 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   4859 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   4860 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   4861 ; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   4862 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   4863 ; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   4864 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   4865 ; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   4866 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   4867 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   4868 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   4869 ; X64-NEXT:    retq # encoding: [0xc3]
   4870   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
   4871   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   4872   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
   4873   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   4874   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
   4875   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   4876   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
   4877   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   4878   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
   4879   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   4880   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
   4881   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   4882   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
   4883   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   4884   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
   4885   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   4886   ret <8 x i16> %vec7
   4887 }
   4888 
   4889 declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
   4890 
   4891 define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
   4892 ; CHECK-LABEL: test_cmp_b_128:
   4893 ; CHECK:       # %bb.0:
   4894 ; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
   4895 ; CHECK-NEXT:    vpcmpgtb %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x64,0xc8]
   4896 ; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02]
   4897 ; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
   4898 ; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x05]
   4899 ; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x64,0xe9]
   4900 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4901 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   4902 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   4903 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   4904 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   4905 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   4906 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   4907 ; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   4908 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   4909 ; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   4910 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   4911 ; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   4912 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   4913 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
   4914 ; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
   4915 ; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   4916 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4917   %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
   4918   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   4919   %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
   4920   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   4921   %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
   4922   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   4923   %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
   4924   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   4925   %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
   4926   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   4927   %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
   4928   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   4929   %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
   4930   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   4931   %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
   4932   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   4933   ret <8 x i16> %vec7
   4934 }
   4935 
   4936 define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
   4937 ; X86-LABEL: test_mask_cmp_b_128:
   4938 ; X86:       # %bb.0:
   4939 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4940 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   4941 ; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
   4942 ; X86-NEXT:    vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
   4943 ; X86-NEXT:    vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
   4944 ; X86-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
   4945 ; X86-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
   4946 ; X86-NEXT:    vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
   4947 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   4948 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   4949 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   4950 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
   4951 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   4952 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
   4953 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   4954 ; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
   4955 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   4956 ; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
   4957 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   4958 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   4959 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   4960 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   4961 ; X86-NEXT:    retl # encoding: [0xc3]
   4962 ;
   4963 ; X64-LABEL: test_mask_cmp_b_128:
   4964 ; X64:       # %bb.0:
   4965 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   4966 ; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
   4967 ; X64-NEXT:    vpcmpgtb %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x64,0xd0]
   4968 ; X64-NEXT:    vpcmpleb %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
   4969 ; X64-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
   4970 ; X64-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x05]
   4971 ; X64-NEXT:    vpcmpgtb %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc9]
   4972 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   4973 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   4974 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   4975 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   4976 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   4977 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   4978 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   4979 ; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   4980 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   4981 ; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   4982 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   4983 ; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   4984 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   4985 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   4986 ; X64-NEXT:    retq # encoding: [0xc3]
   4987   %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
   4988   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   4989   %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
   4990   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   4991   %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
   4992   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   4993   %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
   4994   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   4995   %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
   4996   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   4997   %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
   4998   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   4999   %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
   5000   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   5001   %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
   5002   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   5003   ret <8 x i16> %vec7
   5004 }
   5005 
   5006 declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
   5007 
   5008 define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
   5009 ; CHECK-LABEL: test_ucmp_b_128:
   5010 ; CHECK:       # %bb.0:
   5011 ; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
   5012 ; CHECK-NEXT:    vpcmpltub %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01]
   5013 ; CHECK-NEXT:    vpcmpleub %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02]
   5014 ; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x04]
   5015 ; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x05]
   5016 ; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x06]
   5017 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5018 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   5019 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5020 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   5021 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5022 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   5023 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5024 ; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   5025 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5026 ; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   5027 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5028 ; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   5029 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5030 ; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm1, %xmm1 # encoding: [0xc5,0xf1,0x76,0xc9]
   5031 ; CHECK-NEXT:    vpblendw $128, %xmm1, %xmm0, %xmm0 # encoding: [0xc4,0xe3,0x79,0x0e,0xc1,0x80]
   5032 ; CHECK-NEXT:    # xmm0 = xmm0[0,1,2,3,4,5,6],xmm1[7]
   5033 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5034   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
   5035   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   5036   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
   5037   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   5038   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
   5039   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   5040   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
   5041   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   5042   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
   5043   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   5044   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
   5045   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   5046   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
   5047   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   5048   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
   5049   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   5050   ret <8 x i16> %vec7
   5051 }
   5052 
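; With a mask argument, every compare executes under {%k1} and the always-true
; predicate 7 reduces to the mask itself, so the incoming mask GPR is inserted
; directly into lane 7 with vpinsrw.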
   5053 define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
   5054 ; X86-LABEL: test_mask_ucmp_b_128:
   5055 ; X86:       # %bb.0:
   5056 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5057 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   5058 ; X86-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
   5059 ; X86-NEXT:    vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
   5060 ; X86-NEXT:    vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
   5061 ; X86-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
   5062 ; X86-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
   5063 ; X86-NEXT:    vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
   5064 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5065 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   5066 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   5067 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
   5068 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   5069 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
   5070 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   5071 ; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
   5072 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   5073 ; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
   5074 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   5075 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   5076 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   5077 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5078 ; X86-NEXT:    retl # encoding: [0xc3]
   5079 ;
   5080 ; X64-LABEL: test_mask_ucmp_b_128:
   5081 ; X64:       # %bb.0:
   5082 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5083 ; X64-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
   5084 ; X64-NEXT:    vpcmpltub %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
   5085 ; X64-NEXT:    vpcmpleub %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
   5086 ; X64-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x04]
   5087 ; X64-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x05]
   5088 ; X64-NEXT:    vpcmpnleub %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x06]
   5089 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5090 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   5091 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5092 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   5093 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5094 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   5095 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5096 ; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   5097 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5098 ; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   5099 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5100 ; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   5101 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5102 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   5103 ; X64-NEXT:    retq # encoding: [0xc3]
   5104   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
   5105   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
   5106   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
   5107   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
   5108   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
   5109   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
   5110   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
   5111   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
   5112   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
   5113   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
   5114   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
   5115   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
   5116   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
   5117   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
   5118   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
   5119   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
   5120   ret <8 x i16> %vec7
   5121 }
   5122 
   5123 declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
   5124 
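; Signed word compares: eq and gt use vpcmpeqw/vpcmpgtw (lt is gt with the
; operands swapped), while le/neq/nlt use the vpcmpw immediate forms. The
; result element type is i8 here, so the always-true predicate materializes as
; movl $255 inserted into lane 7.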
   5125 define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
   5126 ; CHECK-LABEL: test_cmp_w_128:
   5127 ; CHECK:       # %bb.0:
   5128 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
   5129 ; CHECK-NEXT:    vpcmpgtw %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x65,0xc8]
   5130 ; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x02]
   5131 ; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x04]
   5132 ; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe1,0x05]
   5133 ; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x65,0xe9]
   5134 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5135 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   5136 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5137 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   5138 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5139 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   5140 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5141 ; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   5142 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5143 ; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   5144 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5145 ; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   5146 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5147 ; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
   5148 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5149 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5150   %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
   5151   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
   5152   %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
   5153   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
   5154   %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
   5155   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
   5156   %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
   5157   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
   5158   %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
   5159   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
   5160   %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
   5161   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
   5162   %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
   5163   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
   5164   %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
   5165   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   5166   ret <8 x i8> %vec7
   5167 }
   5168 
   5169 define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
   5170 ; X86-LABEL: test_mask_cmp_w_128:
   5171 ; X86:       # %bb.0:
   5172 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5173 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   5174 ; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
   5175 ; X86-NEXT:    vpcmpgtw %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xd0]
   5176 ; X86-NEXT:    vpcmplew %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02]
   5177 ; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
   5178 ; X86-NEXT:    vpcmpnltw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x05]
   5179 ; X86-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
   5180 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5181 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   5182 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   5183 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
   5184 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   5185 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
   5186 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   5187 ; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
   5188 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   5189 ; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
   5190 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   5191 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   5192 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   5193 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5194 ; X86-NEXT:    retl # encoding: [0xc3]
   5195 ;
   5196 ; X64-LABEL: test_mask_cmp_w_128:
   5197 ; X64:       # %bb.0:
   5198 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5199 ; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
   5200 ; X64-NEXT:    vpcmpgtw %xmm0, %xmm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x65,0xd0]
   5201 ; X64-NEXT:    vpcmplew %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02]
   5202 ; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
   5203 ; X64-NEXT:    vpcmpnltw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x05]
   5204 ; X64-NEXT:    vpcmpgtw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc9]
   5205 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5206 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   5207 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5208 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   5209 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5210 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   5211 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5212 ; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   5213 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5214 ; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   5215 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5216 ; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   5217 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5218 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   5219 ; X64-NEXT:    retq # encoding: [0xc3]
   5220   %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
   5221   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
   5222   %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
   5223   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
   5224   %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
   5225   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
   5226   %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
   5227   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
   5228   %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
   5229   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
   5230   %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
   5231   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
   5232   %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
   5233   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
   5234   %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
   5235   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   5236   ret <8 x i8> %vec7
   5237 }
   5238 
   5239 declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
   5240 
   5241 define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
   5242 ; CHECK-LABEL: test_ucmp_w_128:
   5243 ; CHECK:       # %bb.0:
   5244 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
   5245 ; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x01]
   5246 ; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd1,0x02]
   5247 ; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x04]
   5248 ; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x05]
   5249 ; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe9,0x06]
   5250 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5251 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   5252 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5253 ; CHECK-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   5254 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5255 ; CHECK-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   5256 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5257 ; CHECK-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   5258 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5259 ; CHECK-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   5260 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5261 ; CHECK-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   5262 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5263 ; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
   5264 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5265 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5266   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
   5267   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
   5268   %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
   5269   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
   5270   %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
   5271   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
   5272   %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
   5273   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
   5274   %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
   5275   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
   5276   %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
   5277   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
   5278   %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
   5279   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
   5280   %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
   5281   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   5282   ret <8 x i8> %vec7
   5283 }
   5284 
   5285 define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
   5286 ; X86-LABEL: test_mask_ucmp_w_128:
   5287 ; X86:       # %bb.0:
   5288 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5289 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   5290 ; X86-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
   5291 ; X86-NEXT:    vpcmpltuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01]
   5292 ; X86-NEXT:    vpcmpleuw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02]
   5293 ; X86-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
   5294 ; X86-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x05]
   5295 ; X86-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
   5296 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5297 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   5298 ; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
   5299 ; X86-NEXT:    kmovd %k2, %ecx # encoding: [0xc5,0xfb,0x93,0xca]
   5300 ; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
   5301 ; X86-NEXT:    kmovd %k3, %ecx # encoding: [0xc5,0xfb,0x93,0xcb]
   5302 ; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
   5303 ; X86-NEXT:    kmovd %k4, %ecx # encoding: [0xc5,0xfb,0x93,0xcc]
   5304 ; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
   5305 ; X86-NEXT:    kmovd %k5, %ecx # encoding: [0xc5,0xfb,0x93,0xcd]
   5306 ; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
   5307 ; X86-NEXT:    kmovd %k1, %ecx # encoding: [0xc5,0xfb,0x93,0xc9]
   5308 ; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
   5309 ; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   5310 ; X86-NEXT:    retl # encoding: [0xc3]
   5311 ;
   5312 ; X64-LABEL: test_mask_ucmp_w_128:
   5313 ; X64:       # %bb.0:
   5314 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5315 ; X64-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
   5316 ; X64-NEXT:    vpcmpltuw %xmm1, %xmm0, %k2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01]
   5317 ; X64-NEXT:    vpcmpleuw %xmm1, %xmm0, %k3 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02]
   5318 ; X64-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x04]
   5319 ; X64-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k5 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x05]
   5320 ; X64-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x06]
   5321 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5322 ; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   5323 ; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   5324 ; X64-NEXT:    kmovd %k2, %eax # encoding: [0xc5,0xfb,0x93,0xc2]
   5325 ; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   5326 ; X64-NEXT:    kmovd %k3, %eax # encoding: [0xc5,0xfb,0x93,0xc3]
   5327 ; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   5328 ; X64-NEXT:    kmovd %k4, %eax # encoding: [0xc5,0xfb,0x93,0xc4]
   5329 ; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   5330 ; X64-NEXT:    kmovd %k5, %eax # encoding: [0xc5,0xfb,0x93,0xc5]
   5331 ; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   5332 ; X64-NEXT:    kmovd %k1, %eax # encoding: [0xc5,0xfb,0x93,0xc1]
   5333 ; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   5334 ; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
   5335 ; X64-NEXT:    retq # encoding: [0xc3]
   5336   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
   5337   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
   5338   %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
   5339   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
   5340   %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
   5341   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
   5342   %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
   5343   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
   5344   %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
   5345   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
   5346   %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
   5347   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
   5348   %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
   5349   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
   5350   %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
   5351   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   5352   ret <8 x i8> %vec7
   5353 }
   5354 
   5355 declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
   5356 
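; The pavg tests call the intrinsic once with the real mask and once with an
; all-ones mask, then add the two results, so the checks expect one unmasked
; vpavg (EVEX-to-VEX compressed) plus one vpavg masked into the passthrough.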
   5357 define <16 x i8>@mm_mask_avg_epu8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
   5358 ; X86-LABEL: mm_mask_avg_epu8:
   5359 ; X86:       # %bb.0:
   5360 ; X86-NEXT:    vpavgb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xd9]
   5361 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5362 ; X86-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
   5363 ; X86-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
   5364 ; X86-NEXT:    retl # encoding: [0xc3]
   5365 ;
   5366 ; X64-LABEL: mm_mask_avg_epu8:
   5367 ; X64:       # %bb.0:
   5368 ; X64-NEXT:    vpavgb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe0,0xd9]
   5369 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5370 ; X64-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
   5371 ; X64-NEXT:    vpaddb %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfc,0xc3]
   5372 ; X64-NEXT:    retq # encoding: [0xc3]
   5373   %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
   5374   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
   5375   %res2 = add <16 x i8> %res, %res1
   5376   ret <16 x i8> %res2
   5377 }
   5378 
   5379 declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
   5380 
   5381 define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
   5382 ; X86-LABEL: test_int_x86_avx512_mask_pabs_b_128:
   5383 ; X86:       # %bb.0:
   5384 ; X86-NEXT:    vpabsb %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xd0]
   5385 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5386 ; X86-NEXT:    vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
   5387 ; X86-NEXT:    vpaddb %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc2]
   5388 ; X86-NEXT:    retl # encoding: [0xc3]
   5389 ;
   5390 ; X64-LABEL: test_int_x86_avx512_mask_pabs_b_128:
   5391 ; X64:       # %bb.0:
   5392 ; X64-NEXT:    vpabsb %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1c,0xd0]
   5393 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5394 ; X64-NEXT:    vpabsb %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
   5395 ; X64-NEXT:    vpaddb %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfc,0xc2]
   5396 ; X64-NEXT:    retq # encoding: [0xc3]
   5397   %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
   5398   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
   5399   %res2 = add <16 x i8> %res, %res1
   5400   ret <16 x i8> %res2
   5401 }
   5402 
   5403 declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   5404 
   5405 define <32 x i8>@mm256_mask_avg_epu8(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   5406 ; X86-LABEL: mm256_mask_avg_epu8:
   5407 ; X86:       # %bb.0:
   5408 ; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xd9]
   5409 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   5410 ; X86-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
   5411 ; X86-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   5412 ; X86-NEXT:    retl # encoding: [0xc3]
   5413 ;
   5414 ; X64-LABEL: mm256_mask_avg_epu8:
   5415 ; X64:       # %bb.0:
   5416 ; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe0,0xd9]
   5417 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5418 ; X64-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
   5419 ; X64-NEXT:    vpaddb %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfc,0xc3]
   5420 ; X64-NEXT:    retq # encoding: [0xc3]
   5421   %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   5422   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   5423   %res2 = add <32 x i8> %res, %res1
   5424   ret <32 x i8> %res2
   5425 }
   5426 
   5427 declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32)
   5428 
   5429 define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
   5430 ; X86-LABEL: test_int_x86_avx512_mask_pabs_b_256:
   5431 ; X86:       # %bb.0:
   5432 ; X86-NEXT:    vpabsb %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xd0]
   5433 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   5434 ; X86-NEXT:    vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
   5435 ; X86-NEXT:    vpaddb %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc2]
   5436 ; X86-NEXT:    retl # encoding: [0xc3]
   5437 ;
   5438 ; X64-LABEL: test_int_x86_avx512_mask_pabs_b_256:
   5439 ; X64:       # %bb.0:
   5440 ; X64-NEXT:    vpabsb %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1c,0xd0]
   5441 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5442 ; X64-NEXT:    vpabsb %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
   5443 ; X64-NEXT:    vpaddb %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfc,0xc2]
   5444 ; X64-NEXT:    retq # encoding: [0xc3]
   5445   %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
   5446   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
   5447   %res2 = add <32 x i8> %res, %res1
   5448   ret <32 x i8> %res2
   5449 }
   5450 
   5451 declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   5452 
   5453 define <8 x i16>@mm_mask_avg_epu16(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5454 ; X86-LABEL: mm_mask_avg_epu16:
   5455 ; X86:       # %bb.0:
   5456 ; X86-NEXT:    vpavgw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xd9]
   5457 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5458 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   5459 ; X86-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
   5460 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5461 ; X86-NEXT:    retl # encoding: [0xc3]
   5462 ;
   5463 ; X64-LABEL: mm_mask_avg_epu16:
   5464 ; X64:       # %bb.0:
   5465 ; X64-NEXT:    vpavgw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe3,0xd9]
   5466 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5467 ; X64-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
   5468 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5469 ; X64-NEXT:    retq # encoding: [0xc3]
   5470   %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5471   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5472   %res2 = add <8 x i16> %res, %res1
   5473   ret <8 x i16> %res2
   5474 }
   5475 
   5476 declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)
   5477 
   5478 define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
   5479 ; X86-LABEL: test_int_x86_avx512_mask_pabs_w_128:
   5480 ; X86:       # %bb.0:
   5481 ; X86-NEXT:    vpabsw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xd0]
   5482 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5483 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   5484 ; X86-NEXT:    vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
   5485 ; X86-NEXT:    vpaddw %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc2]
   5486 ; X86-NEXT:    retl # encoding: [0xc3]
   5487 ;
   5488 ; X64-LABEL: test_int_x86_avx512_mask_pabs_w_128:
   5489 ; X64:       # %bb.0:
   5490 ; X64-NEXT:    vpabsw %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1d,0xd0]
   5491 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5492 ; X64-NEXT:    vpabsw %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
   5493 ; X64-NEXT:    vpaddw %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc2]
   5494 ; X64-NEXT:    retq # encoding: [0xc3]
   5495   %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
   5496   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
   5497   %res2 = add <8 x i16> %res, %res1
   5498   ret <8 x i16> %res2
   5499 }
   5500 
   5501 declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5502 
   5503 define <16 x i16>@mm256_mask_avg_epu16(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5504 ; X86-LABEL: mm256_mask_avg_epu16:
   5505 ; X86:       # %bb.0:
   5506 ; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xd9]
   5507 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5508 ; X86-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
   5509 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   5510 ; X86-NEXT:    retl # encoding: [0xc3]
   5511 ;
   5512 ; X64-LABEL: mm256_mask_avg_epu16:
   5513 ; X64:       # %bb.0:
   5514 ; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe3,0xd9]
   5515 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5516 ; X64-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
   5517 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   5518 ; X64-NEXT:    retq # encoding: [0xc3]
   5519   %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5520   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   5521   %res2 = add <16 x i16> %res, %res1
   5522   ret <16 x i16> %res2
   5523 }
   5524 
   5525 declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)
   5526 
   5527 define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
   5528 ; X86-LABEL: test_int_x86_avx512_mask_pabs_w_256:
   5529 ; X86:       # %bb.0:
   5530 ; X86-NEXT:    vpabsw %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xd0]
   5531 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5532 ; X86-NEXT:    vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
   5533 ; X86-NEXT:    vpaddw %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc2]
   5534 ; X86-NEXT:    retl # encoding: [0xc3]
   5535 ;
   5536 ; X64-LABEL: test_int_x86_avx512_mask_pabs_w_256:
   5537 ; X64:       # %bb.0:
   5538 ; X64-NEXT:    vpabsw %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1d,0xd0]
   5539 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5540 ; X64-NEXT:    vpabsw %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
   5541 ; X64-NEXT:    vpaddw %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc2]
   5542 ; X64-NEXT:    retq # encoding: [0xc3]
   5543   %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
   5544   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
   5545   %res2 = add <16 x i16> %res, %res1
   5546   ret <16 x i16> %res2
   5547 }
   5548 
   5549 declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   5550 
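; The ptestm/ptestnm intrinsics return a scalar mask, so each test lowers to
; vptestm/vptestnm into %k0, a kmovd into a GPR, and an and/add that combines
; the masked and unmasked results.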
   5551 declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16)
   5552 
   5553 define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
   5554 ; X86-LABEL: test_int_x86_avx512_ptestm_b_128:
   5555 ; X86:       # %bb.0:
   5556 ; X86-NEXT:    vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
   5557 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5558 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5559 ; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
   5560 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   5561 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
   5562 ; X86-NEXT:    retl # encoding: [0xc3]
   5563 ;
   5564 ; X64-LABEL: test_int_x86_avx512_ptestm_b_128:
   5565 ; X64:       # %bb.0:
   5566 ; X64-NEXT:    vptestmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
   5567 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5568 ; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
   5569 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   5570 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
   5571 ; X64-NEXT:    retq # encoding: [0xc3]
   5572   %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
   5573   %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
   5574   %res2 = add i16 %res, %res1
   5575   ret i16 %res2
   5576 }
   5577 
   5578 declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32)
   5579 
   5580 define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
   5581 ; X86-LABEL: test_int_x86_avx512_ptestm_b_256:
   5582 ; X86:       # %bb.0:
   5583 ; X86-NEXT:    vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
   5584 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5585 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5586 ; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
   5587 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   5588 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5589 ; X86-NEXT:    retl # encoding: [0xc3]
   5590 ;
   5591 ; X64-LABEL: test_int_x86_avx512_ptestm_b_256:
   5592 ; X64:       # %bb.0:
   5593 ; X64-NEXT:    vptestmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
   5594 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5595 ; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
   5596 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   5597 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5598 ; X64-NEXT:    retq # encoding: [0xc3]
   5599   %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
   5600   %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
   5601   %res2 = add i32 %res, %res1
   5602   ret i32 %res2
   5603 }
   5604 
   5605 declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8)
   5606 
   5607 define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
   5608 ; X86-LABEL: test_int_x86_avx512_ptestm_w_128:
   5609 ; X86:       # %bb.0:
   5610 ; X86-NEXT:    vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
   5611 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5612 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
   5613 ; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
   5614 ; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
   5615 ; X86-NEXT:    retl # encoding: [0xc3]
   5616 ;
   5617 ; X64-LABEL: test_int_x86_avx512_ptestm_w_128:
   5618 ; X64:       # %bb.0:
   5619 ; X64-NEXT:    vptestmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
   5620 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5621 ; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
   5622 ; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
   5623 ; X64-NEXT:    # kill: def $al killed $al killed $eax
   5624 ; X64-NEXT:    retq # encoding: [0xc3]
   5625   %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
   5626   %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
   5627   %res2 = add i8 %res, %res1
   5628   ret i8 %res2
   5629 }
   5630 
   5631 declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16)
   5632 
   5633 define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
   5634 ; X86-LABEL: test_int_x86_avx512_ptestm_w_256:
   5635 ; X86:       # %bb.0:
   5636 ; X86-NEXT:    vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
   5637 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5638 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5639 ; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
   5640 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   5641 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
   5642 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5643 ; X86-NEXT:    retl # encoding: [0xc3]
   5644 ;
   5645 ; X64-LABEL: test_int_x86_avx512_ptestm_w_256:
   5646 ; X64:       # %bb.0:
   5647 ; X64-NEXT:    vptestmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
   5648 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5649 ; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
   5650 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   5651 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
   5652 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5653 ; X64-NEXT:    retq # encoding: [0xc3]
   5654   %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
   5655   %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
   5656   %res2 = add i16 %res, %res1
   5657   ret i16 %res2
   5658 }
   5659 
   5660 declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16)
   5661 
   5662 define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
   5663 ; X86-LABEL: test_int_x86_avx512_ptestnm_b_128:
   5664 ; X86:       # %bb.0:
   5665 ; X86-NEXT:    vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
   5666 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5667 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5668 ; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
   5669 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   5670 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
   5671 ; X86-NEXT:    retl # encoding: [0xc3]
   5672 ;
   5673 ; X64-LABEL: test_int_x86_avx512_ptestnm_b_128:
   5674 ; X64:       # %bb.0:
   5675 ; X64-NEXT:    vptestnmb %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
   5676 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5677 ; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
   5678 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   5679 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
   5680 ; X64-NEXT:    retq # encoding: [0xc3]
   5681   %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
   5682   %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
   5683   %res2 = add i16 %res, %res1
   5684   ret i16 %res2
   5685 }
   5686 
   5687 declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32)
   5688 
   5689 define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
   5690 ; X86-LABEL: test_int_x86_avx512_ptestnm_b_256:
   5691 ; X86:       # %bb.0:
   5692 ; X86-NEXT:    vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
   5693 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5694 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   5695 ; X86-NEXT:    andl %ecx, %eax # encoding: [0x21,0xc8]
   5696 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   5697 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5698 ; X86-NEXT:    retl # encoding: [0xc3]
   5699 ;
   5700 ; X64-LABEL: test_int_x86_avx512_ptestnm_b_256:
   5701 ; X64:       # %bb.0:
   5702 ; X64-NEXT:    vptestnmb %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
   5703 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5704 ; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
   5705 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   5706 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5707 ; X64-NEXT:    retq # encoding: [0xc3]
   5708   %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
   5709   %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
   5710   %res2 = add i32 %res, %res1
   5711   ret i32 %res2
   5712 }
   5713 
   5714 declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8)
   5715 
   5716 define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
   5717 ; X86-LABEL: test_int_x86_avx512_ptestnm_w_128:
   5718 ; X86:       # %bb.0:
   5719 ; X86-NEXT:    vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
   5720 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5721 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
   5722 ; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
   5723 ; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
   5724 ; X86-NEXT:    retl # encoding: [0xc3]
   5725 ;
   5726 ; X64-LABEL: test_int_x86_avx512_ptestnm_w_128:
   5727 ; X64:       # %bb.0:
   5728 ; X64-NEXT:    vptestnmw %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
   5729 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5730 ; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
   5731 ; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
   5732 ; X64-NEXT:    # kill: def $al killed $al killed $eax
   5733 ; X64-NEXT:    retq # encoding: [0xc3]
   5734   %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
   5735   %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
   5736   %res2 = add i8 %res, %res1
   5737   ret i8 %res2
   5738 }
   5739 
   5740 declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16)
   5741 
   5742 define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
   5743 ; X86-LABEL: test_int_x86_avx512_ptestnm_w_256:
   5744 ; X86:       # %bb.0:
   5745 ; X86-NEXT:    vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
   5746 ; X86-NEXT:    kmovd %k0, %ecx # encoding: [0xc5,0xfb,0x93,0xc8]
   5747 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
   5748 ; X86-NEXT:    andw %cx, %ax # encoding: [0x66,0x21,0xc8]
   5749 ; X86-NEXT:    addl %ecx, %eax # encoding: [0x01,0xc8]
   5750 ; X86-NEXT:    # kill: def $ax killed $ax killed $eax
   5751 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5752 ; X86-NEXT:    retl # encoding: [0xc3]
   5753 ;
   5754 ; X64-LABEL: test_int_x86_avx512_ptestnm_w_256:
   5755 ; X64:       # %bb.0:
   5756 ; X64-NEXT:    vptestnmw %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
   5757 ; X64-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5758 ; X64-NEXT:    andl %eax, %edi # encoding: [0x21,0xc7]
   5759 ; X64-NEXT:    addl %edi, %eax # encoding: [0x01,0xf8]
   5760 ; X64-NEXT:    # kill: def $ax killed $ax killed $eax
   5761 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5762 ; X64-NEXT:    retq # encoding: [0xc3]
   5763   %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
   5764   %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
   5765   %res2 = add i16 %res, %res1
   5766   ret i16 %res2
   5767 }
   5768 
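; cvtb2mask/cvtw2mask lower to vpmovb2m/vpmovw2m followed by kmovd; the
; 256-bit variants also emit vzeroupper before returning.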
   5769 declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
   5770 
   5771 define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
   5772 ; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
   5773 ; CHECK:       # %bb.0:
   5774 ; CHECK-NEXT:    vpmovb2m %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0]
   5775 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5776 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
   5777 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5778     %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
   5779     ret i16 %res
   5780 }
   5781 
   5782 declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
   5783 
   5784 define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
   5785 ; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
   5786 ; CHECK:       # %bb.0:
   5787 ; CHECK-NEXT:    vpmovb2m %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0]
   5788 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5789 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5790 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5791     %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
   5792     ret i32 %res
   5793 }
   5794 
   5795 declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
   5796 
   5797 define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
   5798 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
   5799 ; CHECK:       # %bb.0:
   5800 ; CHECK-NEXT:    vpmovw2m %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
   5801 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5802 ; CHECK-NEXT:    # kill: def $al killed $al killed $eax
   5803 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5804     %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
   5805     ret i8 %res
   5806 }
   5807 
   5808 declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
   5809 
   5810 define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
   5811 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
   5812 ; CHECK:       # %bb.0:
   5813 ; CHECK-NEXT:    vpmovw2m %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
   5814 ; CHECK-NEXT:    kmovd %k0, %eax # encoding: [0xc5,0xfb,0x93,0xc0]
   5815 ; CHECK-NEXT:    # kill: def $ax killed $ax killed $eax
   5816 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   5817 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   5818     %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
   5819     ret i16 %res
   5820 }
   5821 
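; The pmulhu/pmulh tests follow the same masked-plus-unmasked pattern as the
; pavg tests above: one VEX-compressed vpmulhuw/vpmulhw plus one EVEX form
; masked with {%k1}, with the two results added together.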
   5822 declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5823 
   5824 define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5825 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
   5826 ; X86:       # %bb.0:
   5827 ; X86-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xd9]
   5828 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5829 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   5830 ; X86-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
   5831 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5832 ; X86-NEXT:    retl # encoding: [0xc3]
   5833 ;
   5834 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
   5835 ; X64:       # %bb.0:
   5836 ; X64-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe4,0xd9]
   5837 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5838 ; X64-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
   5839 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5840 ; X64-NEXT:    retq # encoding: [0xc3]
   5841   %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5842   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5843   %res2 = add <8 x i16> %res, %res1
   5844   ret <8 x i16> %res2
   5845 }
   5846 
   5847 declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   5848 
   5849 define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5850 ; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
   5851 ; X86:       # %bb.0:
   5852 ; X86-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xd9]
   5853 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5854 ; X86-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
   5855 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   5856 ; X86-NEXT:    retl # encoding: [0xc3]
   5857 ;
   5858 ; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
   5859 ; X64:       # %bb.0:
   5860 ; X64-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe4,0xd9]
   5861 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5862 ; X64-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
   5863 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   5864 ; X64-NEXT:    retq # encoding: [0xc3]
   5865   %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5866   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   5867   %res2 = add <16 x i16> %res, %res1
   5868   ret <16 x i16> %res2
   5869 }
   5870 
   5871 declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5872 
   5873 define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5874 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
   5875 ; X86:       # %bb.0:
   5876 ; X86-NEXT:    vpmulhw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xd9]
   5877 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5878 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   5879 ; X86-NEXT:    vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
   5880 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5881 ; X86-NEXT:    retl # encoding: [0xc3]
   5882 ;
   5883 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
   5884 ; X64:       # %bb.0:
   5885 ; X64-NEXT:    vpmulhw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe5,0xd9]
   5886 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5887 ; X64-NEXT:    vpmulhw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
   5888 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5889 ; X64-NEXT:    retq # encoding: [0xc3]
   5890   %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5891   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5892   %res2 = add <8 x i16> %res, %res1
   5893   ret <8 x i16> %res2
   5894 }
   5895 
   5896 declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   5897 
   5898 define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5899 ; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
   5900 ; X86:       # %bb.0:
   5901 ; X86-NEXT:    vpmulhw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xd9]
   5902 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5903 ; X86-NEXT:    vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
   5904 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   5905 ; X86-NEXT:    retl # encoding: [0xc3]
   5906 ;
   5907 ; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
   5908 ; X64:       # %bb.0:
   5909 ; X64-NEXT:    vpmulhw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe5,0xd9]
   5910 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5911 ; X64-NEXT:    vpmulhw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
   5912 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   5913 ; X64-NEXT:    retq # encoding: [0xc3]
   5914   %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5915   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   5916   %res2 = add <16 x i16> %res, %res1
   5917   ret <16 x i16> %res2
   5918 }
   5919 
   5920 declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5921 
   5922 define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5923 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
   5924 ; X86:       # %bb.0:
   5925 ; X86-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xd9]
   5926 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5927 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   5928 ; X86-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
   5929 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5930 ; X86-NEXT:    retl # encoding: [0xc3]
   5931 ;
   5932 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
   5933 ; X64:       # %bb.0:
   5934 ; X64-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0b,0xd9]
   5935 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5936 ; X64-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
   5937 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5938 ; X64-NEXT:    retq # encoding: [0xc3]
   5939   %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5940   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5941   %res2 = add <8 x i16> %res, %res1
   5942   ret <8 x i16> %res2
   5943 }
   5944 
   5945 declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   5946 
   5947 define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5948 ; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
   5949 ; X86:       # %bb.0:
   5950 ; X86-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xd9]
   5951 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   5952 ; X86-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
   5953 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   5954 ; X86-NEXT:    retl # encoding: [0xc3]
   5955 ;
   5956 ; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
   5957 ; X64:       # %bb.0:
   5958 ; X64-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0b,0xd9]
   5959 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5960 ; X64-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
   5961 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   5962 ; X64-NEXT:    retq # encoding: [0xc3]
   5963   %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5964   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   5965   %res2 = add <16 x i16> %res, %res1
   5966   ret <16 x i16> %res2
   5967 }
   5968 
   5969 declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)
   5970 
   5971 define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
   5972 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
   5973 ; X86:       # %bb.0:
   5974 ; X86-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xd9]
   5975 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5976 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   5977 ; X86-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
   5978 ; X86-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5979 ; X86-NEXT:    retl # encoding: [0xc3]
   5980 ;
   5981 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
   5982 ; X64:       # %bb.0:
   5983 ; X64-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x04,0xd9]
   5984 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   5985 ; X64-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
   5986 ; X64-NEXT:    vpaddw %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc3]
   5987 ; X64-NEXT:    retq # encoding: [0xc3]
   5988   %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
   5989   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
   5990   %res2 = add <8 x i16> %res, %res1
   5991   ret <8 x i16> %res2
   5992 }
   5993 
   5994 declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)
   5995 
   5996 define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
   5997 ; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
   5998 ; X86:       # %bb.0:
   5999 ; X86-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xd9]
   6000 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6001 ; X86-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
   6002 ; X86-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   6003 ; X86-NEXT:    retl # encoding: [0xc3]
   6004 ;
   6005 ; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
   6006 ; X64:       # %bb.0:
   6007 ; X64-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x04,0xd9]
   6008 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6009 ; X64-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
   6010 ; X64-NEXT:    vpaddw %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc3]
   6011 ; X64-NEXT:    retq # encoding: [0xc3]
   6012   %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
   6013   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
   6014   %res2 = add <16 x i16> %res, %res1
   6015   ret <16 x i16> %res2
   6016 }
   6017 
   6018 declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)
   6019 
   6020 define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
   6021 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
   6022 ; X86:       # %bb.0:
   6023 ; X86-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xd9]
   6024 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6025 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   6026 ; X86-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
   6027 ; X86-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
   6028 ; X86-NEXT:    retl # encoding: [0xc3]
   6029 ;
   6030 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
   6031 ; X64:       # %bb.0:
   6032 ; X64-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf5,0xd9]
   6033 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6034 ; X64-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
   6035 ; X64-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
   6036 ; X64-NEXT:    retq # encoding: [0xc3]
   6037   %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
   6038   %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
   6039   %res2 = add <4 x i32> %res, %res1
   6040   ret <4 x i32> %res2
   6041 }
   6042 
   6043 declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)
   6044 
   6045 define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
   6046 ; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
   6047 ; X86:       # %bb.0:
   6048 ; X86-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xd9]
   6049 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6050 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   6051 ; X86-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
   6052 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   6053 ; X86-NEXT:    retl # encoding: [0xc3]
   6054 ;
   6055 ; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
   6056 ; X64:       # %bb.0:
   6057 ; X64-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf5,0xd9]
   6058 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6059 ; X64-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
   6060 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   6061 ; X64-NEXT:    retq # encoding: [0xc3]
   6062   %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3)
   6063   %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1)
   6064   %res2 = add <8 x i32> %res, %res1
   6065   ret <8 x i32> %res2
   6066 }
   6067 
   6068 declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   6069 
   6070 define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   6071 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
   6072 ; X86:       # %bb.0:
   6073 ; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8]
   6074 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6075 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   6076 ; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
   6077 ; X86-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
   6078 ; X86-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
   6079 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   6080 ; X86-NEXT:    retl # encoding: [0xc3]
   6081 ;
   6082 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
   6083 ; X64:       # %bb.0:
   6084 ; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xd8]
   6085 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6086 ; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
   6087 ; X64-NEXT:    vpermw %xmm0, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xc0]
   6088 ; X64-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
   6089 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   6090 ; X64-NEXT:    retq # encoding: [0xc3]
   6091   %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   6092   %res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   6093   %res2 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   6094   %res3 = add <8 x i16> %res, %res1
   6095   %res4 = add <8 x i16> %res3, %res2
   6096   ret <8 x i16> %res4
   6097 }
   6098 
   6099 declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   6100 
   6101 define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   6102 ; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
   6103 ; X86:       # %bb.0:
   6104 ; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8]
   6105 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6106 ; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
   6107 ; X86-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
   6108 ; X86-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
   6109 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   6110 ; X86-NEXT:    retl # encoding: [0xc3]
   6111 ;
   6112 ; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
   6113 ; X64:       # %bb.0:
   6114 ; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xd8]
   6115 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6116 ; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
   6117 ; X64-NEXT:    vpermw %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xc0]
   6118 ; X64-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
   6119 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   6120 ; X64-NEXT:    retq # encoding: [0xc3]
   6121   %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   6122   %res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   6123   %res2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   6124   %res3 = add <16 x i16> %res, %res1
   6125   %res4 = add <16 x i16> %res3, %res2
   6126   ret <16 x i16> %res4
   6127 }
   6128 
   6129 declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   6130 
   6131 define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   6132 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
   6133 ; X86:       # %bb.0:
   6134 ; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
   6135 ; X86-NEXT:    vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda]
   6136 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6137 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   6138 ; X86-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
   6139 ; X86-NEXT:    vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
   6140 ; X86-NEXT:    retl # encoding: [0xc3]
   6141 ;
   6142 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
   6143 ; X64:       # %bb.0:
   6144 ; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
   6145 ; X64-NEXT:    vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda]
   6146 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6147 ; X64-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xca]
   6148 ; X64-NEXT:    vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
   6149 ; X64-NEXT:    retq # encoding: [0xc3]
   6150   %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   6151   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   6152   %res2 = add <8 x i16> %res, %res1
   6153   ret <8 x i16> %res2
   6154 }
   6155 
   6156 declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   6157 
   6158 define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   6159 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
   6160 ; X86:       # %bb.0:
   6161 ; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
   6162 ; X86-NEXT:    vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda]
   6163 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6164 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   6165 ; X86-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xca]
   6166 ; X86-NEXT:    vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
   6167 ; X86-NEXT:    retl # encoding: [0xc3]
   6168 ;
   6169 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
   6170 ; X64:       # %bb.0:
   6171 ; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
   6172 ; X64-NEXT:    vpermt2w %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xda]
   6173 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6174 ; X64-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xca]
   6175 ; X64-NEXT:    vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
   6176 ; X64-NEXT:    retq # encoding: [0xc3]
   6177   %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   6178   %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   6179   %res2 = add <8 x i16> %res, %res1
   6180   ret <8 x i16> %res2
   6181 }
   6182 
   6183 declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   6184 
   6185 define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   6186 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
   6187 ; X86:       # %bb.0:
   6188 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   6189 ; X86-NEXT:    vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda]
   6190 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6191 ; X86-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
   6192 ; X86-NEXT:    vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
   6193 ; X86-NEXT:    retl # encoding: [0xc3]
   6194 ;
   6195 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
   6196 ; X64:       # %bb.0:
   6197 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   6198 ; X64-NEXT:    vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda]
   6199 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6200 ; X64-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xca]
   6201 ; X64-NEXT:    vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
   6202 ; X64-NEXT:    retq # encoding: [0xc3]
   6203   %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   6204   %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   6205   %res2 = add <16 x i16> %res, %res1
   6206   ret <16 x i16> %res2
   6207 }
   6208 
   6209 declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   6210 
   6211 define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   6212 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
   6213 ; X86:       # %bb.0:
   6214 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   6215 ; X86-NEXT:    vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda]
   6216 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6217 ; X86-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xca]
   6218 ; X86-NEXT:    vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
   6219 ; X86-NEXT:    retl # encoding: [0xc3]
   6220 ;
   6221 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
   6222 ; X64:       # %bb.0:
   6223 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   6224 ; X64-NEXT:    vpermt2w %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xda]
   6225 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6226 ; X64-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xca]
   6227 ; X64-NEXT:    vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
   6228 ; X64-NEXT:    retq # encoding: [0xc3]
   6229   %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   6230   %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   6231   %res2 = add <16 x i16> %res, %res1
   6232   ret <16 x i16> %res2
   6233 }
   6234 
   6235 declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   6236 
   6237 define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   6238 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
   6239 ; X86:       # %bb.0:
   6240 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
   6241 ; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda]
   6242 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6243 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   6244 ; X86-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
   6245 ; X86-NEXT:    vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
   6246 ; X86-NEXT:    retl # encoding: [0xc3]
   6247 ;
   6248 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
   6249 ; X64:       # %bb.0:
   6250 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
   6251 ; X64-NEXT:    vpermt2w %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7d,0xda]
   6252 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6253 ; X64-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x75,0xca]
   6254 ; X64-NEXT:    vpaddw %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfd,0xc3]
   6255 ; X64-NEXT:    retq # encoding: [0xc3]
   6256   %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   6257   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   6258   %res2 = add <8 x i16> %res, %res1
   6259   ret <8 x i16> %res2
   6260 }
   6261 
   6262 declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   6263 
   6264 define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   6265 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
   6266 ; X86:       # %bb.0:
   6267 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
   6268 ; X86-NEXT:    vpermt2w %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda]
   6269 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6270 ; X86-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
   6271 ; X86-NEXT:    vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
   6272 ; X86-NEXT:    retl # encoding: [0xc3]
   6273 ;
   6274 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
   6275 ; X64:       # %bb.0:
   6276 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
   6277 ; X64-NEXT:    vpermt2w %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7d,0xda]
   6278 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6279 ; X64-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x75,0xca]
   6280 ; X64-NEXT:    vpaddw %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfd,0xc3]
   6281 ; X64-NEXT:    retq # encoding: [0xc3]
   6282   %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   6283   %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   6284   %res2 = add <16 x i16> %res, %res1
   6285   ret <16 x i16> %res2
   6286 }
   6287 
   6288 declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)
   6289 
   6290 define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
   6291 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
   6292 ; X86:       # %bb.0:
   6293 ; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02]
   6294 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6295 ; X86-NEXT:    kmovd %eax, %k1 # encoding: [0xc5,0xfb,0x92,0xc8]
   6296 ; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
   6297 ; X86-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02]
   6298 ; X86-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
   6299 ; X86-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   6300 ; X86-NEXT:    retl # encoding: [0xc3]
   6301 ;
   6302 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
   6303 ; X64:       # %bb.0:
   6304 ; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf3,0x7d,0x08,0x42,0xd9,0x02]
   6305 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6306 ; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
   6307 ; X64-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x42,0xc1,0x02]
   6308 ; X64-NEXT:    vpaddw %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfd,0xc3]
   6309 ; X64-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfd,0xc0]
   6310 ; X64-NEXT:    retq # encoding: [0xc3]
   6311   %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
   6312   %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4)
   6313   %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 -1)
   6314   %res3 = add <8 x i16> %res, %res1
   6315   %res4 = add <8 x i16> %res2, %res3
   6316   ret <8 x i16> %res4
   6317 }
   6318 
   6319 declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)
   6320 
   6321 define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
   6322 ; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
   6323 ; X86:       # %bb.0:
   6324 ; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02]
   6325 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
   6326 ; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
   6327 ; X86-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02]
   6328 ; X86-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
   6329 ; X86-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   6330 ; X86-NEXT:    retl # encoding: [0xc3]
   6331 ;
   6332 ; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
   6333 ; X64:       # %bb.0:
   6334 ; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0x7d,0x28,0x42,0xd9,0x02]
   6335 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   6336 ; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
   6337 ; X64-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xc1,0x02]
   6338 ; X64-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfd,0xc3]
   6339 ; X64-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfd,0xc0]
   6340 ; X64-NEXT:    retq # encoding: [0xc3]
   6341   %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
   6342   %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4)
   6343   %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 -1)
   6344   %res3 = add <16 x i16> %res, %res1
   6345   %res4 = add <16 x i16> %res3, %res2
   6346   ret <16 x i16> %res4
   6347 }
   6348