Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512bw -mattr=+avx512vl --show-mc-encoding| FileCheck %s
      3 
      4 ; 256-bit
      5 
      6 define <8 x i32> @test_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
      7 ; CHECK-LABEL: test_cmp_b_256:
      8 ; CHECK:       ## BB#0:
      9 ; CHECK-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x00]
     10 ; CHECK-NEXT:    kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
     11 ; CHECK-NEXT:    vpcmpltb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x01]
     12 ; CHECK-NEXT:    kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
     13 ; CHECK-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x02]
     14 ; CHECK-NEXT:    kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
     15 ; CHECK-NEXT:    vpcmpunordb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x03]
     16 ; CHECK-NEXT:    kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
     17 ; CHECK-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x04]
     18 ; CHECK-NEXT:    kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8]
     19 ; CHECK-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x05]
     20 ; CHECK-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
     21 ; CHECK-NEXT:    vpcmpnleb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x06]
     22 ; CHECK-NEXT:    kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
     23 ; CHECK-NEXT:    vpcmpordb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3f,0xc1,0x07]
     24 ; CHECK-NEXT:    kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
     25 ; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
     26 ; CHECK-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
     27 ; CHECK-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
     28 ; CHECK-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03]
     29 ; CHECK-NEXT:    vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8]
     30 ; CHECK-NEXT:    vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
     31 ; CHECK-NEXT:    vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
     32 ; CHECK-NEXT:    vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
     33 ; CHECK-NEXT:    vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
     34 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     35   %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
     36   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
     37   %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
     38   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
     39   %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
     40   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
     41   %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
     42   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
     43   %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
     44   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
     45   %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
     46   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
     47   %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
     48   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
     49   %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
     50   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
     51   ret <8 x i32> %vec7
     52 }
     53 
     54 define <8 x i32> @test_mask_cmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
     55 ; CHECK-LABEL: test_mask_cmp_b_256:
     56 ; CHECK:       ## BB#0:
     57 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
     58 ; CHECK-NEXT:    vpcmpeqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x00]
     59 ; CHECK-NEXT:    kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
     60 ; CHECK-NEXT:    vpcmpltb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x01]
     61 ; CHECK-NEXT:    kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
     62 ; CHECK-NEXT:    vpcmpleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x02]
     63 ; CHECK-NEXT:    kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
     64 ; CHECK-NEXT:    vpcmpunordb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x03]
     65 ; CHECK-NEXT:    kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
     66 ; CHECK-NEXT:    vpcmpneqb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x04]
     67 ; CHECK-NEXT:    kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8]
     68 ; CHECK-NEXT:    vpcmpnltb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x05]
     69 ; CHECK-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
     70 ; CHECK-NEXT:    vpcmpnleb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x06]
     71 ; CHECK-NEXT:    kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
     72 ; CHECK-NEXT:    vpcmpordb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3f,0xc1,0x07]
     73 ; CHECK-NEXT:    kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
     74 ; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
     75 ; CHECK-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
     76 ; CHECK-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
     77 ; CHECK-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03]
     78 ; CHECK-NEXT:    vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8]
     79 ; CHECK-NEXT:    vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
     80 ; CHECK-NEXT:    vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
     81 ; CHECK-NEXT:    vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
     82 ; CHECK-NEXT:    vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
     83 ; CHECK-NEXT:    retq ## encoding: [0xc3]
     84   %res0 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
     85   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
     86   %res1 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
     87   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
     88   %res2 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
     89   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
     90   %res3 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
     91   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
     92   %res4 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
     93   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
     94   %res5 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
     95   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
     96   %res6 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
     97   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
     98   %res7 = call i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
     99   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
    100   ret <8 x i32> %vec7
    101 }
    102 
    103 declare i32 @llvm.x86.avx512.mask.cmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
    104 
    105 define <8 x i32> @test_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1) {
    106 ; CHECK-LABEL: test_ucmp_b_256:
    107 ; CHECK:       ## BB#0:
    108 ; CHECK-NEXT:    vpcmpequb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x00]
    109 ; CHECK-NEXT:    kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
    110 ; CHECK-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x01]
    111 ; CHECK-NEXT:    kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
    112 ; CHECK-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x02]
    113 ; CHECK-NEXT:    kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
    114 ; CHECK-NEXT:    vpcmpunordub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x03]
    115 ; CHECK-NEXT:    kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
    116 ; CHECK-NEXT:    vpcmpnequb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x04]
    117 ; CHECK-NEXT:    kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8]
    118 ; CHECK-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x05]
    119 ; CHECK-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
    120 ; CHECK-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x06]
    121 ; CHECK-NEXT:    kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
    122 ; CHECK-NEXT:    vpcmpordub %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x28,0x3e,0xc1,0x07]
    123 ; CHECK-NEXT:    kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
    124 ; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
    125 ; CHECK-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
    126 ; CHECK-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
    127 ; CHECK-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03]
    128 ; CHECK-NEXT:    vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8]
    129 ; CHECK-NEXT:    vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
    130 ; CHECK-NEXT:    vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
    131 ; CHECK-NEXT:    vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
    132 ; CHECK-NEXT:    vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
    133 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    134   %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 -1)
    135   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
    136   %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 -1)
    137   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
    138   %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 -1)
    139   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
    140   %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 -1)
    141   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
    142   %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 -1)
    143   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
    144   %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 -1)
    145   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
    146   %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 -1)
    147   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
    148   %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 -1)
    149   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
    150   ret <8 x i32> %vec7
    151 }
    152 
    153 define <8 x i32> @test_mask_ucmp_b_256(<32 x i8> %a0, <32 x i8> %a1, i32 %mask) {
    154 ; CHECK-LABEL: test_mask_ucmp_b_256:
    155 ; CHECK:       ## BB#0:
    156 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
    157 ; CHECK-NEXT:    vpcmpequb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x00]
    158 ; CHECK-NEXT:    kmovd %k0, %r8d ## encoding: [0xc5,0x7b,0x93,0xc0]
    159 ; CHECK-NEXT:    vpcmpltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x01]
    160 ; CHECK-NEXT:    kmovd %k0, %r9d ## encoding: [0xc5,0x7b,0x93,0xc8]
    161 ; CHECK-NEXT:    vpcmpleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x02]
    162 ; CHECK-NEXT:    kmovd %k0, %r10d ## encoding: [0xc5,0x7b,0x93,0xd0]
    163 ; CHECK-NEXT:    vpcmpunordub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x03]
    164 ; CHECK-NEXT:    kmovd %k0, %esi ## encoding: [0xc5,0xfb,0x93,0xf0]
    165 ; CHECK-NEXT:    vpcmpnequb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x04]
    166 ; CHECK-NEXT:    kmovd %k0, %edi ## encoding: [0xc5,0xfb,0x93,0xf8]
    167 ; CHECK-NEXT:    vpcmpnltub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x05]
    168 ; CHECK-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
    169 ; CHECK-NEXT:    vpcmpnleub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x06]
    170 ; CHECK-NEXT:    kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
    171 ; CHECK-NEXT:    vpcmpordub %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x3e,0xc1,0x07]
    172 ; CHECK-NEXT:    kmovd %k0, %edx ## encoding: [0xc5,0xfb,0x93,0xd0]
    173 ; CHECK-NEXT:    vmovd %edi, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc7]
    174 ; CHECK-NEXT:    vpinsrd $1, %eax, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc0,0x01]
    175 ; CHECK-NEXT:    vpinsrd $2, %ecx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc1,0x02]
    176 ; CHECK-NEXT:    vpinsrd $3, %edx, %xmm0, %xmm0 ## encoding: [0xc4,0xe3,0x79,0x22,0xc2,0x03]
    177 ; CHECK-NEXT:    vmovd %r8d, %xmm1 ## encoding: [0x62,0xd1,0x7d,0x08,0x6e,0xc8]
    178 ; CHECK-NEXT:    vpinsrd $1, %r9d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xc9,0x01]
    179 ; CHECK-NEXT:    vpinsrd $2, %r10d, %xmm1, %xmm1 ## encoding: [0xc4,0xc3,0x71,0x22,0xca,0x02]
    180 ; CHECK-NEXT:    vpinsrd $3, %esi, %xmm1, %xmm1 ## encoding: [0xc4,0xe3,0x71,0x22,0xce,0x03]
    181 ; CHECK-NEXT:    vinserti32x4 $1, %xmm0, %ymm1, %ymm0 ## encoding: [0x62,0xf3,0x75,0x28,0x38,0xc0,0x01]
    182 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    183   %res0 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 0, i32 %mask)
    184   %vec0 = insertelement <8 x i32> undef, i32 %res0, i32 0
    185   %res1 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 1, i32 %mask)
    186   %vec1 = insertelement <8 x i32> %vec0, i32 %res1, i32 1
    187   %res2 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 2, i32 %mask)
    188   %vec2 = insertelement <8 x i32> %vec1, i32 %res2, i32 2
    189   %res3 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 3, i32 %mask)
    190   %vec3 = insertelement <8 x i32> %vec2, i32 %res3, i32 3
    191   %res4 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 4, i32 %mask)
    192   %vec4 = insertelement <8 x i32> %vec3, i32 %res4, i32 4
    193   %res5 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 5, i32 %mask)
    194   %vec5 = insertelement <8 x i32> %vec4, i32 %res5, i32 5
    195   %res6 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 6, i32 %mask)
    196   %vec6 = insertelement <8 x i32> %vec5, i32 %res6, i32 6
    197   %res7 = call i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8> %a0, <32 x i8> %a1, i32 7, i32 %mask)
    198   %vec7 = insertelement <8 x i32> %vec6, i32 %res7, i32 7
    199   ret <8 x i32> %vec7
    200 }
    201 
    202 declare i32 @llvm.x86.avx512.mask.ucmp.b.256(<32 x i8>, <32 x i8>, i32, i32) nounwind readnone
    203 
    204 define <8 x i16> @test_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
    205 ; CHECK-LABEL: test_cmp_w_256:
    206 ; CHECK:       ## BB#0:
    207 ; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc1,0x00]
    208 ; CHECK-NEXT:    vpcmpltw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xc9,0x01]
    209 ; CHECK-NEXT:    vpcmplew %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd1,0x02]
    210 ; CHECK-NEXT:    vpcmpunordw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xd9,0x03]
    211 ; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe1,0x04]
    212 ; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xe9,0x05]
    213 ; CHECK-NEXT:    vpcmpnlew %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf1,0x06]
    214 ; CHECK-NEXT:    vpcmpordw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3f,0xf9,0x07]
    215 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    216 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
    217 ; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
    218 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
    219 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    220 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
    221 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    222 ; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
    223 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    224 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
    225 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    226 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
    227 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    228 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
    229 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    230 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
    231 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    232   %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
    233   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
    234   %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
    235   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
    236   %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
    237   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
    238   %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
    239   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
    240   %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
    241   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
    242   %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
    243   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
    244   %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
    245   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
    246   %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
    247   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
    248   ret <8 x i16> %vec7
    249 }
    250 
    251 define <8 x i16> @test_mask_cmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
    252 ; CHECK-LABEL: test_mask_cmp_w_256:
    253 ; CHECK:       ## BB#0:
    254 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    255 ; CHECK-NEXT:    vpcmpeqw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc1,0x00]
    256 ; CHECK-NEXT:    vpcmpltw %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd1,0x01]
    257 ; CHECK-NEXT:    vpcmplew %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xd9,0x02]
    258 ; CHECK-NEXT:    vpcmpunordw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe1,0x03]
    259 ; CHECK-NEXT:    vpcmpneqw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xe9,0x04]
    260 ; CHECK-NEXT:    vpcmpnltw %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xf1,0x05]
    261 ; CHECK-NEXT:    vpcmpnlew %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xf9,0x06]
    262 ; CHECK-NEXT:    vpcmpordw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3f,0xc9,0x07]
    263 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    264 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
    265 ; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
    266 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
    267 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    268 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
    269 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    270 ; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
    271 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    272 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
    273 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    274 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
    275 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    276 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
    277 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    278 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
    279 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    280   %res0 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
    281   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
    282   %res1 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
    283   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
    284   %res2 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
    285   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
    286   %res3 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
    287   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
    288   %res4 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
    289   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
    290   %res5 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
    291   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
    292   %res6 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
    293   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
    294   %res7 = call i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
    295   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
    296   ret <8 x i16> %vec7
    297 }
    298 
    299 declare i16 @llvm.x86.avx512.mask.cmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
    300 
    301 define <8 x i16> @test_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1) {
    302 ; CHECK-LABEL: test_ucmp_w_256:
    303 ; CHECK:       ## BB#0:
    304 ; CHECK-NEXT:    vpcmpequw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc1,0x00]
    305 ; CHECK-NEXT:    vpcmpltuw %ymm1, %ymm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xc9,0x01]
    306 ; CHECK-NEXT:    vpcmpleuw %ymm1, %ymm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd1,0x02]
    307 ; CHECK-NEXT:    vpcmpunorduw %ymm1, %ymm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xd9,0x03]
    308 ; CHECK-NEXT:    vpcmpnequw %ymm1, %ymm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe1,0x04]
    309 ; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xe9,0x05]
    310 ; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf1,0x06]
    311 ; CHECK-NEXT:    vpcmporduw %ymm1, %ymm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x28,0x3e,0xf9,0x07]
    312 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    313 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
    314 ; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
    315 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
    316 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    317 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
    318 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    319 ; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
    320 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    321 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
    322 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    323 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
    324 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    325 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
    326 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    327 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
    328 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    329   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 -1)
    330   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
    331   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 -1)
    332   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
    333   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 -1)
    334   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
    335   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 -1)
    336   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
    337   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 -1)
    338   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
    339   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 -1)
    340   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
    341   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 -1)
    342   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
    343   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 -1)
    344   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
    345   ret <8 x i16> %vec7
    346 }
    347 
    348 define <8 x i16> @test_mask_ucmp_w_256(<16 x i16> %a0, <16 x i16> %a1, i16 %mask) {
    349 ; CHECK-LABEL: test_mask_ucmp_w_256:
    350 ; CHECK:       ## BB#0:
    351 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    352 ; CHECK-NEXT:    vpcmpequw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc1,0x00]
    353 ; CHECK-NEXT:    vpcmpltuw %ymm1, %ymm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd1,0x01]
    354 ; CHECK-NEXT:    vpcmpleuw %ymm1, %ymm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xd9,0x02]
    355 ; CHECK-NEXT:    vpcmpunorduw %ymm1, %ymm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe1,0x03]
    356 ; CHECK-NEXT:    vpcmpnequw %ymm1, %ymm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xe9,0x04]
    357 ; CHECK-NEXT:    vpcmpnltuw %ymm1, %ymm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xf1,0x05]
    358 ; CHECK-NEXT:    vpcmpnleuw %ymm1, %ymm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xf9,0x06]
    359 ; CHECK-NEXT:    vpcmporduw %ymm1, %ymm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x3e,0xc9,0x07]
    360 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    361 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
    362 ; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
    363 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
    364 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    365 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
    366 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    367 ; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
    368 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    369 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
    370 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    371 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
    372 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    373 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
    374 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    375 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
    376 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    377   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 0, i16 %mask)
    378   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
    379   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 1, i16 %mask)
    380   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
    381   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 2, i16 %mask)
    382   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
    383   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 3, i16 %mask)
    384   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
    385   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 4, i16 %mask)
    386   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
    387   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 5, i16 %mask)
    388   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
    389   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 6, i16 %mask)
    390   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
    391   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16> %a0, <16 x i16> %a1, i32 7, i16 %mask)
    392   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
    393   ret <8 x i16> %vec7
    394 }
    395 
    396 declare i16 @llvm.x86.avx512.mask.ucmp.w.256(<16 x i16>, <16 x i16>, i32, i16) nounwind readnone
    397 
    398 ; 128-bit
    399 
    400 define i16 @test_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b) {
    401 ; CHECK-LABEL: test_pcmpeq_b_128:
    402 ; CHECK:       ## BB#0:
    403 ; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x74,0xc1]
    404 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    405 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    406 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    407   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
    408   ret i16 %res
    409 }
    410 
    411 define i16 @test_mask_pcmpeq_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
    412 ; CHECK-LABEL: test_mask_pcmpeq_b_128:
    413 ; CHECK:       ## BB#0:
    414 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    415 ; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x74,0xc1]
    416 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    417 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    418 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    419   %res = call i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
    420   ret i16 %res
    421 }
    422 
    423 declare i16 @llvm.x86.avx512.mask.pcmpeq.b.128(<16 x i8>, <16 x i8>, i16)
    424 
    425 define i8 @test_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b) {
    426 ; CHECK-LABEL: test_pcmpeq_w_128:
    427 ; CHECK:       ## BB#0:
    428 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x75,0xc1]
    429 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    430 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    431 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    432   %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
    433   ret i8 %res
    434 }
    435 
    436 define i8 @test_mask_pcmpeq_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
    437 ; CHECK-LABEL: test_mask_pcmpeq_w_128:
    438 ; CHECK:       ## BB#0:
    439 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    440 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x75,0xc1]
    441 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    442 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    443 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    444   %res = call i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
    445   ret i8 %res
    446 }
    447 
    448 declare i8 @llvm.x86.avx512.mask.pcmpeq.w.128(<8 x i16>, <8 x i16>, i8)
    449 
    450 define i16 @test_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b) {
    451 ; CHECK-LABEL: test_pcmpgt_b_128:
    452 ; CHECK:       ## BB#0:
    453 ; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x64,0xc1]
    454 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    455 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    456 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    457   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 -1)
    458   ret i16 %res
    459 }
    460 
    461 define i16 @test_mask_pcmpgt_b_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
    462 ; CHECK-LABEL: test_mask_pcmpgt_b_128:
    463 ; CHECK:       ## BB#0:
    464 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    465 ; CHECK-NEXT:    vpcmpgtb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x64,0xc1]
    466 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    467 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
    468 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    469   %res = call i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8> %a, <16 x i8> %b, i16 %mask)
    470   ret i16 %res
    471 }
    472 
    473 declare i16 @llvm.x86.avx512.mask.pcmpgt.b.128(<16 x i8>, <16 x i8>, i16)
    474 
    475 define i8 @test_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b) {
    476 ; CHECK-LABEL: test_pcmpgt_w_128:
    477 ; CHECK:       ## BB#0:
    478 ; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x65,0xc1]
    479 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    480 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    481 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    482   %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 -1)
    483   ret i8 %res
    484 }
    485 
    486 define i8 @test_mask_pcmpgt_w_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
    487 ; CHECK-LABEL: test_mask_pcmpgt_w_128:
    488 ; CHECK:       ## BB#0:
    489 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    490 ; CHECK-NEXT:    vpcmpgtw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x65,0xc1]
    491 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    492 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
    493 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    494   %res = call i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16> %a, <8 x i16> %b, i8 %mask)
    495   ret i8 %res
    496 }
    497 
    498 declare i8 @llvm.x86.avx512.mask.pcmpgt.w.128(<8 x i16>, <8 x i16>, i8)
    499 
    500 define <8 x i16> @test_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
    501 ; CHECK-LABEL: test_cmp_b_128:
    502 ; CHECK:       ## BB#0:
    503 ; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc1,0x00]
    504 ; CHECK-NEXT:    vpcmpltb %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xc9,0x01]
    505 ; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd1,0x02]
    506 ; CHECK-NEXT:    vpcmpunordb %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xd9,0x03]
    507 ; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe1,0x04]
    508 ; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xe9,0x05]
    509 ; CHECK-NEXT:    vpcmpnleb %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf1,0x06]
    510 ; CHECK-NEXT:    vpcmpordb %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3f,0xf9,0x07]
    511 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    512 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
    513 ; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
    514 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
    515 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    516 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
    517 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    518 ; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
    519 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    520 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
    521 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    522 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
    523 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    524 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
    525 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    526 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
    527 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    528   %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
    529   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
    530   %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
    531   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
    532   %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
    533   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
    534   %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
    535   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
    536   %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
    537   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
    538   %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
    539   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
    540   %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
    541   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
    542   %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
    543   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
    544   ret <8 x i16> %vec7
    545 }
    546 
    547 define <8 x i16> @test_mask_cmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
    548 ; CHECK-LABEL: test_mask_cmp_b_128:
    549 ; CHECK:       ## BB#0:
    550 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    551 ; CHECK-NEXT:    vpcmpeqb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc1,0x00]
    552 ; CHECK-NEXT:    vpcmpltb %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd1,0x01]
    553 ; CHECK-NEXT:    vpcmpleb %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xd9,0x02]
    554 ; CHECK-NEXT:    vpcmpunordb %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe1,0x03]
    555 ; CHECK-NEXT:    vpcmpneqb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xe9,0x04]
    556 ; CHECK-NEXT:    vpcmpnltb %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xf1,0x05]
    557 ; CHECK-NEXT:    vpcmpnleb %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xf9,0x06]
    558 ; CHECK-NEXT:    vpcmpordb %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3f,0xc9,0x07]
    559 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    560 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
    561 ; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
    562 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
    563 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    564 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
    565 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    566 ; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
    567 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    568 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
    569 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    570 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
    571 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    572 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
    573 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    574 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
    575 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    576   %res0 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
    577   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
    578   %res1 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
    579   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
    580   %res2 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
    581   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
    582   %res3 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
    583   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
    584   %res4 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
    585   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
    586   %res5 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
    587   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
    588   %res6 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
    589   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
    590   %res7 = call i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
    591   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
    592   ret <8 x i16> %vec7
    593 }
    594 
    595 declare i16 @llvm.x86.avx512.mask.cmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
    596 
    597 define <8 x i16> @test_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1) {
    598 ; CHECK-LABEL: test_ucmp_b_128:
    599 ; CHECK:       ## BB#0:
    600 ; CHECK-NEXT:    vpcmpequb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc1,0x00]
    601 ; CHECK-NEXT:    vpcmpltub %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xc9,0x01]
    602 ; CHECK-NEXT:    vpcmpleub %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd1,0x02]
    603 ; CHECK-NEXT:    vpcmpunordub %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xd9,0x03]
    604 ; CHECK-NEXT:    vpcmpnequb %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe1,0x04]
    605 ; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xe9,0x05]
    606 ; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf1,0x06]
    607 ; CHECK-NEXT:    vpcmpordub %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0x7d,0x08,0x3e,0xf9,0x07]
    608 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    609 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
    610 ; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
    611 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
    612 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    613 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
    614 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    615 ; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
    616 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    617 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
    618 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    619 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
    620 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    621 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
    622 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    623 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
    624 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    625   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 -1)
    626   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
    627   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 -1)
    628   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
    629   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 -1)
    630   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
    631   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 -1)
    632   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
    633   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 -1)
    634   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
    635   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 -1)
    636   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
    637   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 -1)
    638   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
    639   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 -1)
    640   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
    641   ret <8 x i16> %vec7
    642 }
    643 
    644 define <8 x i16> @test_mask_ucmp_b_128(<16 x i8> %a0, <16 x i8> %a1, i16 %mask) {
    645 ; CHECK-LABEL: test_mask_ucmp_b_128:
    646 ; CHECK:       ## BB#0:
    647 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    648 ; CHECK-NEXT:    vpcmpequb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc1,0x00]
    649 ; CHECK-NEXT:    vpcmpltub %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd1,0x01]
    650 ; CHECK-NEXT:    vpcmpleub %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xd9,0x02]
    651 ; CHECK-NEXT:    vpcmpunordub %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe1,0x03]
    652 ; CHECK-NEXT:    vpcmpnequb %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xe9,0x04]
    653 ; CHECK-NEXT:    vpcmpnltub %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xf1,0x05]
    654 ; CHECK-NEXT:    vpcmpnleub %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xf9,0x06]
    655 ; CHECK-NEXT:    vpcmpordub %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x3e,0xc9,0x07]
    656 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    657 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
    658 ; CHECK-NEXT:    vmovd %ecx, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6e,0xc1]
    659 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x01]
    660 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    661 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x02]
    662 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    663 ; CHECK-NEXT:    vpinsrw $3, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x03]
    664 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    665 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x04]
    666 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    667 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x05]
    668 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    669 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x06]
    670 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    671 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xc4,0xc0,0x07]
    672 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    673   %res0 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 0, i16 %mask)
    674   %vec0 = insertelement <8 x i16> undef, i16 %res0, i32 0
    675   %res1 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 1, i16 %mask)
    676   %vec1 = insertelement <8 x i16> %vec0, i16 %res1, i32 1
    677   %res2 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 2, i16 %mask)
    678   %vec2 = insertelement <8 x i16> %vec1, i16 %res2, i32 2
    679   %res3 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 3, i16 %mask)
    680   %vec3 = insertelement <8 x i16> %vec2, i16 %res3, i32 3
    681   %res4 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 4, i16 %mask)
    682   %vec4 = insertelement <8 x i16> %vec3, i16 %res4, i32 4
    683   %res5 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 5, i16 %mask)
    684   %vec5 = insertelement <8 x i16> %vec4, i16 %res5, i32 5
    685   %res6 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 6, i16 %mask)
    686   %vec6 = insertelement <8 x i16> %vec5, i16 %res6, i32 6
    687   %res7 = call i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8> %a0, <16 x i8> %a1, i32 7, i16 %mask)
    688   %vec7 = insertelement <8 x i16> %vec6, i16 %res7, i32 7
    689   ret <8 x i16> %vec7
    690 }
    691 
    692 declare i16 @llvm.x86.avx512.mask.ucmp.b.128(<16 x i8>, <16 x i8>, i32, i16) nounwind readnone
    693 
    694 define <8 x i8> @test_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
    695 ; CHECK-LABEL: test_cmp_w_128:
    696 ; CHECK:       ## BB#0:
    697 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc1,0x00]
    698 ; CHECK-NEXT:    vpcmpltw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xc9,0x01]
    699 ; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd1,0x02]
    700 ; CHECK-NEXT:    vpcmpunordw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xd9,0x03]
    701 ; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe1,0x04]
    702 ; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xe9,0x05]
    703 ; CHECK-NEXT:    vpcmpnlew %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf1,0x06]
    704 ; CHECK-NEXT:    vpcmpordw %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x3f,0xf9,0x07]
    705 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    706 ; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00]
    707 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    708 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02]
    709 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    710 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04]
    711 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    712 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06]
    713 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    714 ; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08]
    715 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    716 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a]
    717 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    718 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c]
    719 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    720 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e]
    721 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    722   %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
    723   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
    724   %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
    725   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
    726   %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
    727   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
    728   %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
    729   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
    730   %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
    731   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
    732   %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
    733   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
    734   %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
    735   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
    736   %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
    737   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
    738   ret <8 x i8> %vec7
    739 }
    740 
    741 define <8 x i8> @test_mask_cmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
    742 ; CHECK-LABEL: test_mask_cmp_w_128:
    743 ; CHECK:       ## BB#0:
    744 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    745 ; CHECK-NEXT:    vpcmpeqw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc1,0x00]
    746 ; CHECK-NEXT:    vpcmpltw %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd1,0x01]
    747 ; CHECK-NEXT:    vpcmplew %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xd9,0x02]
    748 ; CHECK-NEXT:    vpcmpunordw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe1,0x03]
    749 ; CHECK-NEXT:    vpcmpneqw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xe9,0x04]
    750 ; CHECK-NEXT:    vpcmpnltw %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xf1,0x05]
    751 ; CHECK-NEXT:    vpcmpnlew %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xf9,0x06]
    752 ; CHECK-NEXT:    vpcmpordw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3f,0xc9,0x07]
    753 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    754 ; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00]
    755 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    756 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02]
    757 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    758 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04]
    759 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    760 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06]
    761 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    762 ; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08]
    763 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    764 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a]
    765 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    766 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c]
    767 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    768 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e]
    769 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    770   %res0 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
    771   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
    772   %res1 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
    773   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
    774   %res2 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
    775   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
    776   %res3 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
    777   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
    778   %res4 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
    779   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
    780   %res5 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
    781   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
    782   %res6 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
    783   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
    784   %res7 = call i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
    785   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
    786   ret <8 x i8> %vec7
    787 }
    788 
    789 declare i8 @llvm.x86.avx512.mask.cmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
    790 
    791 define <8 x i8> @test_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1) {
    792 ; CHECK-LABEL: test_ucmp_w_128:
    793 ; CHECK:       ## BB#0:
    794 ; CHECK-NEXT:    vpcmpequw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc1,0x00]
    795 ; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k1 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xc9,0x01]
    796 ; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k2 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd1,0x02]
    797 ; CHECK-NEXT:    vpcmpunorduw %xmm1, %xmm0, %k3 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xd9,0x03]
    798 ; CHECK-NEXT:    vpcmpnequw %xmm1, %xmm0, %k4 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe1,0x04]
    799 ; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k5 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xe9,0x05]
    800 ; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k6 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf1,0x06]
    801 ; CHECK-NEXT:    vpcmporduw %xmm1, %xmm0, %k7 ## encoding: [0x62,0xf3,0xfd,0x08,0x3e,0xf9,0x07]
    802 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    803 ; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00]
    804 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    805 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02]
    806 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    807 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04]
    808 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    809 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06]
    810 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    811 ; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08]
    812 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    813 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a]
    814 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    815 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c]
    816 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    817 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e]
    818 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    819   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 -1)
    820   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
    821   %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 -1)
    822   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
    823   %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 -1)
    824   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
    825   %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 -1)
    826   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
    827   %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 -1)
    828   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
    829   %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 -1)
    830   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
    831   %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 -1)
    832   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
    833   %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 -1)
    834   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
    835   ret <8 x i8> %vec7
    836 }
    837 
    838 define <8 x i8> @test_mask_ucmp_w_128(<8 x i16> %a0, <8 x i16> %a1, i8 %mask) {
    839 ; CHECK-LABEL: test_mask_ucmp_w_128:
    840 ; CHECK:       ## BB#0:
    841 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    842 ; CHECK-NEXT:    vpcmpequw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc1,0x00]
    843 ; CHECK-NEXT:    vpcmpltuw %xmm1, %xmm0, %k2 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd1,0x01]
    844 ; CHECK-NEXT:    vpcmpleuw %xmm1, %xmm0, %k3 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xd9,0x02]
    845 ; CHECK-NEXT:    vpcmpunorduw %xmm1, %xmm0, %k4 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe1,0x03]
    846 ; CHECK-NEXT:    vpcmpnequw %xmm1, %xmm0, %k5 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xe9,0x04]
    847 ; CHECK-NEXT:    vpcmpnltuw %xmm1, %xmm0, %k6 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xf1,0x05]
    848 ; CHECK-NEXT:    vpcmpnleuw %xmm1, %xmm0, %k7 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xf9,0x06]
    849 ; CHECK-NEXT:    vpcmporduw %xmm1, %xmm0, %k1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x3e,0xc9,0x07]
    850 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
    851 ; CHECK-NEXT:    vpinsrb $0, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x00]
    852 ; CHECK-NEXT:    kmovw %k2, %eax ## encoding: [0xc5,0xf8,0x93,0xc2]
    853 ; CHECK-NEXT:    vpinsrb $2, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x02]
    854 ; CHECK-NEXT:    kmovw %k3, %eax ## encoding: [0xc5,0xf8,0x93,0xc3]
    855 ; CHECK-NEXT:    vpinsrb $4, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x04]
    856 ; CHECK-NEXT:    kmovw %k4, %eax ## encoding: [0xc5,0xf8,0x93,0xc4]
    857 ; CHECK-NEXT:    vpinsrb $6, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x06]
    858 ; CHECK-NEXT:    kmovw %k5, %eax ## encoding: [0xc5,0xf8,0x93,0xc5]
    859 ; CHECK-NEXT:    vpinsrb $8, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x08]
    860 ; CHECK-NEXT:    kmovw %k6, %eax ## encoding: [0xc5,0xf8,0x93,0xc6]
    861 ; CHECK-NEXT:    vpinsrb $10, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0a]
    862 ; CHECK-NEXT:    kmovw %k7, %eax ## encoding: [0xc5,0xf8,0x93,0xc7]
    863 ; CHECK-NEXT:    vpinsrb $12, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0c]
    864 ; CHECK-NEXT:    kmovw %k1, %eax ## encoding: [0xc5,0xf8,0x93,0xc1]
    865 ; CHECK-NEXT:    vpinsrb $14, %eax, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x20,0xc0,0x0e]
    866 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    867   %res0 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 0, i8 %mask)
    868   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
    869   %res1 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 1, i8 %mask)
    870   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
    871   %res2 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 2, i8 %mask)
    872   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
    873   %res3 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 3, i8 %mask)
    874   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
    875   %res4 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 4, i8 %mask)
    876   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
    877   %res5 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 5, i8 %mask)
    878   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
    879   %res6 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 6, i8 %mask)
    880   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
    881   %res7 = call i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16> %a0, <8 x i16> %a1, i32 7, i8 %mask)
    882   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
    883   ret <8 x i8> %vec7
    884 }
    885 
    886 declare i8 @llvm.x86.avx512.mask.ucmp.w.128(<8 x i16>, <8 x i16>, i32, i8) nounwind readnone
    887 
    888 declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
    889 
    890 define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
    891 ; CHECK-LABEL: test_mask_vfmadd256_ps:
    892 ; CHECK:       ## BB#0:
    893 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    894 ; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xc2]
    895 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    896   %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
    897   ret <8 x float> %res
    898 }
    899 
    900 declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
    901 
    902 define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
    903 ; CHECK-LABEL: test_mask_vfmadd128_ps:
    904 ; CHECK:       ## BB#0:
    905 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    906 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
    907 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    908   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
    909   ret <4 x float> %res
    910 }
    911 
    912 declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
    913 
    914 define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
    915 ; CHECK-LABEL: test_mask_fmadd256_pd:
    916 ; CHECK:       ## BB#0:
    917 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    918 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
    919 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    920   %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
    921   ret <4 x double> %res
    922 }
    923 
    924 declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
    925 
    926 define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
    927 ; CHECK-LABEL: test_mask_fmadd128_pd:
    928 ; CHECK:       ## BB#0:
    929 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    930 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
    931 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    932   %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
    933   ret <2 x double> %res
    934 }
    935 
    936 define <2 x double>@test_int_x86_avx512_mask_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
    937 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_128:
    938 ; CHECK:       ## BB#0:
    939 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    940 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
    941 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xda]
    942 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
    943 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
    944 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    945   %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
    946   %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
    947   %res2 = fadd <2 x double> %res, %res1
    948   ret <2 x double> %res2
    949 }
    950 
    951 declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
    952 
    953 define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
    954 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
    955 ; CHECK:       ## BB#0:
    956 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    957 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
    958 ; CHECK-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd9]
    959 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
    960 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
    961 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    962   %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
    963   %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
    964   %res2 = fadd <2 x double> %res, %res1
    965   ret <2 x double> %res2
    966 }
    967 
    968 declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
    969 
    970 define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
    971 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
    972 ; CHECK:       ## BB#0:
    973 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    974 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
    975 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xda]
    976 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
    977 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
    978 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    979   %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
    980   %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
    981   %res2 = fadd <2 x double> %res, %res1
    982   ret <2 x double> %res2
    983 }
    984 
    985 define <4 x double>@test_int_x86_avx512_mask_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
    986 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_pd_256:
    987 ; CHECK:       ## BB#0:
    988 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
    989 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
    990 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xda]
    991 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
    992 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
    993 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    994   %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
    995   %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
    996   %res2 = fadd <4 x double> %res, %res1
    997   ret <4 x double> %res2
    998 }
    999 
   1000 declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
   1001 
   1002 define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1003 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
   1004 ; CHECK:       ## BB#0:
   1005 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1006 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1007 ; CHECK-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd9]
   1008 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
   1009 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1010 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1011   %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1012   %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1013   %res2 = fadd <4 x double> %res, %res1
   1014   ret <4 x double> %res2
   1015 }
   1016 
   1017 declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
   1018 
   1019 define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1020 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
   1021 ; CHECK:       ## BB#0:
   1022 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1023 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1024 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xda]
   1025 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
   1026 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1027 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1028   %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1029   %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1030   %res2 = fadd <4 x double> %res, %res1
   1031   ret <4 x double> %res2
   1032 }
   1033 
   1034 define <4 x float>@test_int_x86_avx512_mask_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1035 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_128:
   1036 ; CHECK:       ## BB#0:
   1037 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1038 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1039 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xda]
   1040 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
   1041 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1042 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1043   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1044   %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1045   %res2 = fadd <4 x float> %res, %res1
   1046   ret <4 x float> %res2
   1047 }
   1048 
   1049 declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1050 
   1051 define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1052 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
   1053 ; CHECK:       ## BB#0:
   1054 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1055 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1056 ; CHECK-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd9]
   1057 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
   1058 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1059 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1060   %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1061   %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1062   %res2 = fadd <4 x float> %res, %res1
   1063   ret <4 x float> %res2
   1064 }
   1065 
   1066 declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1067 
   1068 define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1069 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
   1070 ; CHECK:       ## BB#0:
   1071 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1072 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1073 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa8,0xda]
   1074 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
   1075 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1076 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1077   %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1078   %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1079   %res2 = fadd <4 x float> %res, %res1
   1080   ret <4 x float> %res2
   1081 }
   1082 
   1083 define <8 x float>@test_int_x86_avx512_mask_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1084 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmadd_ps_256:
   1085 ; CHECK:       ## BB#0:
   1086 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1087 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1088 ; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa8,0xda]
   1089 ; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa8,0xc2]
   1090 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1091 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1092   %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1093   %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1094   %res2 = fadd <8 x float> %res, %res1
   1095   ret <8 x float> %res2
   1096 }
   1097 
   1098 declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1099 
   1100 define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1101 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
   1102 ; CHECK:       ## BB#0:
   1103 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1104 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1105 ; CHECK-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd9]
   1106 ; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa8,0xc2]
   1107 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1108 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1109   %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1110   %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1111   %res2 = fadd <8 x float> %res, %res1
   1112   ret <8 x float> %res2
   1113 }
   1114 
   1115 declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1116 
   1117 define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1118 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
   1119 ; CHECK:       ## BB#0:
   1120 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1121 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1122 ; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xda]
   1123 ; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa8,0xc2]
   1124 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1125 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1126   %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1127   %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1128   %res2 = fadd <8 x float> %res, %res1
   1129   ret <8 x float> %res2
   1130 }
   1131 
   1132 
   1133 declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
   1134 
   1135 define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
   1136 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
   1137 ; CHECK:       ## BB#0:
   1138 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1139 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1140 ; CHECK-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd9]
   1141 ; CHECK-NEXT:    vfmsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xaa,0xc2]
   1142 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
   1143 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1144   %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   1145   %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
   1146   %res2 = fadd <2 x double> %res, %res1
   1147   ret <2 x double> %res2
   1148 }
   1149 
   1150 
   1151 declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
   1152 
   1153 define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1154 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
   1155 ; CHECK:       ## BB#0:
   1156 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1157 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1158 ; CHECK-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd9]
   1159 ; CHECK-NEXT:    vfmsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xaa,0xc2]
   1160 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1161 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1162   %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1163   %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1164   %res2 = fadd <4 x double> %res, %res1
   1165   ret <4 x double> %res2
   1166 }
   1167 
   1168 declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1169 
   1170 define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1171 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
   1172 ; CHECK:       ## BB#0:
   1173 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1174 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1175 ; CHECK-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd9]
   1176 ; CHECK-NEXT:    vfmsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xaa,0xc2]
   1177 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1178 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1179   %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1180   %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1181   %res2 = fadd <4 x float> %res, %res1
   1182   ret <4 x float> %res2
   1183 }
   1184 
   1185 declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1186 
   1187 define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1188 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
   1189 ; CHECK:       ## BB#0:
   1190 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1191 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1192 ; CHECK-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd9]
   1193 ; CHECK-NEXT:    vfmsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xaa,0xc2]
   1194 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1195 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1196   %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1197   %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1198   %res2 = fadd <8 x float> %res, %res1
   1199   ret <8 x float> %res2
   1200 }
   1201 
   1202 declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
   1203 
   1204 define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
   1205 ; CHECK-LABEL: test_mask_vfnmadd256_ps:
   1206 ; CHECK:       ## BB#0:
   1207 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1208 ; CHECK-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xc2]
   1209 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1210   %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
   1211   ret <8 x float> %res
   1212 }
   1213 
   1214 declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
   1215 
   1216 define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
   1217 ; CHECK-LABEL: test_mask_vfnmadd128_ps:
   1218 ; CHECK:       ## BB#0:
   1219 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1220 ; CHECK-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xc2]
   1221 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1222   %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   1223   ret <4 x float> %res
   1224 }
   1225 
   1226 declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
   1227 
   1228 define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
   1229 ; CHECK-LABEL: test_mask_vfnmadd256_pd:
   1230 ; CHECK:       ## BB#0:
   1231 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1232 ; CHECK-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xc2]
   1233 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1234   %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   1235   ret <4 x double> %res
   1236 }
   1237 
   1238 declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
   1239 
   1240 define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
   1241 ; CHECK-LABEL: test_mask_vfnmadd128_pd:
   1242 ; CHECK:       ## BB#0:
   1243 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1244 ; CHECK-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xc2]
   1245 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1246   %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   1247   ret <2 x double> %res
   1248 }
   1249 
   1250 declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
   1251 
   1252 define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
   1253 ; CHECK-LABEL: test_mask_vfnmsub256_ps:
   1254 ; CHECK:       ## BB#0:
   1255 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1256 ; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xc2]
   1257 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1258   %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
   1259   ret <8 x float> %res
   1260 }
   1261 
   1262 declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
   1263 
   1264 define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
   1265 ; CHECK-LABEL: test_mask_vfnmsub128_ps:
   1266 ; CHECK:       ## BB#0:
   1267 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1268 ; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xc2]
   1269 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1270   %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   1271   ret <4 x float> %res
   1272 }
   1273 
   1274 declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
   1275 
   1276 define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
   1277 ; CHECK-LABEL: test_mask_vfnmsub256_pd:
   1278 ; CHECK:       ## BB#0:
   1279 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1280 ; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xc2]
   1281 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1282   %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   1283   ret <4 x double> %res
   1284 }
   1285 
   1286 declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
   1287 
   1288 define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
   1289 ; CHECK-LABEL: test_mask_vfnmsub128_pd:
   1290 ; CHECK:       ## BB#0:
   1291 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1292 ; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xc2]
   1293 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1294   %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   1295   ret <2 x double> %res
   1296 }
   1297 
   1298 
   1299 define <2 x double>@test_int_x86_avx512_mask_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
   1300 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_128:
   1301 ; CHECK:       ## BB#0:
   1302 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1303 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1304 ; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xae,0xda]
   1305 ; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xae,0xc2]
   1306 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
   1307 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1308   %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   1309   %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
   1310   %res2 = fadd <2 x double> %res, %res1
   1311   ret <2 x double> %res2
   1312 }
   1313 
   1314 declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
   1315 
   1316 define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
   1317 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
   1318 ; CHECK:       ## BB#0:
   1319 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1320 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1321 ; CHECK-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd9]
   1322 ; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xae,0xc2]
   1323 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
   1324 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1325   %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   1326   %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
   1327   %res2 = fadd <2 x double> %res, %res1
   1328   ret <2 x double> %res2
   1329 }
   1330 
   1331 define <4 x double>@test_int_x86_avx512_mask_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1332 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_pd_256:
   1333 ; CHECK:       ## BB#0:
   1334 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1335 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1336 ; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xae,0xda]
   1337 ; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xae,0xc2]
   1338 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1339 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1340   %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1341   %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1342   %res2 = fadd <4 x double> %res, %res1
   1343   ret <4 x double> %res2
   1344 }
   1345 
   1346 declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
   1347 
   1348 define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1349 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
   1350 ; CHECK:       ## BB#0:
   1351 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1352 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1353 ; CHECK-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd9]
   1354 ; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xae,0xc2]
   1355 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1356 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1357   %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1358   %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1359   %res2 = fadd <4 x double> %res, %res1
   1360   ret <4 x double> %res2
   1361 }
   1362 
   1363 define <4 x float>@test_int_x86_avx512_mask_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1364 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_128:
   1365 ; CHECK:       ## BB#0:
   1366 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1367 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1368 ; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xae,0xda]
   1369 ; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xae,0xc2]
   1370 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1371 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1372   %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1373   %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1374   %res2 = fadd <4 x float> %res, %res1
   1375   ret <4 x float> %res2
   1376 }
   1377 
   1378 declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1379 
   1380 define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1381 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
   1382 ; CHECK:       ## BB#0:
   1383 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1384 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1385 ; CHECK-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd9]
   1386 ; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xae,0xc2]
   1387 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1388 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1389   %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1390   %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1391   %res2 = fadd <4 x float> %res, %res1
   1392   ret <4 x float> %res2
   1393 }
   1394 
   1395 define <8 x float>@test_int_x86_avx512_mask_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1396 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmsub_ps_256:
   1397 ; CHECK:       ## BB#0:
   1398 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1399 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1400 ; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xae,0xda]
   1401 ; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xae,0xc2]
   1402 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1403 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1404   %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1405   %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1406   %res2 = fadd <8 x float> %res, %res1
   1407   ret <8 x float> %res2
   1408 }
   1409 
   1410 declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1411 
   1412 define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1413 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
   1414 ; CHECK:       ## BB#0:
   1415 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1416 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1417 ; CHECK-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd9]
   1418 ; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xae,0xc2]
   1419 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1420 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1421   %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1422   %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1423   %res2 = fadd <8 x float> %res, %res1
   1424   ret <8 x float> %res2
   1425 }
   1426 
   1427 define <2 x double>@test_int_x86_avx512_mask_vfnmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
   1428 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_128:
   1429 ; CHECK:       ## BB#0:
   1430 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1431 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1432 ; CHECK-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xac,0xda]
   1433 ; CHECK-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xac,0xc2]
   1434 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
   1435 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1436   %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   1437   %res1 = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
   1438   %res2 = fadd <2 x double> %res, %res1
   1439   ret <2 x double> %res2
   1440 }
   1441 
   1442 define <4 x double>@test_int_x86_avx512_mask_vfnmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1443 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_pd_256:
   1444 ; CHECK:       ## BB#0:
   1445 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1446 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1447 ; CHECK-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xac,0xda]
   1448 ; CHECK-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xac,0xc2]
   1449 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1450 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1451   %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1452   %res1 = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1453   %res2 = fadd <4 x double> %res, %res1
   1454   ret <4 x double> %res2
   1455 }
   1456 
   1457 define <4 x float>@test_int_x86_avx512_mask_vfnmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1458 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_128:
   1459 ; CHECK:       ## BB#0:
   1460 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1461 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1462 ; CHECK-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xac,0xda]
   1463 ; CHECK-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xac,0xc2]
   1464 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1465 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1466   %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1467   %res1 = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1468   %res2 = fadd <4 x float> %res, %res1
   1469   ret <4 x float> %res2
   1470 }
   1471 
   1472 define <8 x float>@test_int_x86_avx512_mask_vfnmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1473 ; CHECK-LABEL: test_int_x86_avx512_mask_vfnmadd_ps_256:
   1474 ; CHECK:       ## BB#0:
   1475 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1476 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1477 ; CHECK-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xac,0xda]
   1478 ; CHECK-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xac,0xc2]
   1479 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1480 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1481   %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1482   %res1 = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1483   %res2 = fadd <8 x float> %res, %res1
   1484   ret <8 x float> %res2
   1485 }
   1486 
   1487 declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone
   1488 
   1489 define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
   1490 ; CHECK-LABEL: test_mask_fmaddsub256_ps:
   1491 ; CHECK:       ## BB#0:
   1492 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1493 ; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xc2]
   1494 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1495   %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
   1496   ret <8 x float> %res
   1497 }
   1498 
   1499 declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone
   1500 
   1501 define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
   1502 ; CHECK-LABEL: test_mask_fmaddsub128_ps:
   1503 ; CHECK:       ## BB#0:
   1504 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1505 ; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xc2]
   1506 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1507   %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
   1508   ret <4 x float> %res
   1509 }
   1510 
   1511 declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone
   1512 
   1513 define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
   1514 ; CHECK-LABEL: test_mask_vfmaddsub256_pd:
   1515 ; CHECK:       ## BB#0:
   1516 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1517 ; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xc2]
   1518 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1519   %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   1520   ret <4 x double> %res
   1521 }
   1522 
   1523 declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone
   1524 
   1525 define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
   1526 ; CHECK-LABEL: test_mask_vfmaddsub128_pd:
   1527 ; CHECK:       ## BB#0:
   1528 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1529 ; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xc2]
   1530 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1531   %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   1532   ret <2 x double> %res
   1533 }
   1534 
   1535 define <2 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
   1536 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_128:
   1537 ; CHECK:       ## BB#0:
   1538 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1539 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1540 ; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa6,0xda]
   1541 ; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa6,0xc2]
   1542 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
   1543 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1544   %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   1545   %res1 = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
   1546   %res2 = fadd <2 x double> %res, %res1
   1547   ret <2 x double> %res2
   1548 }
   1549 
   1550 declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
   1551 
   1552 define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
   1553 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
   1554 ; CHECK:       ## BB#0:
   1555 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1556 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1557 ; CHECK-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd9]
   1558 ; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa6,0xc2]
   1559 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
   1560 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1561   %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   1562   %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
   1563   %res2 = fadd <2 x double> %res, %res1
   1564   ret <2 x double> %res2
   1565 }
   1566 
   1567 declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
   1568 
   1569 define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
   1570 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
   1571 ; CHECK:       ## BB#0:
   1572 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1573 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1574 ; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xda]
   1575 ; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa6,0xc2]
   1576 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
   1577 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1578   %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   1579   %res1 = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
   1580   %res2 = fadd <2 x double> %res, %res1
   1581   ret <2 x double> %res2
   1582 }
   1583 
   1584 define <4 x double>@test_int_x86_avx512_mask_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1585 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_pd_256:
   1586 ; CHECK:       ## BB#0:
   1587 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1588 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1589 ; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa6,0xda]
   1590 ; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa6,0xc2]
   1591 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1592 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1593   %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1594   %res1 = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1595   %res2 = fadd <4 x double> %res, %res1
   1596   ret <4 x double> %res2
   1597 }
   1598 
   1599 declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
   1600 
   1601 define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1602 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
   1603 ; CHECK:       ## BB#0:
   1604 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1605 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1606 ; CHECK-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd9]
   1607 ; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa6,0xc2]
   1608 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1609 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1610   %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1611   %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1612   %res2 = fadd <4 x double> %res, %res1
   1613   ret <4 x double> %res2
   1614 }
   1615 
   1616 declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
   1617 
   1618 define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1619 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
   1620 ; CHECK:       ## BB#0:
   1621 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1622 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1623 ; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xda]
   1624 ; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa6,0xc2]
   1625 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1626 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1627   %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1628   %res1 = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1629   %res2 = fadd <4 x double> %res, %res1
   1630   ret <4 x double> %res2
   1631 }
   1632 
   1633 define <4 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1634 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_128:
   1635 ; CHECK:       ## BB#0:
   1636 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1637 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1638 ; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa6,0xda]
   1639 ; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa6,0xc2]
   1640 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1641 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1642   %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1643   %res1 = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1644   %res2 = fadd <4 x float> %res, %res1
   1645   ret <4 x float> %res2
   1646 }
   1647 
   1648 declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1649 
   1650 define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1651 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
   1652 ; CHECK:       ## BB#0:
   1653 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1654 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1655 ; CHECK-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd9]
   1656 ; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa6,0xc2]
   1657 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1658 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1659   %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1660   %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1661   %res2 = fadd <4 x float> %res, %res1
   1662   ret <4 x float> %res2
   1663 }
   1664 
   1665 declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1666 
   1667 define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1668 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
   1669 ; CHECK:       ## BB#0:
   1670 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1671 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1672 ; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0x89,0xa6,0xda]
   1673 ; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa6,0xc2]
   1674 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1675 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1676   %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1677   %res1 = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1678   %res2 = fadd <4 x float> %res, %res1
   1679   ret <4 x float> %res2
   1680 }
   1681 
   1682 define <8 x float>@test_int_x86_avx512_mask_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1683 ; CHECK-LABEL: test_int_x86_avx512_mask_vfmaddsub_ps_256:
   1684 ; CHECK:       ## BB#0:
   1685 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1686 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1687 ; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x75,0x29,0xa6,0xda]
   1688 ; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa6,0xc2]
   1689 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1690 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1691   %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1692   %res1 = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1693   %res2 = fadd <8 x float> %res, %res1
   1694   ret <8 x float> %res2
   1695 }
   1696 
   1697 declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1698 
   1699 define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1700 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
   1701 ; CHECK:       ## BB#0:
   1702 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1703 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1704 ; CHECK-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd9]
   1705 ; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa6,0xc2]
   1706 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1707 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1708   %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1709   %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1710   %res2 = fadd <8 x float> %res, %res1
   1711   ret <8 x float> %res2
   1712 }
   1713 
   1714 declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1715 
   1716 define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1717 ; CHECK-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
   1718 ; CHECK:       ## BB#0:
   1719 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1720 ; CHECK-NEXT:    vmovaps %zmm0, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd8]
   1721 ; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xda]
   1722 ; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa6,0xc2]
   1723 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1724 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1725   %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1726   %res1 = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1727   %res2 = fadd <8 x float> %res, %res1
   1728   ret <8 x float> %res2
   1729 }
   1730 
   1731 declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
   1732 
   1733 define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
   1734 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
   1735 ; CHECK:       ## BB#0:
   1736 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1737 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1738 ; CHECK-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd9]
   1739 ; CHECK-NEXT:    vfmsubadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa7,0xc2]
   1740 ; CHECK-NEXT:    vaddpd %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0xe5,0x08,0x58,0xc0]
   1741 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1742   %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   1743   %res1 = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
   1744   %res2=fadd <2 x double> %res, %res1
   1745   ret <2 x double> %res2
   1746 }
   1747 
   1748 declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)
   1749 
   1750 define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
   1751 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
   1752 ; CHECK:       ## BB#0:
   1753 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1754 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1755 ; CHECK-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd9]
   1756 ; CHECK-NEXT:    vfmsubadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa7,0xc2]
   1757 ; CHECK-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0xe5,0x28,0x58,0xc0]
   1758 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1759   %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
   1760   %res1 = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
   1761   %res2=fadd <4 x double> %res, %res1
   1762   ret <4 x double> %res2
   1763 }
   1764 
   1765 declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
   1766 
   1767 define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
   1768 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
   1769 ; CHECK:       ## BB#0:
   1770 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1771 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1772 ; CHECK-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd9]
   1773 ; CHECK-NEXT:    vfmsubadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa7,0xc2]
   1774 ; CHECK-NEXT:    vaddps %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x64,0x08,0x58,0xc0]
   1775 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1776   %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
   1777   %res1 = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
   1778   %res2=fadd <4 x float> %res, %res1
   1779   ret <4 x float> %res2
   1780 }
   1781 
   1782 declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
   1783 
   1784 define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
   1785 ; CHECK-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
   1786 ; CHECK:       ## BB#0:
   1787 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1788 ; CHECK-NEXT:    vmovaps %zmm2, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xda]
   1789 ; CHECK-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd9]
   1790 ; CHECK-NEXT:    vfmsubadd213ps %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0x75,0x28,0xa7,0xc2]
   1791 ; CHECK-NEXT:    vaddps %ymm0, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x64,0x28,0x58,0xc0]
   1792 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1793   %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
   1794   %res1 = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
   1795   %res2=fadd <8 x float> %res, %res1
   1796   ret <8 x float> %res2
   1797 }
   1798 
   1799 
   1800 define <4 x float> @test_mask_vfmadd128_ps_r(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
   1801 ; CHECK-LABEL: test_mask_vfmadd128_ps_r:
   1802 ; CHECK:       ## BB#0:
   1803 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1804 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0xc2]
   1805 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1806   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   1807   ret <4 x float> %res
   1808 }
   1809 
   1810 define <4 x float> @test_mask_vfmadd128_ps_rz(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
   1811 ; CHECK-LABEL: test_mask_vfmadd128_ps_rz:
   1812 ; CHECK:       ## BB#0:
   1813 ; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0xc2]
   1814 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1815   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   1816   ret <4 x float> %res
   1817 }
   1818 
   1819 define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
   1820 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmk:
   1821 ; CHECK:       ## BB#0:
   1822 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1823 ; CHECK-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
   1824 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1825   %a2 = load <4 x float>, <4 x float>* %ptr_a2
   1826   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   1827   ret <4 x float> %res
   1828 }
   1829 
   1830 define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
   1831 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmka:
   1832 ; CHECK:       ## BB#0:
   1833 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1834 ; CHECK-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
   1835 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1836   %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
   1837   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
   1838   ret <4 x float> %res
   1839 }
   1840 
   1841 define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
   1842 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkz:
   1843 ; CHECK:       ## BB#0:
   1844 ; CHECK-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07]
   1845 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1846   %a2 = load <4 x float>, <4 x float>* %ptr_a2
   1847   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   1848   ret <4 x float> %res
   1849 }
   1850 
   1851 define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
   1852 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmkza:
   1853 ; CHECK:       ## BB#0:
   1854 ; CHECK-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x08,0xa8,0x07]
   1855 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1856   %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
   1857   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
   1858   ret <4 x float> %res
   1859 }
   1860 
   1861 define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
   1862 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmb:
   1863 ; CHECK:       ## BB#0:
   1864 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1865 ; CHECK-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
   1866 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1867   %q = load float, float* %ptr_a2
   1868   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1869   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   1870   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
   1871   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
   1872   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
   1873   ret <4 x float> %res
   1874 }
   1875 
   1876 define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
   1877 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmba:
   1878 ; CHECK:       ## BB#0:
   1879 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1880 ; CHECK-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
   1881 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1882   %q = load float, float* %ptr_a2, align 4
   1883   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1884   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   1885   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
   1886   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
   1887   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
   1888   ret <4 x float> %res
   1889 }
   1890 
   1891 define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
   1892 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbz:
   1893 ; CHECK:       ## BB#0:
   1894 ; CHECK-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
   1895 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1896   %q = load float, float* %ptr_a2
   1897   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1898   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   1899   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
   1900   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
   1901   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
   1902   ret <4 x float> %res
   1903 }
   1904 
   1905 define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
   1906 ; CHECK-LABEL: test_mask_vfmadd128_ps_rmbza:
   1907 ; CHECK:       ## BB#0:
   1908 ; CHECK-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
   1909 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1910   %q = load float, float* %ptr_a2, align 4
   1911   %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
   1912   %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
   1913   %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
   1914   %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
   1915   %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
   1916   ret <4 x float> %res
   1917 }
   1918 
   1919 define <2 x double> @test_mask_vfmadd128_pd_r(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
   1920 ; CHECK-LABEL: test_mask_vfmadd128_pd_r:
   1921 ; CHECK:       ## BB#0:
   1922 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1923 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0xc2]
   1924 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1925   %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   1926   ret <2 x double> %res
   1927 }
   1928 
   1929 define <2 x double> @test_mask_vfmadd128_pd_rz(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
   1930 ; CHECK-LABEL: test_mask_vfmadd128_pd_rz:
   1931 ; CHECK:       ## BB#0:
   1932 ; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0xc2]
   1933 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1934   %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
   1935   ret <2 x double> %res
   1936 }
   1937 
   1938 define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
   1939 ; CHECK-LABEL: test_mask_vfmadd128_pd_rmk:
   1940 ; CHECK:       ## BB#0:
   1941 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1942 ; CHECK-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
   1943 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1944   %a2 = load <2 x double>, <2 x double>* %ptr_a2
   1945   %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
   1946   ret <2 x double> %res
   1947 }
   1948 
   1949 define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
   1950 ; CHECK-LABEL: test_mask_vfmadd128_pd_rmkz:
   1951 ; CHECK:       ## BB#0:
   1952 ; CHECK-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0xa8,0x07]
   1953 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1954   %a2 = load <2 x double>, <2 x double>* %ptr_a2
   1955   %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
   1956   ret <2 x double> %res
   1957 }
   1958 
   1959 define <4 x double> @test_mask_vfmadd256_pd_r(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
   1960 ; CHECK-LABEL: test_mask_vfmadd256_pd_r:
   1961 ; CHECK:       ## BB#0:
   1962 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   1963 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0xc2]
   1964 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1965   %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   1966   ret <4 x double> %res
   1967 }
   1968 
   1969 define <4 x double> @test_mask_vfmadd256_pd_rz(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
   1970 ; CHECK-LABEL: test_mask_vfmadd256_pd_rz:
   1971 ; CHECK:       ## BB#0:
   1972 ; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0xc2]
   1973 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1974   %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
   1975   ret <4 x double> %res
   1976 }
   1977 
   1978 define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
   1979 ; CHECK-LABEL: test_mask_vfmadd256_pd_rmk:
   1980 ; CHECK:       ## BB#0:
   1981 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   1982 ; CHECK-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
   1983 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1984   %a2 = load <4 x double>, <4 x double>* %ptr_a2
   1985   %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
   1986   ret <4 x double> %res
   1987 }
   1988 
   1989 define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
   1990 ; CHECK-LABEL: test_mask_vfmadd256_pd_rmkz:
   1991 ; CHECK:       ## BB#0:
   1992 ; CHECK-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0xa8,0x07]
   1993 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   1994   %a2 = load <4 x double>, <4 x double>* %ptr_a2
   1995   %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
   1996   ret <4 x double> %res
   1997 }
   1998 define <8 x i16> @test_mask_add_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   1999 ; CHECK-LABEL: test_mask_add_epi16_rr_128:
   2000 ; CHECK:       ## BB#0:
   2001 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
   2002 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2003   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   2004   ret <8 x i16> %res
   2005 }
   2006 
   2007 define <8 x i16> @test_mask_add_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   2008 ; CHECK-LABEL: test_mask_add_epi16_rrk_128:
   2009 ; CHECK:       ## BB#0:
   2010 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2011 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0xd1]
   2012 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2013 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2014   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   2015   ret <8 x i16> %res
   2016 }
   2017 
   2018 define <8 x i16> @test_mask_add_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   2019 ; CHECK-LABEL: test_mask_add_epi16_rrkz_128:
   2020 ; CHECK:       ## BB#0:
   2021 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2022 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0xc1]
   2023 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2024   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   2025   ret <8 x i16> %res
   2026 }
   2027 
   2028 define <8 x i16> @test_mask_add_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   2029 ; CHECK-LABEL: test_mask_add_epi16_rm_128:
   2030 ; CHECK:       ## BB#0:
   2031 ; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0x07]
   2032 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2033   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2034   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   2035   ret <8 x i16> %res
   2036 }
   2037 
   2038 define <8 x i16> @test_mask_add_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   2039 ; CHECK-LABEL: test_mask_add_epi16_rmk_128:
   2040 ; CHECK:       ## BB#0:
   2041 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2042 ; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xfd,0x0f]
   2043 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2044 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2045   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2046   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   2047   ret <8 x i16> %res
   2048 }
   2049 
   2050 define <8 x i16> @test_mask_add_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   2051 ; CHECK-LABEL: test_mask_add_epi16_rmkz_128:
   2052 ; CHECK:       ## BB#0:
   2053 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2054 ; CHECK-NEXT:    vpaddw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xfd,0x07]
   2055 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2056   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2057   %res = call <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   2058   ret <8 x i16> %res
   2059 }
   2060 
   2061 declare <8 x i16> @llvm.x86.avx512.mask.padd.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2062 
   2063 define <16 x i16> @test_mask_add_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   2064 ; CHECK-LABEL: test_mask_add_epi16_rr_256:
   2065 ; CHECK:       ## BB#0:
   2066 ; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc1]
   2067 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2068   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   2069   ret <16 x i16> %res
   2070 }
   2071 
   2072 define <16 x i16> @test_mask_add_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   2073 ; CHECK-LABEL: test_mask_add_epi16_rrk_256:
   2074 ; CHECK:       ## BB#0:
   2075 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2076 ; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0xd1]
   2077 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2078 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2079   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   2080   ret <16 x i16> %res
   2081 }
   2082 
   2083 define <16 x i16> @test_mask_add_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   2084 ; CHECK-LABEL: test_mask_add_epi16_rrkz_256:
   2085 ; CHECK:       ## BB#0:
   2086 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2087 ; CHECK-NEXT:    vpaddw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0xc1]
   2088 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2089   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   2090   ret <16 x i16> %res
   2091 }
   2092 
   2093 define <16 x i16> @test_mask_add_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   2094 ; CHECK-LABEL: test_mask_add_epi16_rm_256:
   2095 ; CHECK:       ## BB#0:
   2096 ; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0x07]
   2097 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2098   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2099   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   2100   ret <16 x i16> %res
   2101 }
   2102 
   2103 define <16 x i16> @test_mask_add_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   2104 ; CHECK-LABEL: test_mask_add_epi16_rmk_256:
   2105 ; CHECK:       ## BB#0:
   2106 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2107 ; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xfd,0x0f]
   2108 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2109 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2110   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2111   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   2112   ret <16 x i16> %res
   2113 }
   2114 
   2115 define <16 x i16> @test_mask_add_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   2116 ; CHECK-LABEL: test_mask_add_epi16_rmkz_256:
   2117 ; CHECK:       ## BB#0:
   2118 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2119 ; CHECK-NEXT:    vpaddw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xfd,0x07]
   2120 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2121   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2122   %res = call <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   2123   ret <16 x i16> %res
   2124 }
   2125 
   2126 declare <16 x i16> @llvm.x86.avx512.mask.padd.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   2127 
   2128 define <8 x i16> @test_mask_sub_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   2129 ; CHECK-LABEL: test_mask_sub_epi16_rr_128:
   2130 ; CHECK:       ## BB#0:
   2131 ; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0xc1]
   2132 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2133   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   2134   ret <8 x i16> %res
   2135 }
   2136 
   2137 define <8 x i16> @test_mask_sub_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   2138 ; CHECK-LABEL: test_mask_sub_epi16_rrk_128:
   2139 ; CHECK:       ## BB#0:
   2140 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2141 ; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0xd1]
   2142 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2143 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2144   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   2145   ret <8 x i16> %res
   2146 }
   2147 
   2148 define <8 x i16> @test_mask_sub_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   2149 ; CHECK-LABEL: test_mask_sub_epi16_rrkz_128:
   2150 ; CHECK:       ## BB#0:
   2151 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2152 ; CHECK-NEXT:    vpsubw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0xc1]
   2153 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2154   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   2155   ret <8 x i16> %res
   2156 }
   2157 
   2158 define <8 x i16> @test_mask_sub_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   2159 ; CHECK-LABEL: test_mask_sub_epi16_rm_128:
   2160 ; CHECK:       ## BB#0:
   2161 ; CHECK-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf9,0x07]
   2162 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2163   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2164   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   2165   ret <8 x i16> %res
   2166 }
   2167 
   2168 define <8 x i16> @test_mask_sub_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   2169 ; CHECK-LABEL: test_mask_sub_epi16_rmk_128:
   2170 ; CHECK:       ## BB#0:
   2171 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2172 ; CHECK-NEXT:    vpsubw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf9,0x0f]
   2173 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2174 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2175   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2176   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   2177   ret <8 x i16> %res
   2178 }
   2179 
   2180 define <8 x i16> @test_mask_sub_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   2181 ; CHECK-LABEL: test_mask_sub_epi16_rmkz_128:
   2182 ; CHECK:       ## BB#0:
   2183 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2184 ; CHECK-NEXT:    vpsubw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf9,0x07]
   2185 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2186   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2187   %res = call <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   2188   ret <8 x i16> %res
   2189 }
   2190 
   2191 declare <8 x i16> @llvm.x86.avx512.mask.psub.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2192 
   2193 define <16 x i16> @test_mask_sub_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   2194 ; CHECK-LABEL: test_mask_sub_epi16_rr_256:
   2195 ; CHECK:       ## BB#0:
   2196 ; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0xc1]
   2197 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2198   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   2199   ret <16 x i16> %res
   2200 }
   2201 
   2202 define <16 x i16> @test_mask_sub_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   2203 ; CHECK-LABEL: test_mask_sub_epi16_rrk_256:
   2204 ; CHECK:       ## BB#0:
   2205 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2206 ; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0xd1]
   2207 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2208 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2209   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   2210   ret <16 x i16> %res
   2211 }
   2212 
   2213 define <16 x i16> @test_mask_sub_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   2214 ; CHECK-LABEL: test_mask_sub_epi16_rrkz_256:
   2215 ; CHECK:       ## BB#0:
   2216 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2217 ; CHECK-NEXT:    vpsubw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0xc1]
   2218 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2219   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   2220   ret <16 x i16> %res
   2221 }
   2222 
   2223 define <16 x i16> @test_mask_sub_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   2224 ; CHECK-LABEL: test_mask_sub_epi16_rm_256:
   2225 ; CHECK:       ## BB#0:
   2226 ; CHECK-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf9,0x07]
   2227 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2228   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2229   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   2230   ret <16 x i16> %res
   2231 }
   2232 
   2233 define <16 x i16> @test_mask_sub_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   2234 ; CHECK-LABEL: test_mask_sub_epi16_rmk_256:
   2235 ; CHECK:       ## BB#0:
   2236 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2237 ; CHECK-NEXT:    vpsubw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf9,0x0f]
   2238 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2239 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2240   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2241   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   2242   ret <16 x i16> %res
   2243 }
   2244 
   2245 define <16 x i16> @test_mask_sub_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   2246 ; CHECK-LABEL: test_mask_sub_epi16_rmkz_256:
   2247 ; CHECK:       ## BB#0:
   2248 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2249 ; CHECK-NEXT:    vpsubw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf9,0x07]
   2250 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2251   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2252   %res = call <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   2253   ret <16 x i16> %res
   2254 }
   2255 
   2256 declare <16 x i16> @llvm.x86.avx512.mask.psub.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   2257 
   2258 define <32 x i16> @test_mask_add_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
   2259 ; CHECK-LABEL: test_mask_add_epi16_rr_512:
   2260 ; CHECK:       ## BB#0:
   2261 ; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc1]
   2262 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2263   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   2264   ret <32 x i16> %res
   2265 }
   2266 
   2267 define <32 x i16> @test_mask_add_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
   2268 ; CHECK-LABEL: test_mask_add_epi16_rrk_512:
   2269 ; CHECK:       ## BB#0:
   2270 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   2271 ; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0xd1]
   2272 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2273 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2274   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   2275   ret <32 x i16> %res
   2276 }
   2277 
   2278 define <32 x i16> @test_mask_add_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
   2279 ; CHECK-LABEL: test_mask_add_epi16_rrkz_512:
   2280 ; CHECK:       ## BB#0:
   2281 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   2282 ; CHECK-NEXT:    vpaddw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0xc1]
   2283 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2284   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   2285   ret <32 x i16> %res
   2286 }
   2287 
   2288 define <32 x i16> @test_mask_add_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
   2289 ; CHECK-LABEL: test_mask_add_epi16_rm_512:
   2290 ; CHECK:       ## BB#0:
   2291 ; CHECK-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xfd,0x07]
   2292 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2293   %b = load <32 x i16>, <32 x i16>* %ptr_b
   2294   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   2295   ret <32 x i16> %res
   2296 }
   2297 
   2298 define <32 x i16> @test_mask_add_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   2299 ; CHECK-LABEL: test_mask_add_epi16_rmk_512:
   2300 ; CHECK:       ## BB#0:
   2301 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   2302 ; CHECK-NEXT:    vpaddw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xfd,0x0f]
   2303 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2304 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2305   %b = load <32 x i16>, <32 x i16>* %ptr_b
   2306   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   2307   ret <32 x i16> %res
   2308 }
   2309 
   2310 define <32 x i16> @test_mask_add_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
   2311 ; CHECK-LABEL: test_mask_add_epi16_rmkz_512:
   2312 ; CHECK:       ## BB#0:
   2313 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   2314 ; CHECK-NEXT:    vpaddw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xfd,0x07]
   2315 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2316   %b = load <32 x i16>, <32 x i16>* %ptr_b
   2317   %res = call <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   2318   ret <32 x i16> %res
   2319 }
   2320 
   2321 declare <32 x i16> @llvm.x86.avx512.mask.padd.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2322 
   2323 define <32 x i16> @test_mask_sub_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
   2324 ; CHECK-LABEL: test_mask_sub_epi16_rr_512:
   2325 ; CHECK:       ## BB#0:
   2326 ; CHECK-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0xc1]
   2327 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2328   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   2329   ret <32 x i16> %res
   2330 }
   2331 
   2332 define <32 x i16> @test_mask_sub_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
   2333 ; CHECK-LABEL: test_mask_sub_epi16_rrk_512:
   2334 ; CHECK:       ## BB#0:
   2335 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   2336 ; CHECK-NEXT:    vpsubw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0xd1]
   2337 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2338 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2339   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   2340   ret <32 x i16> %res
   2341 }
   2342 
   2343 define <32 x i16> @test_mask_sub_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
   2344 ; CHECK-LABEL: test_mask_sub_epi16_rrkz_512:
   2345 ; CHECK:       ## BB#0:
   2346 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   2347 ; CHECK-NEXT:    vpsubw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0xc1]
   2348 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2349   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   2350   ret <32 x i16> %res
   2351 }
   2352 
   2353 define <32 x i16> @test_mask_sub_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
   2354 ; CHECK-LABEL: test_mask_sub_epi16_rm_512:
   2355 ; CHECK:       ## BB#0:
   2356 ; CHECK-NEXT:    vpsubw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xf9,0x07]
   2357 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2358   %b = load <32 x i16>, <32 x i16>* %ptr_b
   2359   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   2360   ret <32 x i16> %res
   2361 }
   2362 
   2363 define <32 x i16> @test_mask_sub_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   2364 ; CHECK-LABEL: test_mask_sub_epi16_rmk_512:
   2365 ; CHECK:       ## BB#0:
   2366 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   2367 ; CHECK-NEXT:    vpsubw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xf9,0x0f]
   2368 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2369 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2370   %b = load <32 x i16>, <32 x i16>* %ptr_b
   2371   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   2372   ret <32 x i16> %res
   2373 }
   2374 
   2375 define <32 x i16> @test_mask_sub_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
   2376 ; CHECK-LABEL: test_mask_sub_epi16_rmkz_512:
   2377 ; CHECK:       ## BB#0:
   2378 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   2379 ; CHECK-NEXT:    vpsubw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xf9,0x07]
   2380 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2381   %b = load <32 x i16>, <32 x i16>* %ptr_b
   2382   %res = call <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   2383   ret <32 x i16> %res
   2384 }
   2385 
   2386 declare <32 x i16> @llvm.x86.avx512.mask.psub.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2387 
   2388 define <32 x i16> @test_mask_mullo_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
   2389 ; CHECK-LABEL: test_mask_mullo_epi16_rr_512:
   2390 ; CHECK:       ## BB#0:
   2391 ; CHECK-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0xc1]
   2392 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2393   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   2394   ret <32 x i16> %res
   2395 }
   2396 
   2397 define <32 x i16> @test_mask_mullo_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
   2398 ; CHECK-LABEL: test_mask_mullo_epi16_rrk_512:
   2399 ; CHECK:       ## BB#0:
   2400 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   2401 ; CHECK-NEXT:    vpmullw %zmm1, %zmm0, %zmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0xd1]
   2402 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2403 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2404   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   2405   ret <32 x i16> %res
   2406 }
   2407 
   2408 define <32 x i16> @test_mask_mullo_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
   2409 ; CHECK-LABEL: test_mask_mullo_epi16_rrkz_512:
   2410 ; CHECK:       ## BB#0:
   2411 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   2412 ; CHECK-NEXT:    vpmullw %zmm1, %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0xc1]
   2413 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2414   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   2415   ret <32 x i16> %res
   2416 }
   2417 
   2418 define <32 x i16> @test_mask_mullo_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
   2419 ; CHECK-LABEL: test_mask_mullo_epi16_rm_512:
   2420 ; CHECK:       ## BB#0:
   2421 ; CHECK-NEXT:    vpmullw (%rdi), %zmm0, %zmm0 ## encoding: [0x62,0xf1,0x7d,0x48,0xd5,0x07]
   2422 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2423   %b = load <32 x i16>, <32 x i16>* %ptr_b
   2424   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
   2425   ret <32 x i16> %res
   2426 }
   2427 
   2428 define <32 x i16> @test_mask_mullo_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
   2429 ; CHECK-LABEL: test_mask_mullo_epi16_rmk_512:
   2430 ; CHECK:       ## BB#0:
   2431 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   2432 ; CHECK-NEXT:    vpmullw (%rdi), %zmm0, %zmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x49,0xd5,0x0f]
   2433 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2434 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2435   %b = load <32 x i16>, <32 x i16>* %ptr_b
   2436   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
   2437   ret <32 x i16> %res
   2438 }
   2439 
   2440 define <32 x i16> @test_mask_mullo_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
   2441 ; CHECK-LABEL: test_mask_mullo_epi16_rmkz_512:
   2442 ; CHECK:       ## BB#0:
   2443 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   2444 ; CHECK-NEXT:    vpmullw (%rdi), %zmm0, %zmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xc9,0xd5,0x07]
   2445 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2446   %b = load <32 x i16>, <32 x i16>* %ptr_b
   2447   %res = call <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
   2448   ret <32 x i16> %res
   2449 }
   2450 
   2451 declare <32 x i16> @llvm.x86.avx512.mask.pmull.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   2452 
   2453 define <8 x i16> @test_mask_mullo_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   2454 ; CHECK-LABEL: test_mask_mullo_epi16_rr_128:
   2455 ; CHECK:       ## BB#0:
   2456 ; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0xc1]
   2457 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2458   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   2459   ret <8 x i16> %res
   2460 }
   2461 
   2462 define <8 x i16> @test_mask_mullo_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   2463 ; CHECK-LABEL: test_mask_mullo_epi16_rrk_128:
   2464 ; CHECK:       ## BB#0:
   2465 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2466 ; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0xd1]
   2467 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2468 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2469   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   2470   ret <8 x i16> %res
   2471 }
   2472 
   2473 define <8 x i16> @test_mask_mullo_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   2474 ; CHECK-LABEL: test_mask_mullo_epi16_rrkz_128:
   2475 ; CHECK:       ## BB#0:
   2476 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2477 ; CHECK-NEXT:    vpmullw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0xc1]
   2478 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2479   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   2480   ret <8 x i16> %res
   2481 }
   2482 
   2483 define <8 x i16> @test_mask_mullo_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   2484 ; CHECK-LABEL: test_mask_mullo_epi16_rm_128:
   2485 ; CHECK:       ## BB#0:
   2486 ; CHECK-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd5,0x07]
   2487 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2488   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2489   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   2490   ret <8 x i16> %res
   2491 }
   2492 
   2493 define <8 x i16> @test_mask_mullo_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   2494 ; CHECK-LABEL: test_mask_mullo_epi16_rmk_128:
   2495 ; CHECK:       ## BB#0:
   2496 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2497 ; CHECK-NEXT:    vpmullw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd5,0x0f]
   2498 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2499 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2500   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2501   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   2502   ret <8 x i16> %res
   2503 }
   2504 
   2505 define <8 x i16> @test_mask_mullo_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   2506 ; CHECK-LABEL: test_mask_mullo_epi16_rmkz_128:
   2507 ; CHECK:       ## BB#0:
   2508 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2509 ; CHECK-NEXT:    vpmullw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd5,0x07]
   2510 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2511   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2512   %res = call <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   2513   ret <8 x i16> %res
   2514 }
   2515 
   2516 declare <8 x i16> @llvm.x86.avx512.mask.pmull.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   2517 
   2518 define <16 x i16> @test_mask_mullo_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   2519 ; CHECK-LABEL: test_mask_mullo_epi16_rr_256:
   2520 ; CHECK:       ## BB#0:
   2521 ; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0xc1]
   2522 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2523   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   2524   ret <16 x i16> %res
   2525 }
   2526 
   2527 define <16 x i16> @test_mask_mullo_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   2528 ; CHECK-LABEL: test_mask_mullo_epi16_rrk_256:
   2529 ; CHECK:       ## BB#0:
   2530 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2531 ; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0xd1]
   2532 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2533 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2534   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   2535   ret <16 x i16> %res
   2536 }
   2537 
   2538 define <16 x i16> @test_mask_mullo_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   2539 ; CHECK-LABEL: test_mask_mullo_epi16_rrkz_256:
   2540 ; CHECK:       ## BB#0:
   2541 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2542 ; CHECK-NEXT:    vpmullw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0xc1]
   2543 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2544   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   2545   ret <16 x i16> %res
   2546 }
   2547 
   2548 define <16 x i16> @test_mask_mullo_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   2549 ; CHECK-LABEL: test_mask_mullo_epi16_rm_256:
   2550 ; CHECK:       ## BB#0:
   2551 ; CHECK-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd5,0x07]
   2552 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2553   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2554   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   2555   ret <16 x i16> %res
   2556 }
   2557 
   2558 define <16 x i16> @test_mask_mullo_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   2559 ; CHECK-LABEL: test_mask_mullo_epi16_rmk_256:
   2560 ; CHECK:       ## BB#0:
   2561 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2562 ; CHECK-NEXT:    vpmullw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd5,0x0f]
   2563 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2564 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2565   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2566   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   2567   ret <16 x i16> %res
   2568 }
   2569 
   2570 define <16 x i16> @test_mask_mullo_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   2571 ; CHECK-LABEL: test_mask_mullo_epi16_rmkz_256:
   2572 ; CHECK:       ## BB#0:
   2573 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2574 ; CHECK-NEXT:    vpmullw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd5,0x07]
   2575 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2576   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2577   %res = call <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   2578   ret <16 x i16> %res
   2579 }
   2580 
   2581 declare <16 x i16> @llvm.x86.avx512.mask.pmull.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   2582 
   2583 
   2584 define <8 x i16> @test_mask_packs_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
   2585 ; CHECK-LABEL: test_mask_packs_epi32_rr_128:
   2586 ; CHECK:       ## BB#0:
   2587 ; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0xc1]
   2588 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2589   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   2590   ret <8 x i16> %res
   2591 }
   2592 
   2593 define <8 x i16> @test_mask_packs_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
   2594 ; CHECK-LABEL: test_mask_packs_epi32_rrk_128:
   2595 ; CHECK:       ## BB#0:
   2596 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2597 ; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0xd1]
   2598 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2599 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2600   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   2601   ret <8 x i16> %res
   2602 }
   2603 
   2604 define <8 x i16> @test_mask_packs_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
   2605 ; CHECK-LABEL: test_mask_packs_epi32_rrkz_128:
   2606 ; CHECK:       ## BB#0:
   2607 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2608 ; CHECK-NEXT:    vpackssdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0xc1]
   2609 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2610   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   2611   ret <8 x i16> %res
   2612 }
   2613 
   2614 define <8 x i16> @test_mask_packs_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
   2615 ; CHECK-LABEL: test_mask_packs_epi32_rm_128:
   2616 ; CHECK:       ## BB#0:
   2617 ; CHECK-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6b,0x07]
   2618 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2619   %b = load <4 x i32>, <4 x i32>* %ptr_b
   2620   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   2621   ret <8 x i16> %res
   2622 }
   2623 
   2624 define <8 x i16> @test_mask_packs_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   2625 ; CHECK-LABEL: test_mask_packs_epi32_rmk_128:
   2626 ; CHECK:       ## BB#0:
   2627 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2628 ; CHECK-NEXT:    vpackssdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6b,0x0f]
   2629 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2630 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2631   %b = load <4 x i32>, <4 x i32>* %ptr_b
   2632   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   2633   ret <8 x i16> %res
   2634 }
   2635 
   2636 define <8 x i16> @test_mask_packs_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
   2637 ; CHECK-LABEL: test_mask_packs_epi32_rmkz_128:
   2638 ; CHECK:       ## BB#0:
   2639 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2640 ; CHECK-NEXT:    vpackssdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6b,0x07]
   2641 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2642   %b = load <4 x i32>, <4 x i32>* %ptr_b
   2643   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   2644   ret <8 x i16> %res
   2645 }
   2646 
   2647 define <8 x i16> @test_mask_packs_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
   2648 ; CHECK-LABEL: test_mask_packs_epi32_rmb_128:
   2649 ; CHECK:       ## BB#0:
   2650 ; CHECK-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0x6b,0x07]
   2651 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2652   %q = load i32, i32* %ptr_b
   2653   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   2654   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   2655   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   2656   ret <8 x i16> %res
   2657 }
   2658 
   2659 define <8 x i16> @test_mask_packs_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   2660 ; CHECK-LABEL: test_mask_packs_epi32_rmbk_128:
   2661 ; CHECK:       ## BB#0:
   2662 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2663 ; CHECK-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0x6b,0x0f]
   2664 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2665 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2666   %q = load i32, i32* %ptr_b
   2667   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   2668   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   2669   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   2670   ret <8 x i16> %res
   2671 }
   2672 
   2673 define <8 x i16> @test_mask_packs_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
   2674 ; CHECK-LABEL: test_mask_packs_epi32_rmbkz_128:
   2675 ; CHECK:       ## BB#0:
   2676 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2677 ; CHECK-NEXT:    vpackssdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0x6b,0x07]
   2678 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2679   %q = load i32, i32* %ptr_b
   2680   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   2681   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   2682   %res = call <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   2683   ret <8 x i16> %res
   2684 }
   2685 
   2686 declare <8 x i16> @llvm.x86.avx512.mask.packssdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
   2687 
   2688 define <16 x i16> @test_mask_packs_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
   2689 ; CHECK-LABEL: test_mask_packs_epi32_rr_256:
   2690 ; CHECK:       ## BB#0:
   2691 ; CHECK-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0xc1]
   2692 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2693   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   2694   ret <16 x i16> %res
   2695 }
   2696 
   2697 define <16 x i16> @test_mask_packs_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
   2698 ; CHECK-LABEL: test_mask_packs_epi32_rrk_256:
   2699 ; CHECK:       ## BB#0:
   2700 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2701 ; CHECK-NEXT:    vpackssdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0xd1]
   2702 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2703 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2704   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   2705   ret <16 x i16> %res
   2706 }
   2707 
   2708 define <16 x i16> @test_mask_packs_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
   2709 ; CHECK-LABEL: test_mask_packs_epi32_rrkz_256:
   2710 ; CHECK:       ## BB#0:
   2711 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2712 ; CHECK-NEXT:    vpackssdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0xc1]
   2713 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2714   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   2715   ret <16 x i16> %res
   2716 }
   2717 
   2718 define <16 x i16> @test_mask_packs_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
   2719 ; CHECK-LABEL: test_mask_packs_epi32_rm_256:
   2720 ; CHECK:       ## BB#0:
   2721 ; CHECK-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6b,0x07]
   2722 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2723   %b = load <8 x i32>, <8 x i32>* %ptr_b
   2724   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   2725   ret <16 x i16> %res
   2726 }
   2727 
   2728 define <16 x i16> @test_mask_packs_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   2729 ; CHECK-LABEL: test_mask_packs_epi32_rmk_256:
   2730 ; CHECK:       ## BB#0:
   2731 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2732 ; CHECK-NEXT:    vpackssdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6b,0x0f]
   2733 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2734 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2735   %b = load <8 x i32>, <8 x i32>* %ptr_b
   2736   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   2737   ret <16 x i16> %res
   2738 }
   2739 
   2740 define <16 x i16> @test_mask_packs_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
   2741 ; CHECK-LABEL: test_mask_packs_epi32_rmkz_256:
   2742 ; CHECK:       ## BB#0:
   2743 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2744 ; CHECK-NEXT:    vpackssdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6b,0x07]
   2745 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2746   %b = load <8 x i32>, <8 x i32>* %ptr_b
   2747   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   2748   ret <16 x i16> %res
   2749 }
   2750 
   2751 define <16 x i16> @test_mask_packs_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
   2752 ; CHECK-LABEL: test_mask_packs_epi32_rmb_256:
   2753 ; CHECK:       ## BB#0:
   2754 ; CHECK-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0x6b,0x07]
   2755 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2756   %q = load i32, i32* %ptr_b
   2757   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   2758   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   2759   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   2760   ret <16 x i16> %res
   2761 }
   2762 
   2763 define <16 x i16> @test_mask_packs_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   2764 ; CHECK-LABEL: test_mask_packs_epi32_rmbk_256:
   2765 ; CHECK:       ## BB#0:
   2766 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2767 ; CHECK-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0x6b,0x0f]
   2768 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2769 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2770   %q = load i32, i32* %ptr_b
   2771   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   2772   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   2773   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   2774   ret <16 x i16> %res
   2775 }
   2776 
   2777 define <16 x i16> @test_mask_packs_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
   2778 ; CHECK-LABEL: test_mask_packs_epi32_rmbkz_256:
   2779 ; CHECK:       ## BB#0:
   2780 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2781 ; CHECK-NEXT:    vpackssdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0x6b,0x07]
   2782 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2783   %q = load i32, i32* %ptr_b
   2784   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   2785   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   2786   %res = call <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   2787   ret <16 x i16> %res
   2788 }
   2789 
   2790 declare <16 x i16> @llvm.x86.avx512.mask.packssdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
   2791 
   2792 define <16 x i8> @test_mask_packs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   2793 ; CHECK-LABEL: test_mask_packs_epi16_rr_128:
   2794 ; CHECK:       ## BB#0:
   2795 ; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x63,0xc1]
   2796 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2797   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   2798   ret <16 x i8> %res
   2799 }
   2800 
   2801 define <16 x i8> @test_mask_packs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
   2802 ; CHECK-LABEL: test_mask_packs_epi16_rrk_128:
   2803 ; CHECK:       ## BB#0:
   2804 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2805 ; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0xd1]
   2806 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2807 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2808   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
   2809   ret <16 x i8> %res
   2810 }
   2811 
   2812 define <16 x i8> @test_mask_packs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
   2813 ; CHECK-LABEL: test_mask_packs_epi16_rrkz_128:
   2814 ; CHECK:       ## BB#0:
   2815 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2816 ; CHECK-NEXT:    vpacksswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x63,0xc1]
   2817 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2818   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
   2819   ret <16 x i8> %res
   2820 }
   2821 
   2822 define <16 x i8> @test_mask_packs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   2823 ; CHECK-LABEL: test_mask_packs_epi16_rm_128:
   2824 ; CHECK:       ## BB#0:
   2825 ; CHECK-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x63,0x07]
   2826 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2827   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2828   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   2829   ret <16 x i8> %res
   2830 }
   2831 
   2832 define <16 x i8> @test_mask_packs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
   2833 ; CHECK-LABEL: test_mask_packs_epi16_rmk_128:
   2834 ; CHECK:       ## BB#0:
   2835 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2836 ; CHECK-NEXT:    vpacksswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x63,0x0f]
   2837 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2838 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2839   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2840   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
   2841   ret <16 x i8> %res
   2842 }
   2843 
   2844 define <16 x i8> @test_mask_packs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
   2845 ; CHECK-LABEL: test_mask_packs_epi16_rmkz_128:
   2846 ; CHECK:       ## BB#0:
   2847 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2848 ; CHECK-NEXT:    vpacksswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x63,0x07]
   2849 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2850   %b = load <8 x i16>, <8 x i16>* %ptr_b
   2851   %res = call <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
   2852   ret <16 x i8> %res
   2853 }
   2854 
   2855 declare <16 x i8> @llvm.x86.avx512.mask.packsswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
   2856 
   2857 define <32 x i8> @test_mask_packs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   2858 ; CHECK-LABEL: test_mask_packs_epi16_rr_256:
   2859 ; CHECK:       ## BB#0:
   2860 ; CHECK-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x63,0xc1]
   2861 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2862   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   2863   ret <32 x i8> %res
   2864 }
   2865 
   2866 define <32 x i8> @test_mask_packs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
   2867 ; CHECK-LABEL: test_mask_packs_epi16_rrk_256:
   2868 ; CHECK:       ## BB#0:
   2869 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   2870 ; CHECK-NEXT:    vpacksswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x63,0xd1]
   2871 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2872 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2873   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
   2874   ret <32 x i8> %res
   2875 }
   2876 
   2877 define <32 x i8> @test_mask_packs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
   2878 ; CHECK-LABEL: test_mask_packs_epi16_rrkz_256:
   2879 ; CHECK:       ## BB#0:
   2880 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   2881 ; CHECK-NEXT:    vpacksswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x63,0xc1]
   2882 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2883   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
   2884   ret <32 x i8> %res
   2885 }
   2886 
   2887 define <32 x i8> @test_mask_packs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   2888 ; CHECK-LABEL: test_mask_packs_epi16_rm_256:
   2889 ; CHECK:       ## BB#0:
   2890 ; CHECK-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x63,0x07]
   2891 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2892   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2893   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   2894   ret <32 x i8> %res
   2895 }
   2896 
   2897 define <32 x i8> @test_mask_packs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
   2898 ; CHECK-LABEL: test_mask_packs_epi16_rmk_256:
   2899 ; CHECK:       ## BB#0:
   2900 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   2901 ; CHECK-NEXT:    vpacksswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x63,0x0f]
   2902 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2903 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2904   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2905   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
   2906   ret <32 x i8> %res
   2907 }
   2908 
   2909 define <32 x i8> @test_mask_packs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
   2910 ; CHECK-LABEL: test_mask_packs_epi16_rmkz_256:
   2911 ; CHECK:       ## BB#0:
   2912 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   2913 ; CHECK-NEXT:    vpacksswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x63,0x07]
   2914 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2915   %b = load <16 x i16>, <16 x i16>* %ptr_b
   2916   %res = call <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
   2917   ret <32 x i8> %res
   2918 }
   2919 
   2920 declare <32 x i8> @llvm.x86.avx512.mask.packsswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
   2921 
   2922 
   2923 define <8 x i16> @test_mask_packus_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
   2924 ; CHECK-LABEL: test_mask_packus_epi32_rr_128:
   2925 ; CHECK:       ## BB#0:
   2926 ; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2b,0xc1]
   2927 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2928   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   2929   ret <8 x i16> %res
   2930 }
   2931 
   2932 define <8 x i16> @test_mask_packus_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask) {
   2933 ; CHECK-LABEL: test_mask_packus_epi32_rrk_128:
   2934 ; CHECK:       ## BB#0:
   2935 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2936 ; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0xd1]
   2937 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   2938 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2939   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   2940   ret <8 x i16> %res
   2941 }
   2942 
   2943 define <8 x i16> @test_mask_packus_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
   2944 ; CHECK-LABEL: test_mask_packus_epi32_rrkz_128:
   2945 ; CHECK:       ## BB#0:
   2946 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   2947 ; CHECK-NEXT:    vpackusdw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x2b,0xc1]
   2948 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2949   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   2950   ret <8 x i16> %res
   2951 }
   2952 
   2953 define <8 x i16> @test_mask_packus_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
   2954 ; CHECK-LABEL: test_mask_packus_epi32_rm_128:
   2955 ; CHECK:       ## BB#0:
   2956 ; CHECK-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x2b,0x07]
   2957 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2958   %b = load <4 x i32>, <4 x i32>* %ptr_b
   2959   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   2960   ret <8 x i16> %res
   2961 }
   2962 
   2963 define <8 x i16> @test_mask_packus_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   2964 ; CHECK-LABEL: test_mask_packus_epi32_rmk_128:
   2965 ; CHECK:       ## BB#0:
   2966 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2967 ; CHECK-NEXT:    vpackusdw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x2b,0x0f]
   2968 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   2969 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2970   %b = load <4 x i32>, <4 x i32>* %ptr_b
   2971   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   2972   ret <8 x i16> %res
   2973 }
   2974 
   2975 define <8 x i16> @test_mask_packus_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
   2976 ; CHECK-LABEL: test_mask_packus_epi32_rmkz_128:
   2977 ; CHECK:       ## BB#0:
   2978 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   2979 ; CHECK-NEXT:    vpackusdw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x2b,0x07]
   2980 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2981   %b = load <4 x i32>, <4 x i32>* %ptr_b
   2982   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   2983   ret <8 x i16> %res
   2984 }
   2985 
   2986 define <8 x i16> @test_mask_packus_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
   2987 ; CHECK-LABEL: test_mask_packus_epi32_rmb_128:
   2988 ; CHECK:       ## BB#0:
   2989 ; CHECK-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x18,0x2b,0x07]
   2990 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   2991   %q = load i32, i32* %ptr_b
   2992   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   2993   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   2994   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 -1)
   2995   ret <8 x i16> %res
   2996 }
   2997 
   2998 define <8 x i16> @test_mask_packus_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   2999 ; CHECK-LABEL: test_mask_packus_epi32_rmbk_128:
   3000 ; CHECK:       ## BB#0:
   3001 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3002 ; CHECK-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x19,0x2b,0x0f]
   3003 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3004 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3005   %q = load i32, i32* %ptr_b
   3006   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3007   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3008   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> %passThru, i8 %mask)
   3009   ret <8 x i16> %res
   3010 }
   3011 
   3012 define <8 x i16> @test_mask_packus_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
   3013 ; CHECK-LABEL: test_mask_packus_epi32_rmbkz_128:
   3014 ; CHECK:       ## BB#0:
   3015 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3016 ; CHECK-NEXT:    vpackusdw (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x99,0x2b,0x07]
   3017 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3018   %q = load i32, i32* %ptr_b
   3019   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3020   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3021   %res = call <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32> %a, <4 x i32> %b, <8 x i16> zeroinitializer, i8 %mask)
   3022   ret <8 x i16> %res
   3023 }
   3024 
   3025 declare <8 x i16> @llvm.x86.avx512.mask.packusdw.128(<4 x i32>, <4 x i32>, <8 x i16>, i8)
   3026 
   3027 define <16 x i16> @test_mask_packus_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
   3028 ; CHECK-LABEL: test_mask_packus_epi32_rr_256:
   3029 ; CHECK:       ## BB#0:
   3030 ; CHECK-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2b,0xc1]
   3031 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3032   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   3033   ret <16 x i16> %res
   3034 }
   3035 
   3036 define <16 x i16> @test_mask_packus_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask) {
   3037 ; CHECK-LABEL: test_mask_packus_epi32_rrk_256:
   3038 ; CHECK:       ## BB#0:
   3039 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3040 ; CHECK-NEXT:    vpackusdw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0xd1]
   3041 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3042 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3043   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   3044   ret <16 x i16> %res
   3045 }
   3046 
   3047 define <16 x i16> @test_mask_packus_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i16 %mask) {
   3048 ; CHECK-LABEL: test_mask_packus_epi32_rrkz_256:
   3049 ; CHECK:       ## BB#0:
   3050 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3051 ; CHECK-NEXT:    vpackusdw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0xc1]
   3052 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3053   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   3054   ret <16 x i16> %res
   3055 }
   3056 
   3057 define <16 x i16> @test_mask_packus_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
   3058 ; CHECK-LABEL: test_mask_packus_epi32_rm_256:
   3059 ; CHECK:       ## BB#0:
   3060 ; CHECK-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x2b,0x07]
   3061 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3062   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3063   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   3064   ret <16 x i16> %res
   3065 }
   3066 
   3067 define <16 x i16> @test_mask_packus_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   3068 ; CHECK-LABEL: test_mask_packus_epi32_rmk_256:
   3069 ; CHECK:       ## BB#0:
   3070 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3071 ; CHECK-NEXT:    vpackusdw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x2b,0x0f]
   3072 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3073 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3074   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3075   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   3076   ret <16 x i16> %res
   3077 }
   3078 
   3079 define <16 x i16> @test_mask_packus_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i16 %mask) {
   3080 ; CHECK-LABEL: test_mask_packus_epi32_rmkz_256:
   3081 ; CHECK:       ## BB#0:
   3082 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3083 ; CHECK-NEXT:    vpackusdw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x2b,0x07]
   3084 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3085   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3086   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   3087   ret <16 x i16> %res
   3088 }
   3089 
   3090 define <16 x i16> @test_mask_packus_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
   3091 ; CHECK-LABEL: test_mask_packus_epi32_rmb_256:
   3092 ; CHECK:       ## BB#0:
   3093 ; CHECK-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x38,0x2b,0x07]
   3094 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3095   %q = load i32, i32* %ptr_b
   3096   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   3097   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   3098   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 -1)
   3099   ret <16 x i16> %res
   3100 }
   3101 
   3102 define <16 x i16> @test_mask_packus_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   3103 ; CHECK-LABEL: test_mask_packus_epi32_rmbk_256:
   3104 ; CHECK:       ## BB#0:
   3105 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3106 ; CHECK-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x39,0x2b,0x0f]
   3107 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3108 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3109   %q = load i32, i32* %ptr_b
   3110   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   3111   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   3112   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> %passThru, i16 %mask)
   3113   ret <16 x i16> %res
   3114 }
   3115 
   3116 define <16 x i16> @test_mask_packus_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i16 %mask) {
   3117 ; CHECK-LABEL: test_mask_packus_epi32_rmbkz_256:
   3118 ; CHECK:       ## BB#0:
   3119 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3120 ; CHECK-NEXT:    vpackusdw (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xb9,0x2b,0x07]
   3121 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3122   %q = load i32, i32* %ptr_b
   3123   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   3124   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   3125   %res = call <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32> %a, <8 x i32> %b, <16 x i16> zeroinitializer, i16 %mask)
   3126   ret <16 x i16> %res
   3127 }
   3128 
   3129 declare <16 x i16> @llvm.x86.avx512.mask.packusdw.256(<8 x i32>, <8 x i32>, <16 x i16>, i16)
   3130 
   3131 define <16 x i8> @test_mask_packus_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   3132 ; CHECK-LABEL: test_mask_packus_epi16_rr_128:
   3133 ; CHECK:       ## BB#0:
   3134 ; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x67,0xc1]
   3135 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3136   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   3137   ret <16 x i8> %res
   3138 }
   3139 
   3140 define <16 x i8> @test_mask_packus_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask) {
   3141 ; CHECK-LABEL: test_mask_packus_epi16_rrk_128:
   3142 ; CHECK:       ## BB#0:
   3143 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3144 ; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0xd1]
   3145 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3146 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3147   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
   3148   ret <16 x i8> %res
   3149 }
   3150 
   3151 define <16 x i8> @test_mask_packus_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i16 %mask) {
   3152 ; CHECK-LABEL: test_mask_packus_epi16_rrkz_128:
   3153 ; CHECK:       ## BB#0:
   3154 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3155 ; CHECK-NEXT:    vpackuswb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x67,0xc1]
   3156 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3157   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
   3158   ret <16 x i8> %res
   3159 }
   3160 
   3161 define <16 x i8> @test_mask_packus_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   3162 ; CHECK-LABEL: test_mask_packus_epi16_rm_128:
   3163 ; CHECK:       ## BB#0:
   3164 ; CHECK-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x67,0x07]
   3165 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3166   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3167   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 -1)
   3168   ret <16 x i8> %res
   3169 }
   3170 
   3171 define <16 x i8> @test_mask_packus_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
   3172 ; CHECK-LABEL: test_mask_packus_epi16_rmk_128:
   3173 ; CHECK:       ## BB#0:
   3174 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3175 ; CHECK-NEXT:    vpackuswb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x67,0x0f]
   3176 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3177 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3178   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3179   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> %passThru, i16 %mask)
   3180   ret <16 x i8> %res
   3181 }
   3182 
   3183 define <16 x i8> @test_mask_packus_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i16 %mask) {
   3184 ; CHECK-LABEL: test_mask_packus_epi16_rmkz_128:
   3185 ; CHECK:       ## BB#0:
   3186 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3187 ; CHECK-NEXT:    vpackuswb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x67,0x07]
   3188 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3189   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3190   %res = call <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16> %a, <8 x i16> %b, <16 x i8> zeroinitializer, i16 %mask)
   3191   ret <16 x i8> %res
   3192 }
   3193 
   3194 declare <16 x i8> @llvm.x86.avx512.mask.packuswb.128(<8 x i16>, <8 x i16>, <16 x i8>, i16)
   3195 
   3196 define <32 x i8> @test_mask_packus_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   3197 ; CHECK-LABEL: test_mask_packus_epi16_rr_256:
   3198 ; CHECK:       ## BB#0:
   3199 ; CHECK-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x67,0xc1]
   3200 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3201   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   3202   ret <32 x i8> %res
   3203 }
   3204 
   3205 define <32 x i8> @test_mask_packus_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask) {
   3206 ; CHECK-LABEL: test_mask_packus_epi16_rrk_256:
   3207 ; CHECK:       ## BB#0:
   3208 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   3209 ; CHECK-NEXT:    vpackuswb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x67,0xd1]
   3210 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3211 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3212   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
   3213   ret <32 x i8> %res
   3214 }
   3215 
   3216 define <32 x i8> @test_mask_packus_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i32 %mask) {
   3217 ; CHECK-LABEL: test_mask_packus_epi16_rrkz_256:
   3218 ; CHECK:       ## BB#0:
   3219 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   3220 ; CHECK-NEXT:    vpackuswb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x67,0xc1]
   3221 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3222   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
   3223   ret <32 x i8> %res
   3224 }
   3225 
   3226 define <32 x i8> @test_mask_packus_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   3227 ; CHECK-LABEL: test_mask_packus_epi16_rm_256:
   3228 ; CHECK:       ## BB#0:
   3229 ; CHECK-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x67,0x07]
   3230 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3231   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3232   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 -1)
   3233   ret <32 x i8> %res
   3234 }
   3235 
   3236 define <32 x i8> @test_mask_packus_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
   3237 ; CHECK-LABEL: test_mask_packus_epi16_rmk_256:
   3238 ; CHECK:       ## BB#0:
   3239 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   3240 ; CHECK-NEXT:    vpackuswb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x67,0x0f]
   3241 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3242 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3243   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3244   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> %passThru, i32 %mask)
   3245   ret <32 x i8> %res
   3246 }
   3247 
   3248 define <32 x i8> @test_mask_packus_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i32 %mask) {
   3249 ; CHECK-LABEL: test_mask_packus_epi16_rmkz_256:
   3250 ; CHECK:       ## BB#0:
   3251 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   3252 ; CHECK-NEXT:    vpackuswb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x67,0x07]
   3253 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3254   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3255   %res = call <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16> %a, <16 x i16> %b, <32 x i8> zeroinitializer, i32 %mask)
   3256   ret <32 x i8> %res
   3257 }
   3258 
   3259 declare <32 x i8> @llvm.x86.avx512.mask.packuswb.256(<16 x i16>, <16 x i16>, <32 x i8>, i32)
   3260 
   3261 define <8 x i16> @test_mask_adds_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   3262 ; CHECK-LABEL: test_mask_adds_epi16_rr_128:
   3263 ; CHECK:       ## BB#0:
   3264 ; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xed,0xc1]
   3265 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3266   %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   3267   ret <8 x i16> %res
   3268 }
   3269 
   3270 define <8 x i16> @test_mask_adds_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   3271 ; CHECK-LABEL: test_mask_adds_epi16_rrk_128:
   3272 ; CHECK:       ## BB#0:
   3273 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3274 ; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0xd1]
   3275 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3276 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3277   %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   3278   ret <8 x i16> %res
   3279 }
   3280 
   3281 define <8 x i16> @test_mask_adds_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   3282 ; CHECK-LABEL: test_mask_adds_epi16_rrkz_128:
   3283 ; CHECK:       ## BB#0:
   3284 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3285 ; CHECK-NEXT:    vpaddsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0xc1]
   3286 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3287   %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   3288   ret <8 x i16> %res
   3289 }
   3290 
   3291 define <8 x i16> @test_mask_adds_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   3292 ; CHECK-LABEL: test_mask_adds_epi16_rm_128:
   3293 ; CHECK:       ## BB#0:
   3294 ; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xed,0x07]
   3295 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3296   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3297   %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   3298   ret <8 x i16> %res
   3299 }
   3300 
   3301 define <8 x i16> @test_mask_adds_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   3302 ; CHECK-LABEL: test_mask_adds_epi16_rmk_128:
   3303 ; CHECK:       ## BB#0:
   3304 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3305 ; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xed,0x0f]
   3306 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3307 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3308   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3309   %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   3310   ret <8 x i16> %res
   3311 }
   3312 
   3313 define <8 x i16> @test_mask_adds_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   3314 ; CHECK-LABEL: test_mask_adds_epi16_rmkz_128:
   3315 ; CHECK:       ## BB#0:
   3316 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3317 ; CHECK-NEXT:    vpaddsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xed,0x07]
   3318 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3319   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3320   %res = call <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   3321   ret <8 x i16> %res
   3322 }
   3323 
   3324 declare <8 x i16> @llvm.x86.avx512.mask.padds.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   3325 
   3326 define <16 x i16> @test_mask_adds_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   3327 ; CHECK-LABEL: test_mask_adds_epi16_rr_256:
   3328 ; CHECK:       ## BB#0:
   3329 ; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xed,0xc1]
   3330 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3331   %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   3332   ret <16 x i16> %res
   3333 }
   3334 
   3335 define <16 x i16> @test_mask_adds_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   3336 ; CHECK-LABEL: test_mask_adds_epi16_rrk_256:
   3337 ; CHECK:       ## BB#0:
   3338 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3339 ; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0xd1]
   3340 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3341 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3342   %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   3343   ret <16 x i16> %res
   3344 }
   3345 
   3346 define <16 x i16> @test_mask_adds_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   3347 ; CHECK-LABEL: test_mask_adds_epi16_rrkz_256:
   3348 ; CHECK:       ## BB#0:
   3349 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3350 ; CHECK-NEXT:    vpaddsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0xc1]
   3351 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3352   %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   3353   ret <16 x i16> %res
   3354 }
   3355 
   3356 define <16 x i16> @test_mask_adds_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   3357 ; CHECK-LABEL: test_mask_adds_epi16_rm_256:
   3358 ; CHECK:       ## BB#0:
   3359 ; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xed,0x07]
   3360 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3361   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3362   %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   3363   ret <16 x i16> %res
   3364 }
   3365 
   3366 define <16 x i16> @test_mask_adds_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   3367 ; CHECK-LABEL: test_mask_adds_epi16_rmk_256:
   3368 ; CHECK:       ## BB#0:
   3369 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3370 ; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xed,0x0f]
   3371 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3372 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3373   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3374   %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   3375   ret <16 x i16> %res
   3376 }
   3377 
   3378 define <16 x i16> @test_mask_adds_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   3379 ; CHECK-LABEL: test_mask_adds_epi16_rmkz_256:
   3380 ; CHECK:       ## BB#0:
   3381 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3382 ; CHECK-NEXT:    vpaddsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xed,0x07]
   3383 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3384   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3385   %res = call <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   3386   ret <16 x i16> %res
   3387 }
   3388 
   3389 declare <16 x i16> @llvm.x86.avx512.mask.padds.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   3390 
   3391 define <8 x i16> @test_mask_subs_epi16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   3392 ; CHECK-LABEL: test_mask_subs_epi16_rr_128:
   3393 ; CHECK:       ## BB#0:
   3394 ; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe9,0xc1]
   3395 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3396   %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   3397   ret <8 x i16> %res
   3398 }
   3399 
   3400 define <8 x i16> @test_mask_subs_epi16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   3401 ; CHECK-LABEL: test_mask_subs_epi16_rrk_128:
   3402 ; CHECK:       ## BB#0:
   3403 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3404 ; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0xd1]
   3405 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3406 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3407   %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   3408   ret <8 x i16> %res
   3409 }
   3410 
   3411 define <8 x i16> @test_mask_subs_epi16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   3412 ; CHECK-LABEL: test_mask_subs_epi16_rrkz_128:
   3413 ; CHECK:       ## BB#0:
   3414 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3415 ; CHECK-NEXT:    vpsubsw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0xc1]
   3416 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3417   %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   3418   ret <8 x i16> %res
   3419 }
   3420 
   3421 define <8 x i16> @test_mask_subs_epi16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   3422 ; CHECK-LABEL: test_mask_subs_epi16_rm_128:
   3423 ; CHECK:       ## BB#0:
   3424 ; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe9,0x07]
   3425 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3426   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3427   %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   3428   ret <8 x i16> %res
   3429 }
   3430 
   3431 define <8 x i16> @test_mask_subs_epi16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   3432 ; CHECK-LABEL: test_mask_subs_epi16_rmk_128:
   3433 ; CHECK:       ## BB#0:
   3434 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3435 ; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe9,0x0f]
   3436 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3437 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3438   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3439   %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   3440   ret <8 x i16> %res
   3441 }
   3442 
   3443 define <8 x i16> @test_mask_subs_epi16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   3444 ; CHECK-LABEL: test_mask_subs_epi16_rmkz_128:
   3445 ; CHECK:       ## BB#0:
   3446 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3447 ; CHECK-NEXT:    vpsubsw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe9,0x07]
   3448 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3449   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3450   %res = call <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   3451   ret <8 x i16> %res
   3452 }
   3453 
   3454 declare <8 x i16> @llvm.x86.avx512.mask.psubs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   3455 
   3456 define <16 x i16> @test_mask_subs_epi16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   3457 ; CHECK-LABEL: test_mask_subs_epi16_rr_256:
   3458 ; CHECK:       ## BB#0:
   3459 ; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe9,0xc1]
   3460 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3461   %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   3462   ret <16 x i16> %res
   3463 }
   3464 
   3465 define <16 x i16> @test_mask_subs_epi16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   3466 ; CHECK-LABEL: test_mask_subs_epi16_rrk_256:
   3467 ; CHECK:       ## BB#0:
   3468 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3469 ; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0xd1]
   3470 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3471 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3472   %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   3473   ret <16 x i16> %res
   3474 }
   3475 
   3476 define <16 x i16> @test_mask_subs_epi16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   3477 ; CHECK-LABEL: test_mask_subs_epi16_rrkz_256:
   3478 ; CHECK:       ## BB#0:
   3479 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3480 ; CHECK-NEXT:    vpsubsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0xc1]
   3481 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3482   %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   3483   ret <16 x i16> %res
   3484 }
   3485 
   3486 define <16 x i16> @test_mask_subs_epi16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   3487 ; CHECK-LABEL: test_mask_subs_epi16_rm_256:
   3488 ; CHECK:       ## BB#0:
   3489 ; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe9,0x07]
   3490 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3491   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3492   %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   3493   ret <16 x i16> %res
   3494 }
   3495 
   3496 define <16 x i16> @test_mask_subs_epi16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   3497 ; CHECK-LABEL: test_mask_subs_epi16_rmk_256:
   3498 ; CHECK:       ## BB#0:
   3499 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3500 ; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe9,0x0f]
   3501 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3502 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3503   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3504   %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   3505   ret <16 x i16> %res
   3506 }
   3507 
   3508 define <16 x i16> @test_mask_subs_epi16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   3509 ; CHECK-LABEL: test_mask_subs_epi16_rmkz_256:
   3510 ; CHECK:       ## BB#0:
   3511 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3512 ; CHECK-NEXT:    vpsubsw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe9,0x07]
   3513 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3514   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3515   %res = call <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   3516   ret <16 x i16> %res
   3517 }
   3518 
   3519 declare <16 x i16> @llvm.x86.avx512.mask.psubs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   3520 
   3521 define <8 x i16> @test_mask_adds_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   3522 ; CHECK-LABEL: test_mask_adds_epu16_rr_128:
   3523 ; CHECK:       ## BB#0:
   3524 ; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdd,0xc1]
   3525 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3526   %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   3527   ret <8 x i16> %res
   3528 }
   3529 
   3530 define <8 x i16> @test_mask_adds_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   3531 ; CHECK-LABEL: test_mask_adds_epu16_rrk_128:
   3532 ; CHECK:       ## BB#0:
   3533 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3534 ; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0xd1]
   3535 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3536 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3537   %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   3538   ret <8 x i16> %res
   3539 }
   3540 
   3541 define <8 x i16> @test_mask_adds_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   3542 ; CHECK-LABEL: test_mask_adds_epu16_rrkz_128:
   3543 ; CHECK:       ## BB#0:
   3544 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3545 ; CHECK-NEXT:    vpaddusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0xc1]
   3546 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3547   %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   3548   ret <8 x i16> %res
   3549 }
   3550 
   3551 define <8 x i16> @test_mask_adds_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   3552 ; CHECK-LABEL: test_mask_adds_epu16_rm_128:
   3553 ; CHECK:       ## BB#0:
   3554 ; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdd,0x07]
   3555 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3556   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3557   %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   3558   ret <8 x i16> %res
   3559 }
   3560 
   3561 define <8 x i16> @test_mask_adds_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   3562 ; CHECK-LABEL: test_mask_adds_epu16_rmk_128:
   3563 ; CHECK:       ## BB#0:
   3564 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3565 ; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdd,0x0f]
   3566 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3567 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3568   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3569   %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   3570   ret <8 x i16> %res
   3571 }
   3572 
   3573 define <8 x i16> @test_mask_adds_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   3574 ; CHECK-LABEL: test_mask_adds_epu16_rmkz_128:
   3575 ; CHECK:       ## BB#0:
   3576 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3577 ; CHECK-NEXT:    vpaddusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdd,0x07]
   3578 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3579   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3580   %res = call <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   3581   ret <8 x i16> %res
   3582 }
   3583 
   3584 declare <8 x i16> @llvm.x86.avx512.mask.paddus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   3585 
   3586 define <16 x i16> @test_mask_adds_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   3587 ; CHECK-LABEL: test_mask_adds_epu16_rr_256:
   3588 ; CHECK:       ## BB#0:
   3589 ; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdd,0xc1]
   3590 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3591   %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   3592   ret <16 x i16> %res
   3593 }
   3594 
   3595 define <16 x i16> @test_mask_adds_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   3596 ; CHECK-LABEL: test_mask_adds_epu16_rrk_256:
   3597 ; CHECK:       ## BB#0:
   3598 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3599 ; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0xd1]
   3600 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3601 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3602   %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   3603   ret <16 x i16> %res
   3604 }
   3605 
   3606 define <16 x i16> @test_mask_adds_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   3607 ; CHECK-LABEL: test_mask_adds_epu16_rrkz_256:
   3608 ; CHECK:       ## BB#0:
   3609 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3610 ; CHECK-NEXT:    vpaddusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0xc1]
   3611 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3612   %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   3613   ret <16 x i16> %res
   3614 }
   3615 
   3616 define <16 x i16> @test_mask_adds_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   3617 ; CHECK-LABEL: test_mask_adds_epu16_rm_256:
   3618 ; CHECK:       ## BB#0:
   3619 ; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdd,0x07]
   3620 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3621   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3622   %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   3623   ret <16 x i16> %res
   3624 }
   3625 
   3626 define <16 x i16> @test_mask_adds_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   3627 ; CHECK-LABEL: test_mask_adds_epu16_rmk_256:
   3628 ; CHECK:       ## BB#0:
   3629 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3630 ; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdd,0x0f]
   3631 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3632 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3633   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3634   %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   3635   ret <16 x i16> %res
   3636 }
   3637 
   3638 define <16 x i16> @test_mask_adds_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   3639 ; CHECK-LABEL: test_mask_adds_epu16_rmkz_256:
   3640 ; CHECK:       ## BB#0:
   3641 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3642 ; CHECK-NEXT:    vpaddusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdd,0x07]
   3643 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3644   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3645   %res = call <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   3646   ret <16 x i16> %res
   3647 }
   3648 
   3649 declare <16 x i16> @llvm.x86.avx512.mask.paddus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   3650 
   3651 define <8 x i16> @test_mask_subs_epu16_rr_128(<8 x i16> %a, <8 x i16> %b) {
   3652 ; CHECK-LABEL: test_mask_subs_epu16_rr_128:
   3653 ; CHECK:       ## BB#0:
   3654 ; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd9,0xc1]
   3655 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3656   %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   3657   ret <8 x i16> %res
   3658 }
   3659 
   3660 define <8 x i16> @test_mask_subs_epu16_rrk_128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask) {
   3661 ; CHECK-LABEL: test_mask_subs_epu16_rrk_128:
   3662 ; CHECK:       ## BB#0:
   3663 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3664 ; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0xd1]
   3665 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3666 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3667   %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   3668   ret <8 x i16> %res
   3669 }
   3670 
   3671 define <8 x i16> @test_mask_subs_epu16_rrkz_128(<8 x i16> %a, <8 x i16> %b, i8 %mask) {
   3672 ; CHECK-LABEL: test_mask_subs_epu16_rrkz_128:
   3673 ; CHECK:       ## BB#0:
   3674 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3675 ; CHECK-NEXT:    vpsubusw %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0xc1]
   3676 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3677   %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   3678   ret <8 x i16> %res
   3679 }
   3680 
   3681 define <8 x i16> @test_mask_subs_epu16_rm_128(<8 x i16> %a, <8 x i16>* %ptr_b) {
   3682 ; CHECK-LABEL: test_mask_subs_epu16_rm_128:
   3683 ; CHECK:       ## BB#0:
   3684 ; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd9,0x07]
   3685 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3686   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3687   %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 -1)
   3688   ret <8 x i16> %res
   3689 }
   3690 
   3691 define <8 x i16> @test_mask_subs_epu16_rmk_128(<8 x i16> %a, <8 x i16>* %ptr_b, <8 x i16> %passThru, i8 %mask) {
   3692 ; CHECK-LABEL: test_mask_subs_epu16_rmk_128:
   3693 ; CHECK:       ## BB#0:
   3694 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3695 ; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd9,0x0f]
   3696 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3697 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3698   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3699   %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> %passThru, i8 %mask)
   3700   ret <8 x i16> %res
   3701 }
   3702 
   3703 define <8 x i16> @test_mask_subs_epu16_rmkz_128(<8 x i16> %a, <8 x i16>* %ptr_b, i8 %mask) {
   3704 ; CHECK-LABEL: test_mask_subs_epu16_rmkz_128:
   3705 ; CHECK:       ## BB#0:
   3706 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3707 ; CHECK-NEXT:    vpsubusw (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd9,0x07]
   3708 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3709   %b = load <8 x i16>, <8 x i16>* %ptr_b
   3710   %res = call <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16> %a, <8 x i16> %b, <8 x i16> zeroinitializer, i8 %mask)
   3711   ret <8 x i16> %res
   3712 }
   3713 
   3714 declare <8 x i16> @llvm.x86.avx512.mask.psubus.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   3715 
   3716 define <16 x i16> @test_mask_subs_epu16_rr_256(<16 x i16> %a, <16 x i16> %b) {
   3717 ; CHECK-LABEL: test_mask_subs_epu16_rr_256:
   3718 ; CHECK:       ## BB#0:
   3719 ; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd9,0xc1]
   3720 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3721   %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   3722   ret <16 x i16> %res
   3723 }
   3724 
   3725 define <16 x i16> @test_mask_subs_epu16_rrk_256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask) {
   3726 ; CHECK-LABEL: test_mask_subs_epu16_rrk_256:
   3727 ; CHECK:       ## BB#0:
   3728 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3729 ; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0xd1]
   3730 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3731 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3732   %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   3733   ret <16 x i16> %res
   3734 }
   3735 
   3736 define <16 x i16> @test_mask_subs_epu16_rrkz_256(<16 x i16> %a, <16 x i16> %b, i16 %mask) {
   3737 ; CHECK-LABEL: test_mask_subs_epu16_rrkz_256:
   3738 ; CHECK:       ## BB#0:
   3739 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3740 ; CHECK-NEXT:    vpsubusw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0xc1]
   3741 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3742   %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   3743   ret <16 x i16> %res
   3744 }
   3745 
   3746 define <16 x i16> @test_mask_subs_epu16_rm_256(<16 x i16> %a, <16 x i16>* %ptr_b) {
   3747 ; CHECK-LABEL: test_mask_subs_epu16_rm_256:
   3748 ; CHECK:       ## BB#0:
   3749 ; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd9,0x07]
   3750 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3751   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3752   %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 -1)
   3753   ret <16 x i16> %res
   3754 }
   3755 
   3756 define <16 x i16> @test_mask_subs_epu16_rmk_256(<16 x i16> %a, <16 x i16>* %ptr_b, <16 x i16> %passThru, i16 %mask) {
   3757 ; CHECK-LABEL: test_mask_subs_epu16_rmk_256:
   3758 ; CHECK:       ## BB#0:
   3759 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3760 ; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd9,0x0f]
   3761 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3762 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3763   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3764   %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> %passThru, i16 %mask)
   3765   ret <16 x i16> %res
   3766 }
   3767 
   3768 define <16 x i16> @test_mask_subs_epu16_rmkz_256(<16 x i16> %a, <16 x i16>* %ptr_b, i16 %mask) {
   3769 ; CHECK-LABEL: test_mask_subs_epu16_rmkz_256:
   3770 ; CHECK:       ## BB#0:
   3771 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3772 ; CHECK-NEXT:    vpsubusw (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd9,0x07]
   3773 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3774   %b = load <16 x i16>, <16 x i16>* %ptr_b
   3775   %res = call <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16> %a, <16 x i16> %b, <16 x i16> zeroinitializer, i16 %mask)
   3776   ret <16 x i16> %res
   3777 }
   3778 
   3779 declare <16 x i16> @llvm.x86.avx512.mask.psubus.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   3780 
   3781 define <16 x i8> @test_mask_adds_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
   3782 ; CHECK-LABEL: test_mask_adds_epi8_rr_128:
   3783 ; CHECK:       ## BB#0:
   3784 ; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xec,0xc1]
   3785 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3786   %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
   3787   ret <16 x i8> %res
   3788 }
   3789 
   3790 define <16 x i8> @test_mask_adds_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
   3791 ; CHECK-LABEL: test_mask_adds_epi8_rrk_128:
   3792 ; CHECK:       ## BB#0:
   3793 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3794 ; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0xd1]
   3795 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3796 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3797   %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
   3798   ret <16 x i8> %res
   3799 }
   3800 
   3801 define <16 x i8> @test_mask_adds_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
   3802 ; CHECK-LABEL: test_mask_adds_epi8_rrkz_128:
   3803 ; CHECK:       ## BB#0:
   3804 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3805 ; CHECK-NEXT:    vpaddsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0xc1]
   3806 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3807   %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
   3808   ret <16 x i8> %res
   3809 }
   3810 
   3811 define <16 x i8> @test_mask_adds_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
   3812 ; CHECK-LABEL: test_mask_adds_epi8_rm_128:
   3813 ; CHECK:       ## BB#0:
   3814 ; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xec,0x07]
   3815 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3816   %b = load <16 x i8>, <16 x i8>* %ptr_b
   3817   %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
   3818   ret <16 x i8> %res
   3819 }
   3820 
   3821 define <16 x i8> @test_mask_adds_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
   3822 ; CHECK-LABEL: test_mask_adds_epi8_rmk_128:
   3823 ; CHECK:       ## BB#0:
   3824 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3825 ; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xec,0x0f]
   3826 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3827 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3828   %b = load <16 x i8>, <16 x i8>* %ptr_b
   3829   %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
   3830   ret <16 x i8> %res
   3831 }
   3832 
   3833 define <16 x i8> @test_mask_adds_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
   3834 ; CHECK-LABEL: test_mask_adds_epi8_rmkz_128:
   3835 ; CHECK:       ## BB#0:
   3836 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3837 ; CHECK-NEXT:    vpaddsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xec,0x07]
   3838 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3839   %b = load <16 x i8>, <16 x i8>* %ptr_b
   3840   %res = call <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
   3841   ret <16 x i8> %res
   3842 }
   3843 
   3844 declare <16 x i8> @llvm.x86.avx512.mask.padds.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   3845 
   3846 define <32 x i8> @test_mask_adds_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
   3847 ; CHECK-LABEL: test_mask_adds_epi8_rr_256:
   3848 ; CHECK:       ## BB#0:
   3849 ; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xec,0xc1]
   3850 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3851   %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
   3852   ret <32 x i8> %res
   3853 }
   3854 
   3855 define <32 x i8> @test_mask_adds_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
   3856 ; CHECK-LABEL: test_mask_adds_epi8_rrk_256:
   3857 ; CHECK:       ## BB#0:
   3858 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   3859 ; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0xd1]
   3860 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3861 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3862   %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
   3863   ret <32 x i8> %res
   3864 }
   3865 
   3866 define <32 x i8> @test_mask_adds_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
   3867 ; CHECK-LABEL: test_mask_adds_epi8_rrkz_256:
   3868 ; CHECK:       ## BB#0:
   3869 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   3870 ; CHECK-NEXT:    vpaddsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0xc1]
   3871 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3872   %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
   3873   ret <32 x i8> %res
   3874 }
   3875 
   3876 define <32 x i8> @test_mask_adds_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
   3877 ; CHECK-LABEL: test_mask_adds_epi8_rm_256:
   3878 ; CHECK:       ## BB#0:
   3879 ; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xec,0x07]
   3880 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3881   %b = load <32 x i8>, <32 x i8>* %ptr_b
   3882   %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
   3883   ret <32 x i8> %res
   3884 }
   3885 
   3886 define <32 x i8> @test_mask_adds_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
   3887 ; CHECK-LABEL: test_mask_adds_epi8_rmk_256:
   3888 ; CHECK:       ## BB#0:
   3889 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   3890 ; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xec,0x0f]
   3891 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3892 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3893   %b = load <32 x i8>, <32 x i8>* %ptr_b
   3894   %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
   3895   ret <32 x i8> %res
   3896 }
   3897 
   3898 define <32 x i8> @test_mask_adds_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
   3899 ; CHECK-LABEL: test_mask_adds_epi8_rmkz_256:
   3900 ; CHECK:       ## BB#0:
   3901 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   3902 ; CHECK-NEXT:    vpaddsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xec,0x07]
   3903 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3904   %b = load <32 x i8>, <32 x i8>* %ptr_b
   3905   %res = call <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
   3906   ret <32 x i8> %res
   3907 }
   3908 
   3909 declare <32 x i8> @llvm.x86.avx512.mask.padds.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   3910 
   3911 define <16 x i8> @test_mask_subs_epi8_rr_128(<16 x i8> %a, <16 x i8> %b) {
   3912 ; CHECK-LABEL: test_mask_subs_epi8_rr_128:
   3913 ; CHECK:       ## BB#0:
   3914 ; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe8,0xc1]
   3915 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3916   %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
   3917   ret <16 x i8> %res
   3918 }
   3919 
   3920 define <16 x i8> @test_mask_subs_epi8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
   3921 ; CHECK-LABEL: test_mask_subs_epi8_rrk_128:
   3922 ; CHECK:       ## BB#0:
   3923 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3924 ; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0xd1]
   3925 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3926 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3927   %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
   3928   ret <16 x i8> %res
   3929 }
   3930 
   3931 define <16 x i8> @test_mask_subs_epi8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
   3932 ; CHECK-LABEL: test_mask_subs_epi8_rrkz_128:
   3933 ; CHECK:       ## BB#0:
   3934 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   3935 ; CHECK-NEXT:    vpsubsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0xc1]
   3936 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3937   %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
   3938   ret <16 x i8> %res
   3939 }
   3940 
   3941 define <16 x i8> @test_mask_subs_epi8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
   3942 ; CHECK-LABEL: test_mask_subs_epi8_rm_128:
   3943 ; CHECK:       ## BB#0:
   3944 ; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe8,0x07]
   3945 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3946   %b = load <16 x i8>, <16 x i8>* %ptr_b
   3947   %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
   3948   ret <16 x i8> %res
   3949 }
   3950 
   3951 define <16 x i8> @test_mask_subs_epi8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
   3952 ; CHECK-LABEL: test_mask_subs_epi8_rmk_128:
   3953 ; CHECK:       ## BB#0:
   3954 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3955 ; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe8,0x0f]
   3956 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   3957 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3958   %b = load <16 x i8>, <16 x i8>* %ptr_b
   3959   %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
   3960   ret <16 x i8> %res
   3961 }
   3962 
   3963 define <16 x i8> @test_mask_subs_epi8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
   3964 ; CHECK-LABEL: test_mask_subs_epi8_rmkz_128:
   3965 ; CHECK:       ## BB#0:
   3966 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   3967 ; CHECK-NEXT:    vpsubsb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe8,0x07]
   3968 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3969   %b = load <16 x i8>, <16 x i8>* %ptr_b
   3970   %res = call <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
   3971   ret <16 x i8> %res
   3972 }
   3973 
   3974 declare <16 x i8> @llvm.x86.avx512.mask.psubs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   3975 
   3976 define <32 x i8> @test_mask_subs_epi8_rr_256(<32 x i8> %a, <32 x i8> %b) {
   3977 ; CHECK-LABEL: test_mask_subs_epi8_rr_256:
   3978 ; CHECK:       ## BB#0:
   3979 ; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe8,0xc1]
   3980 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3981   %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
   3982   ret <32 x i8> %res
   3983 }
   3984 
   3985 define <32 x i8> @test_mask_subs_epi8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
   3986 ; CHECK-LABEL: test_mask_subs_epi8_rrk_256:
   3987 ; CHECK:       ## BB#0:
   3988 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   3989 ; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0xd1]
   3990 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   3991 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   3992   %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
   3993   ret <32 x i8> %res
   3994 }
   3995 
   3996 define <32 x i8> @test_mask_subs_epi8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
   3997 ; CHECK-LABEL: test_mask_subs_epi8_rrkz_256:
   3998 ; CHECK:       ## BB#0:
   3999 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4000 ; CHECK-NEXT:    vpsubsb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0xc1]
   4001 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4002   %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
   4003   ret <32 x i8> %res
   4004 }
   4005 
   4006 define <32 x i8> @test_mask_subs_epi8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
   4007 ; CHECK-LABEL: test_mask_subs_epi8_rm_256:
   4008 ; CHECK:       ## BB#0:
   4009 ; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe8,0x07]
   4010 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4011   %b = load <32 x i8>, <32 x i8>* %ptr_b
   4012   %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
   4013   ret <32 x i8> %res
   4014 }
   4015 
   4016 define <32 x i8> @test_mask_subs_epi8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
   4017 ; CHECK-LABEL: test_mask_subs_epi8_rmk_256:
   4018 ; CHECK:       ## BB#0:
   4019 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   4020 ; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe8,0x0f]
   4021 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   4022 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4023   %b = load <32 x i8>, <32 x i8>* %ptr_b
   4024   %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
   4025   ret <32 x i8> %res
   4026 }
   4027 
   4028 define <32 x i8> @test_mask_subs_epi8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
   4029 ; CHECK-LABEL: test_mask_subs_epi8_rmkz_256:
   4030 ; CHECK:       ## BB#0:
   4031 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   4032 ; CHECK-NEXT:    vpsubsb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe8,0x07]
   4033 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4034   %b = load <32 x i8>, <32 x i8>* %ptr_b
   4035   %res = call <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
   4036   ret <32 x i8> %res
   4037 }
   4038 
   4039 declare <32 x i8> @llvm.x86.avx512.mask.psubs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   4040 
   4041 define <16 x i8> @test_mask_adds_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
   4042 ; CHECK-LABEL: test_mask_adds_epu8_rr_128:
   4043 ; CHECK:       ## BB#0:
   4044 ; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdc,0xc1]
   4045 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4046   %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
   4047   ret <16 x i8> %res
   4048 }
   4049 
   4050 define <16 x i8> @test_mask_adds_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
   4051 ; CHECK-LABEL: test_mask_adds_epu8_rrk_128:
   4052 ; CHECK:       ## BB#0:
   4053 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4054 ; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0xd1]
   4055 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   4056 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4057   %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
   4058   ret <16 x i8> %res
   4059 }
   4060 
   4061 define <16 x i8> @test_mask_adds_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
   4062 ; CHECK-LABEL: test_mask_adds_epu8_rrkz_128:
   4063 ; CHECK:       ## BB#0:
   4064 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4065 ; CHECK-NEXT:    vpaddusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0xc1]
   4066 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4067   %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
   4068   ret <16 x i8> %res
   4069 }
   4070 
   4071 define <16 x i8> @test_mask_adds_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
   4072 ; CHECK-LABEL: test_mask_adds_epu8_rm_128:
   4073 ; CHECK:       ## BB#0:
   4074 ; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdc,0x07]
   4075 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4076   %b = load <16 x i8>, <16 x i8>* %ptr_b
   4077   %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
   4078   ret <16 x i8> %res
   4079 }
   4080 
   4081 define <16 x i8> @test_mask_adds_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
   4082 ; CHECK-LABEL: test_mask_adds_epu8_rmk_128:
   4083 ; CHECK:       ## BB#0:
   4084 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4085 ; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdc,0x0f]
   4086 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   4087 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4088   %b = load <16 x i8>, <16 x i8>* %ptr_b
   4089   %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
   4090   ret <16 x i8> %res
   4091 }
   4092 
   4093 define <16 x i8> @test_mask_adds_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
   4094 ; CHECK-LABEL: test_mask_adds_epu8_rmkz_128:
   4095 ; CHECK:       ## BB#0:
   4096 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4097 ; CHECK-NEXT:    vpaddusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdc,0x07]
   4098 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4099   %b = load <16 x i8>, <16 x i8>* %ptr_b
   4100   %res = call <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
   4101   ret <16 x i8> %res
   4102 }
   4103 
   4104 declare <16 x i8> @llvm.x86.avx512.mask.paddus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   4105 
   4106 define <32 x i8> @test_mask_adds_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
   4107 ; CHECK-LABEL: test_mask_adds_epu8_rr_256:
   4108 ; CHECK:       ## BB#0:
   4109 ; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdc,0xc1]
   4110 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4111   %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
   4112   ret <32 x i8> %res
   4113 }
   4114 
   4115 define <32 x i8> @test_mask_adds_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
   4116 ; CHECK-LABEL: test_mask_adds_epu8_rrk_256:
   4117 ; CHECK:       ## BB#0:
   4118 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4119 ; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0xd1]
   4120 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   4121 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4122   %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
   4123   ret <32 x i8> %res
   4124 }
   4125 
   4126 define <32 x i8> @test_mask_adds_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
   4127 ; CHECK-LABEL: test_mask_adds_epu8_rrkz_256:
   4128 ; CHECK:       ## BB#0:
   4129 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4130 ; CHECK-NEXT:    vpaddusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0xc1]
   4131 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4132   %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
   4133   ret <32 x i8> %res
   4134 }
   4135 
   4136 define <32 x i8> @test_mask_adds_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
   4137 ; CHECK-LABEL: test_mask_adds_epu8_rm_256:
   4138 ; CHECK:       ## BB#0:
   4139 ; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdc,0x07]
   4140 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4141   %b = load <32 x i8>, <32 x i8>* %ptr_b
   4142   %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
   4143   ret <32 x i8> %res
   4144 }
   4145 
   4146 define <32 x i8> @test_mask_adds_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
   4147 ; CHECK-LABEL: test_mask_adds_epu8_rmk_256:
   4148 ; CHECK:       ## BB#0:
   4149 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   4150 ; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdc,0x0f]
   4151 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   4152 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4153   %b = load <32 x i8>, <32 x i8>* %ptr_b
   4154   %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
   4155   ret <32 x i8> %res
   4156 }
   4157 
   4158 define <32 x i8> @test_mask_adds_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
   4159 ; CHECK-LABEL: test_mask_adds_epu8_rmkz_256:
   4160 ; CHECK:       ## BB#0:
   4161 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   4162 ; CHECK-NEXT:    vpaddusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdc,0x07]
   4163 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4164   %b = load <32 x i8>, <32 x i8>* %ptr_b
   4165   %res = call <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
   4166   ret <32 x i8> %res
   4167 }
   4168 
   4169 declare <32 x i8> @llvm.x86.avx512.mask.paddus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   4170 
   4171 define <16 x i8> @test_mask_subs_epu8_rr_128(<16 x i8> %a, <16 x i8> %b) {
   4172 ; CHECK-LABEL: test_mask_subs_epu8_rr_128:
   4173 ; CHECK:       ## BB#0:
   4174 ; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd8,0xc1]
   4175 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4176   %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
   4177   ret <16 x i8> %res
   4178 }
   4179 
   4180 define <16 x i8> @test_mask_subs_epu8_rrk_128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask) {
   4181 ; CHECK-LABEL: test_mask_subs_epu8_rrk_128:
   4182 ; CHECK:       ## BB#0:
   4183 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4184 ; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0xd1]
   4185 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   4186 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4187   %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
   4188   ret <16 x i8> %res
   4189 }
   4190 
   4191 define <16 x i8> @test_mask_subs_epu8_rrkz_128(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
   4192 ; CHECK-LABEL: test_mask_subs_epu8_rrkz_128:
   4193 ; CHECK:       ## BB#0:
   4194 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4195 ; CHECK-NEXT:    vpsubusb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0xc1]
   4196 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4197   %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
   4198   ret <16 x i8> %res
   4199 }
   4200 
   4201 define <16 x i8> @test_mask_subs_epu8_rm_128(<16 x i8> %a, <16 x i8>* %ptr_b) {
   4202 ; CHECK-LABEL: test_mask_subs_epu8_rm_128:
   4203 ; CHECK:       ## BB#0:
   4204 ; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd8,0x07]
   4205 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4206   %b = load <16 x i8>, <16 x i8>* %ptr_b
   4207   %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 -1)
   4208   ret <16 x i8> %res
   4209 }
   4210 
   4211 define <16 x i8> @test_mask_subs_epu8_rmk_128(<16 x i8> %a, <16 x i8>* %ptr_b, <16 x i8> %passThru, i16 %mask) {
   4212 ; CHECK-LABEL: test_mask_subs_epu8_rmk_128:
   4213 ; CHECK:       ## BB#0:
   4214 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4215 ; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd8,0x0f]
   4216 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   4217 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4218   %b = load <16 x i8>, <16 x i8>* %ptr_b
   4219   %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passThru, i16 %mask)
   4220   ret <16 x i8> %res
   4221 }
   4222 
   4223 define <16 x i8> @test_mask_subs_epu8_rmkz_128(<16 x i8> %a, <16 x i8>* %ptr_b, i16 %mask) {
   4224 ; CHECK-LABEL: test_mask_subs_epu8_rmkz_128:
   4225 ; CHECK:       ## BB#0:
   4226 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4227 ; CHECK-NEXT:    vpsubusb (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd8,0x07]
   4228 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4229   %b = load <16 x i8>, <16 x i8>* %ptr_b
   4230   %res = call <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8> %a, <16 x i8> %b, <16 x i8> zeroinitializer, i16 %mask)
   4231   ret <16 x i8> %res
   4232 }
   4233 
   4234 declare <16 x i8> @llvm.x86.avx512.mask.psubus.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   4235 
   4236 define <32 x i8> @test_mask_subs_epu8_rr_256(<32 x i8> %a, <32 x i8> %b) {
   4237 ; CHECK-LABEL: test_mask_subs_epu8_rr_256:
   4238 ; CHECK:       ## BB#0:
   4239 ; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd8,0xc1]
   4240 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4241   %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
   4242   ret <32 x i8> %res
   4243 }
   4244 
   4245 define <32 x i8> @test_mask_subs_epu8_rrk_256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask) {
   4246 ; CHECK-LABEL: test_mask_subs_epu8_rrk_256:
   4247 ; CHECK:       ## BB#0:
   4248 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4249 ; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0xd1]
   4250 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
   4251 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4252   %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
   4253   ret <32 x i8> %res
   4254 }
   4255 
   4256 define <32 x i8> @test_mask_subs_epu8_rrkz_256(<32 x i8> %a, <32 x i8> %b, i32 %mask) {
   4257 ; CHECK-LABEL: test_mask_subs_epu8_rrkz_256:
   4258 ; CHECK:       ## BB#0:
   4259 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4260 ; CHECK-NEXT:    vpsubusb %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0xc1]
   4261 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4262   %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
   4263   ret <32 x i8> %res
   4264 }
   4265 
   4266 define <32 x i8> @test_mask_subs_epu8_rm_256(<32 x i8> %a, <32 x i8>* %ptr_b) {
   4267 ; CHECK-LABEL: test_mask_subs_epu8_rm_256:
   4268 ; CHECK:       ## BB#0:
   4269 ; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd8,0x07]
   4270 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4271   %b = load <32 x i8>, <32 x i8>* %ptr_b
   4272   %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 -1)
   4273   ret <32 x i8> %res
   4274 }
   4275 
   4276 define <32 x i8> @test_mask_subs_epu8_rmk_256(<32 x i8> %a, <32 x i8>* %ptr_b, <32 x i8> %passThru, i32 %mask) {
   4277 ; CHECK-LABEL: test_mask_subs_epu8_rmk_256:
   4278 ; CHECK:       ## BB#0:
   4279 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   4280 ; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd8,0x0f]
   4281 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
   4282 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4283   %b = load <32 x i8>, <32 x i8>* %ptr_b
   4284   %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> %passThru, i32 %mask)
   4285   ret <32 x i8> %res
   4286 }
   4287 
   4288 define <32 x i8> @test_mask_subs_epu8_rmkz_256(<32 x i8> %a, <32 x i8>* %ptr_b, i32 %mask) {
   4289 ; CHECK-LABEL: test_mask_subs_epu8_rmkz_256:
   4290 ; CHECK:       ## BB#0:
   4291 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   4292 ; CHECK-NEXT:    vpsubusb (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd8,0x07]
   4293 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4294   %b = load <32 x i8>, <32 x i8>* %ptr_b
   4295   %res = call <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8> %a, <32 x i8> %b, <32 x i8> zeroinitializer, i32 %mask)
   4296   ret <32 x i8> %res
   4297 }
   4298 
   4299 declare <32 x i8> @llvm.x86.avx512.mask.psubus.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   4300 
   4301 declare <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   4302 
   4303 define <16 x i8>@test_int_x86_avx512_mask_pmaxs_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
   4304 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_b_128:
   4305 ; CHECK:       ## BB#0:
   4306 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4307 ; CHECK-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3c,0xd1]
   4308 ; CHECK-NEXT:    vpmaxsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x3c,0xc1]
   4309 ; CHECK-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0]
   4310 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4311   %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2 ,i16 %mask)
   4312   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxs.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
   4313   %res2 = add <16 x i8> %res, %res1
   4314   ret <16 x i8> %res2
   4315 }
   4316 
   4317 declare <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   4318 
   4319 define <32 x i8>@test_int_x86_avx512_mask_pmaxs_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   4320 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_b_256:
   4321 ; CHECK:       ## BB#0:
   4322 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4323 ; CHECK-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3c,0xd1]
   4324 ; CHECK-NEXT:    vpmaxsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x3c,0xc1]
   4325 ; CHECK-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0]
   4326 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4327   %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   4328   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxs.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   4329   %res2 = add <32 x i8> %res, %res1
   4330   ret <32 x i8> %res2
   4331 }
   4332 
   4333 declare <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4334 
   4335 define <8 x i16>@test_int_x86_avx512_mask_pmaxs_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4336 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_w_128:
   4337 ; CHECK:       ## BB#0:
   4338 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4339 ; CHECK-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xee,0xd1]
   4340 ; CHECK-NEXT:    vpmaxsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xee,0xc1]
   4341 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   4342 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4343   %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4344   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxs.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4345   %res2 = add <8 x i16> %res, %res1
   4346   ret <8 x i16> %res2
   4347 }
   4348 
   4349 declare <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4350 
   4351 define <16 x i16>@test_int_x86_avx512_mask_pmaxs_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
   4352 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxs_w_256:
   4353 ; CHECK:       ## BB#0:
   4354 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4355 ; CHECK-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xee,0xd1]
   4356 ; CHECK-NEXT:    vpmaxsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xee,0xc1]
   4357 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   4358 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4359   %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
   4360   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxs.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
   4361   %res2 = add <16 x i16> %res, %res1
   4362   ret <16 x i16> %res2
   4363 }
   4364 
   4365 declare <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   4366 
   4367 define <16 x i8>@test_int_x86_avx512_mask_pmaxu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2,i16 %mask) {
   4368 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_b_128:
   4369 ; CHECK:       ## BB#0:
   4370 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4371 ; CHECK-NEXT:    vpmaxub %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xde,0xd1]
   4372 ; CHECK-NEXT:    vpmaxub %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xde,0xc1]
   4373 ; CHECK-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0]
   4374 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4375   %res = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
   4376   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmaxu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
   4377   %res2 = add <16 x i8> %res, %res1
   4378   ret <16 x i8> %res2
   4379 }
   4380 
   4381 declare <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   4382 
   4383 define <32 x i8>@test_int_x86_avx512_mask_pmaxu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   4384 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_b_256:
   4385 ; CHECK:       ## BB#0:
   4386 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4387 ; CHECK-NEXT:    vpmaxub %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xde,0xd1]
   4388 ; CHECK-NEXT:    vpmaxub %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xde,0xc1]
   4389 ; CHECK-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0]
   4390 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4391   %res = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   4392   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmaxu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   4393   %res2 = add <32 x i8> %res, %res1
   4394   ret <32 x i8> %res2
   4395 }
   4396 
   4397 declare <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4398 
   4399 define <8 x i16>@test_int_x86_avx512_mask_pmaxu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4400 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_w_128:
   4401 ; CHECK:       ## BB#0:
   4402 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4403 ; CHECK-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3e,0xd1]
   4404 ; CHECK-NEXT:    vpmaxuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3e,0xc1]
   4405 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   4406 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4407   %res = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4408   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaxu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4409   %res2 = add <8 x i16> %res, %res1
   4410   ret <8 x i16> %res2
   4411 }
   4412 
   4413 declare <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4414 
   4415 define <16 x i16>@test_int_x86_avx512_mask_pmaxu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
   4416 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaxu_w_256:
   4417 ; CHECK:       ## BB#0:
   4418 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4419 ; CHECK-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3e,0xd1]
   4420 ; CHECK-NEXT:    vpmaxuw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x3e,0xc1]
   4421 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   4422 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4423   %res = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
   4424   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaxu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
   4425   %res2 = add <16 x i16> %res, %res1
   4426   ret <16 x i16> %res2
   4427 }
   4428 
   4429 declare <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   4430 
   4431 define <16 x i8>@test_int_x86_avx512_mask_pmins_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
   4432 ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_b_128:
   4433 ; CHECK:       ## BB#0:
   4434 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4435 ; CHECK-NEXT:    vpminsb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x38,0xd1]
   4436 ; CHECK-NEXT:    vpminsb %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x38,0xc1]
   4437 ; CHECK-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0]
   4438 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4439   %res = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
   4440   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmins.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
   4441   %res2 = add <16 x i8> %res, %res1
   4442   ret <16 x i8> %res2
   4443 }
   4444 
   4445 declare <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   4446 
   4447 define <32 x i8>@test_int_x86_avx512_mask_pmins_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   4448 ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_b_256:
   4449 ; CHECK:       ## BB#0:
   4450 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4451 ; CHECK-NEXT:    vpminsb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x38,0xd1]
   4452 ; CHECK-NEXT:    vpminsb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x38,0xc1]
   4453 ; CHECK-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0]
   4454 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4455   %res = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   4456   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmins.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   4457   %res2 = add <32 x i8> %res, %res1
   4458   ret <32 x i8> %res2
   4459 }
   4460 
   4461 declare <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4462 
   4463 define <8 x i16>@test_int_x86_avx512_mask_pmins_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4464 ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_w_128:
   4465 ; CHECK:       ## BB#0:
   4466 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4467 ; CHECK-NEXT:    vpminsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xea,0xd1]
   4468 ; CHECK-NEXT:    vpminsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xea,0xc1]
   4469 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   4470 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4471   %res = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4472   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmins.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4473   %res2 = add <8 x i16> %res, %res1
   4474   ret <8 x i16> %res2
   4475 }
   4476 
   4477 declare <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4478 
   4479 define <16 x i16>@test_int_x86_avx512_mask_pmins_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
   4480 ; CHECK-LABEL: test_int_x86_avx512_mask_pmins_w_256:
   4481 ; CHECK:       ## BB#0:
   4482 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4483 ; CHECK-NEXT:    vpminsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xea,0xd1]
   4484 ; CHECK-NEXT:    vpminsw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xea,0xc1]
   4485 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   4486 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4487   %res = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
   4488   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmins.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
   4489   %res2 = add <16 x i16> %res, %res1
   4490   ret <16 x i16> %res2
   4491 }
   4492 
   4493 declare <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   4494 
   4495 define <16 x i8>@test_int_x86_avx512_mask_pminu_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask) {
   4496 ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_b_128:
   4497 ; CHECK:       ## BB#0:
   4498 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4499 ; CHECK-NEXT:    vpminub %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xda,0xd1]
   4500 ; CHECK-NEXT:    vpminub %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xda,0xc1]
   4501 ; CHECK-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0]
   4502 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4503   %res = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %mask)
   4504   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pminu.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> zeroinitializer, i16 %mask)
   4505   %res2 = add <16 x i8> %res, %res1
   4506   ret <16 x i8> %res2
   4507 }
   4508 
   4509 declare <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   4510 
   4511 define <32 x i8>@test_int_x86_avx512_mask_pminu_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   4512 ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_b_256:
   4513 ; CHECK:       ## BB#0:
   4514 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4515 ; CHECK-NEXT:    vpminub %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xda,0xd1]
   4516 ; CHECK-NEXT:    vpminub %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xda,0xc1]
   4517 ; CHECK-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0]
   4518 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4519   %res = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   4520   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pminu.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   4521   %res2 = add <32 x i8> %res, %res1
   4522   ret <32 x i8> %res2
   4523 }
   4524 
   4525 declare <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4526 
   4527 define <8 x i16>@test_int_x86_avx512_mask_pminu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4528 ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_w_128:
   4529 ; CHECK:       ## BB#0:
   4530 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4531 ; CHECK-NEXT:    vpminuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x3a,0xd1]
   4532 ; CHECK-NEXT:    vpminuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x3a,0xc1]
   4533 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   4534 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4535   %res = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4536   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pminu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4537   %res2 = add <8 x i16> %res, %res1
   4538   ret <8 x i16> %res2
   4539 }
   4540 
   4541 declare <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4542 
   4543 define <16 x i16>@test_int_x86_avx512_mask_pminu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask) {
   4544 ; CHECK-LABEL: test_int_x86_avx512_mask_pminu_w_256:
   4545 ; CHECK:       ## BB#0:
   4546 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4547 ; CHECK-NEXT:    vpminuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x3a,0xd1]
   4548 ; CHECK-NEXT:    vpminuw %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x3a,0xc1]
   4549 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   4550 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4551   %res = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %mask)
   4552   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pminu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %mask)
   4553   %res2 = add <16 x i16> %res, %res1
   4554   ret <16 x i16> %res2
   4555 }
   4556 
   4557 declare <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4558 
   4559 define <8 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4560 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_128:
   4561 ; CHECK:       ## BB#0:
   4562 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4563 ; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
   4564 ; CHECK-NEXT:    vpermt2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x7d,0xda]
   4565 ; CHECK-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca]
   4566 ; CHECK-NEXT:    vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1]
   4567 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4568   %res = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4569   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4570   %res2 = add <8 x i16> %res, %res1
   4571   ret <8 x i16> %res2
   4572 }
   4573 
   4574 declare <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4575 
   4576 define <8 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4577 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_128:
   4578 ; CHECK:       ## BB#0:
   4579 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4580 ; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
   4581 ; CHECK-NEXT:    vpermt2w %xmm2, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x7d,0xda]
   4582 ; CHECK-NEXT:    vpermt2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x7d,0xca]
   4583 ; CHECK-NEXT:    vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1]
   4584 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4585   %res = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4586   %res1 = call <8 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4587   %res2 = add <8 x i16> %res, %res1
   4588   ret <8 x i16> %res2
   4589 }
   4590 
   4591 declare <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4592 
   4593 define <16 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   4594 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_256:
   4595 ; CHECK:       ## BB#0:
   4596 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4597 ; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
   4598 ; CHECK-NEXT:    vpermt2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x7d,0xda]
   4599 ; CHECK-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca]
   4600 ; CHECK-NEXT:    vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1]
   4601 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4602   %res = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   4603   %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   4604   %res2 = add <16 x i16> %res, %res1
   4605   ret <16 x i16> %res2
   4606 }
   4607 
   4608 declare <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4609 
   4610 define <16 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   4611 ; CHECK-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_256:
   4612 ; CHECK:       ## BB#0:
   4613 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4614 ; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
   4615 ; CHECK-NEXT:    vpermt2w %ymm2, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x7d,0xda]
   4616 ; CHECK-NEXT:    vpermt2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x7d,0xca]
   4617 ; CHECK-NEXT:    vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1]
   4618 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4619   %res = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   4620   %res1 = call <16 x i16> @llvm.x86.avx512.maskz.vpermt2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   4621   %res2 = add <16 x i16> %res, %res1
   4622   ret <16 x i16> %res2
   4623 }
   4624 
   4625 declare <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4626 
   4627 define <8 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4628 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_128:
   4629 ; CHECK:       ## BB#0:
   4630 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4631 ; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
   4632 ; CHECK-NEXT:    vpermi2w %xmm2, %xmm0, %xmm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x75,0xda]
   4633 ; CHECK-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 ## encoding: [0x62,0xf2,0xfd,0x08,0x75,0xca]
   4634 ; CHECK-NEXT:    vpaddw %xmm1, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc1]
   4635 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4636   %res = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4637   %res1 = call <8 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4638   %res2 = add <8 x i16> %res, %res1
   4639   ret <8 x i16> %res2
   4640 }
   4641 
   4642 declare <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4643 
   4644 define <16 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   4645 ; CHECK-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_256:
   4646 ; CHECK:       ## BB#0:
   4647 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4648 ; CHECK-NEXT:    vmovaps %zmm1, %zmm3 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xd9]
   4649 ; CHECK-NEXT:    vpermi2w %ymm2, %ymm0, %ymm3 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x75,0xda]
   4650 ; CHECK-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 ## encoding: [0x62,0xf2,0xfd,0x28,0x75,0xca]
   4651 ; CHECK-NEXT:    vpaddw %ymm1, %ymm3, %ymm0 ## encoding: [0x62,0xf1,0x65,0x28,0xfd,0xc1]
   4652 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4653   %res = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   4654   %res1 = call <16 x i16> @llvm.x86.avx512.mask.vpermi2var.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   4655   %res2 = add <16 x i16> %res, %res1
   4656   ret <16 x i16> %res2
   4657 }
   4658 
   4659 declare <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   4660 
   4661 define <16 x i8>@test_int_x86_avx512_mask_pavg_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
   4662 ; CHECK-LABEL: test_int_x86_avx512_mask_pavg_b_128:
   4663 ; CHECK:       ## BB#0:
   4664 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4665 ; CHECK-NEXT:    vpavgb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe0,0xd1]
   4666 ; CHECK-NEXT:    vpavgb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe0,0xc1]
   4667 ; CHECK-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0]
   4668 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4669   %res = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
   4670   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pavg.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
   4671   %res2 = add <16 x i8> %res, %res1
   4672   ret <16 x i8> %res2
   4673 }
   4674 
   4675 declare <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   4676 
   4677 define <32 x i8>@test_int_x86_avx512_mask_pavg_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   4678 ; CHECK-LABEL: test_int_x86_avx512_mask_pavg_b_256:
   4679 ; CHECK:       ## BB#0:
   4680 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4681 ; CHECK-NEXT:    vpavgb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe0,0xd1]
   4682 ; CHECK-NEXT:    vpavgb %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe0,0xc1]
   4683 ; CHECK-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0]
   4684 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4685   %res = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   4686   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pavg.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   4687   %res2 = add <32 x i8> %res, %res1
   4688   ret <32 x i8> %res2
   4689 }
   4690 
   4691 declare <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4692 
   4693 define <8 x i16>@test_int_x86_avx512_mask_pavg_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4694 ; CHECK-LABEL: test_int_x86_avx512_mask_pavg_w_128:
   4695 ; CHECK:       ## BB#0:
   4696 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4697 ; CHECK-NEXT:    vpavgw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe3,0xd1]
   4698 ; CHECK-NEXT:    vpavgw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe3,0xc1]
   4699 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   4700 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4701   %res = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4702   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pavg.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4703   %res2 = add <8 x i16> %res, %res1
   4704   ret <8 x i16> %res2
   4705 }
   4706 
   4707 declare <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4708 
   4709 define <16 x i16>@test_int_x86_avx512_mask_pavg_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   4710 ; CHECK-LABEL: test_int_x86_avx512_mask_pavg_w_256:
   4711 ; CHECK:       ## BB#0:
   4712 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4713 ; CHECK-NEXT:    vpavgw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe3,0xd1]
   4714 ; CHECK-NEXT:    vpavgw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe3,0xc1]
   4715 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   4716 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4717   %res = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   4718   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pavg.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   4719   %res2 = add <16 x i16> %res, %res1
   4720   ret <16 x i16> %res2
   4721 }
   4722 
   4723 declare <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8>, <16 x i8>, <16 x i8>, i16)
   4724 
   4725 define <16 x i8>@test_int_x86_avx512_mask_pshuf_b_128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3) {
   4726 ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_128:
   4727 ; CHECK:       ## BB#0:
   4728 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4729 ; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x00,0xd1]
   4730 ; CHECK-NEXT:    vpshufb %xmm1, %xmm0, %xmm0 ## encoding: [0xc4,0xe2,0x79,0x00,0xc1]
   4731 ; CHECK-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0]
   4732 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4733   %res = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 %x3)
   4734   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pshuf.b.128(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, i16 -1)
   4735   %res2 = add <16 x i8> %res, %res1
   4736   ret <16 x i8> %res2
   4737 }
   4738 
   4739 declare <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8>, <32 x i8>, <32 x i8>, i32)
   4740 
   4741 define <32 x i8>@test_int_x86_avx512_mask_pshuf_b_256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3) {
   4742 ; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_b_256:
   4743 ; CHECK:       ## BB#0:
   4744 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4745 ; CHECK-NEXT:    vpshufb %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x00,0xd1]
   4746 ; CHECK-NEXT:    vpshufb %ymm1, %ymm0, %ymm0 ## encoding: [0xc4,0xe2,0x7d,0x00,0xc1]
   4747 ; CHECK-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0]
   4748 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4749   %res = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 %x3)
   4750   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pshuf.b.256(<32 x i8> %x0, <32 x i8> %x1, <32 x i8> %x2, i32 -1)
   4751   %res2 = add <32 x i8> %res, %res1
   4752   ret <32 x i8> %res2
   4753 }
   4754 
   4755 declare <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8>, <16 x i8>, i16)
   4756 
   4757 define <16 x i8>@test_int_x86_avx512_mask_pabs_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
   4758 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_b_128:
   4759 ; CHECK:       ## BB#0:
   4760 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4761 ; CHECK-NEXT:    vpabsb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1c,0xc8]
   4762 ; CHECK-NEXT:    vpabsb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1c,0xc0]
   4763 ; CHECK-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfc,0xc0]
   4764 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4765   %res = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
   4766   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pabs.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 -1)
   4767   %res2 = add <16 x i8> %res, %res1
   4768   ret <16 x i8> %res2
   4769 }
   4770 
   4771 declare <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8>, <32 x i8>, i32)
   4772 
   4773 define <32 x i8>@test_int_x86_avx512_mask_pabs_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
   4774 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_b_256:
   4775 ; CHECK:       ## BB#0:
   4776 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   4777 ; CHECK-NEXT:    vpabsb %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1c,0xc8]
   4778 ; CHECK-NEXT:    vpabsb %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1c,0xc0]
   4779 ; CHECK-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0]
   4780 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4781   %res = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
   4782   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pabs.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 -1)
   4783   %res2 = add <32 x i8> %res, %res1
   4784   ret <32 x i8> %res2
   4785 }
   4786 
   4787 declare <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16>, <8 x i16>, i8)
   4788 
   4789 define <8 x i16>@test_int_x86_avx512_mask_pabs_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
   4790 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_128:
   4791 ; CHECK:       ## BB#0:
   4792 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4793 ; CHECK-NEXT:    vpabsw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x1d,0xc8]
   4794 ; CHECK-NEXT:    vpabsw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x1d,0xc0]
   4795 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   4796 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4797   %res = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
   4798   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pabs.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 -1)
   4799   %res2 = add <8 x i16> %res, %res1
   4800   ret <8 x i16> %res2
   4801 }
   4802 
   4803 declare <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16>, <16 x i16>, i16)
   4804 
   4805 define <16 x i16>@test_int_x86_avx512_mask_pabs_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
   4806 ; CHECK-LABEL: test_int_x86_avx512_mask_pabs_w_256:
   4807 ; CHECK:       ## BB#0:
   4808 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4809 ; CHECK-NEXT:    vpabsw %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x1d,0xc8]
   4810 ; CHECK-NEXT:    vpabsw %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x1d,0xc0]
   4811 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   4812 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4813   %res = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
   4814   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pabs.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 -1)
   4815   %res2 = add <16 x i16> %res, %res1
   4816   ret <16 x i16> %res2
   4817 }
   4818 
   4819 declare <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4820 
   4821 define <8 x i16>@test_int_x86_avx512_mask_pmulhu_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4822 ; CHECK-LABEL: test_int_x86_avx512_mask_pmulhu_w_128:
   4823 ; CHECK:       ## BB#0:
   4824 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4825 ; CHECK-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe4,0xd1]
   4826 ; CHECK-NEXT:    vpmulhuw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe4,0xc1]
   4827 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   4828 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4829   %res = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4830   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulhu.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4831   %res2 = add <8 x i16> %res, %res1
   4832   ret <8 x i16> %res2
   4833 }
   4834 
   4835 declare <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4836 
   4837 define <16 x i16>@test_int_x86_avx512_mask_pmulhu_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   4838 ; CHECK-LABEL: test_int_x86_avx512_mask_pmulhu_w_256:
   4839 ; CHECK:       ## BB#0:
   4840 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4841 ; CHECK-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe4,0xd1]
   4842 ; CHECK-NEXT:    vpmulhuw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe4,0xc1]
   4843 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   4844 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4845   %res = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   4846   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulhu.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   4847   %res2 = add <16 x i16> %res, %res1
   4848   ret <16 x i16> %res2
   4849 }
   4850 
   4851 declare <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4852 
   4853 define <8 x i16>@test_int_x86_avx512_mask_pmulh_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4854 ; CHECK-LABEL: test_int_x86_avx512_mask_pmulh_w_128:
   4855 ; CHECK:       ## BB#0:
   4856 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4857 ; CHECK-NEXT:    vpmulhw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe5,0xd1]
   4858 ; CHECK-NEXT:    vpmulhw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe5,0xc1]
   4859 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   4860 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4861   %res = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4862   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmulh.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4863   %res2 = add <8 x i16> %res, %res1
   4864   ret <8 x i16> %res2
   4865 }
   4866 
   4867 declare <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4868 
   4869 define <16 x i16>@test_int_x86_avx512_mask_pmulh_w_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   4870 ; CHECK-LABEL: test_int_x86_avx512_mask_pmulh_w_256:
   4871 ; CHECK:       ## BB#0:
   4872 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4873 ; CHECK-NEXT:    vpmulhw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe5,0xd1]
   4874 ; CHECK-NEXT:    vpmulhw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe5,0xc1]
   4875 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   4876 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4877   %res = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   4878   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmulh.w.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   4879   %res2 = add <16 x i16> %res, %res1
   4880   ret <16 x i16> %res2
   4881 }
   4882 
   4883 declare <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   4884 
   4885 define <8 x i16>@test_int_x86_avx512_mask_pmulhr_sw_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   4886 ; CHECK-LABEL: test_int_x86_avx512_mask_pmulhr_sw_128:
   4887 ; CHECK:       ## BB#0:
   4888 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4889 ; CHECK-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x0b,0xd1]
   4890 ; CHECK-NEXT:    vpmulhrsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x0b,0xc1]
   4891 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   4892 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4893   %res = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   4894   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   4895   %res2 = add <8 x i16> %res, %res1
   4896   ret <8 x i16> %res2
   4897 }
   4898 
   4899 declare <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   4900 
   4901 define <16 x i16>@test_int_x86_avx512_mask_pmulhr_sw_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   4902 ; CHECK-LABEL: test_int_x86_avx512_mask_pmulhr_sw_256:
   4903 ; CHECK:       ## BB#0:
   4904 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4905 ; CHECK-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x0b,0xd1]
   4906 ; CHECK-NEXT:    vpmulhrsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x0b,0xc1]
   4907 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   4908 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4909   %res = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   4910   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmul.hr.sw.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   4911   %res2 = add <16 x i16> %res, %res1
   4912   ret <16 x i16> %res2
   4913 }
   4914 
   4915 declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16>, <16 x i8>, i8)
   4916 
   4917 define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
   4918 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_128:
   4919 ; CHECK:       ## BB#0:
   4920 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4921 ; CHECK-NEXT:    vpmovwb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x30,0xc1]
   4922 ; CHECK-NEXT:    vpmovwb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x30,0xc2]
   4923 ; CHECK-NEXT:    vpmovwb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0xc0]
   4924 ; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1]
   4925 ; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2]
   4926 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4927     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
   4928     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
   4929     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
   4930     %res3 = add <16 x i8> %res0, %res1
   4931     %res4 = add <16 x i8> %res3, %res2
   4932     ret <16 x i8> %res4
   4933 }
   4934 
   4935 declare void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16>, i8)
   4936 
   4937 define void @test_int_x86_avx512_mask_pmov_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
   4938 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_128:
   4939 ; CHECK:       ## BB#0:
   4940 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4941 ; CHECK-NEXT:    vpmovwb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x30,0x07]
   4942 ; CHECK-NEXT:    vpmovwb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x30,0x07]
   4943 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4944     call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
   4945     call void @llvm.x86.avx512.mask.pmov.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
   4946     ret void
   4947 }
   4948 
   4949 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16>, <16 x i8>, i8)
   4950 
   4951 define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
   4952 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_128:
   4953 ; CHECK:       ## BB#0:
   4954 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4955 ; CHECK-NEXT:    vpmovswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0xc1]
   4956 ; CHECK-NEXT:    vpmovswb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x20,0xc2]
   4957 ; CHECK-NEXT:    vpmovswb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0xc0]
   4958 ; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1]
   4959 ; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2]
   4960 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4961     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
   4962     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
   4963     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
   4964     %res3 = add <16 x i8> %res0, %res1
   4965     %res4 = add <16 x i8> %res3, %res2
   4966     ret <16 x i8> %res4
   4967 }
   4968 
   4969 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16>, i8)
   4970 
   4971 define void @test_int_x86_avx512_mask_pmovs_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
   4972 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_128:
   4973 ; CHECK:       ## BB#0:
   4974 ; CHECK-NEXT:    vpmovswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x20,0x07]
   4975 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   4976 ; CHECK-NEXT:    vpmovswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x20,0x07]
   4977 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4978     call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
   4979     call void @llvm.x86.avx512.mask.pmovs.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
   4980     ret void
   4981 }
   4982 
   4983 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16>, <16 x i8>, i8)
   4984 
   4985 define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2) {
   4986 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_128:
   4987 ; CHECK:       ## BB#0:
   4988 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   4989 ; CHECK-NEXT:    vpmovuswb %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0xc1]
   4990 ; CHECK-NEXT:    vpmovuswb %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0x89,0x10,0xc2]
   4991 ; CHECK-NEXT:    vpmovuswb %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0xc0]
   4992 ; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1]
   4993 ; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2]
   4994 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   4995     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 -1)
   4996     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> %x1, i8 %x2)
   4997     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.128(<8 x i16> %x0, <16 x i8> zeroinitializer, i8 %x2)
   4998     %res3 = add <16 x i8> %res0, %res1
   4999     %res4 = add <16 x i8> %res3, %res2
   5000     ret <16 x i8> %res4
   5001 }
   5002 
   5003 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16>, i8)
   5004 
   5005 define void @test_int_x86_avx512_mask_pmovus_wb_mem_128(i8* %ptr, <8 x i16> %x1, i8 %x2) {
   5006 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_128:
   5007 ; CHECK:       ## BB#0:
   5008 ; CHECK-NEXT:    vpmovuswb %xmm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x08,0x10,0x07]
   5009 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5010 ; CHECK-NEXT:    vpmovuswb %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x10,0x07]
   5011 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5012     call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 -1)
   5013     call void @llvm.x86.avx512.mask.pmovus.wb.mem.128(i8* %ptr, <8 x i16> %x1, i8 %x2)
   5014     ret void
   5015 }
   5016 
   5017 declare <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16>, <16 x i8>, i16)
   5018 
   5019 define <16 x i8>@test_int_x86_avx512_mask_pmov_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
   5020 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_256:
   5021 ; CHECK:       ## BB#0:
   5022 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5023 ; CHECK-NEXT:    vpmovwb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x30,0xc1]
   5024 ; CHECK-NEXT:    vpmovwb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x30,0xc2]
   5025 ; CHECK-NEXT:    vpmovwb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x30,0xc0]
   5026 ; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1]
   5027 ; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2]
   5028 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5029     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
   5030     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
   5031     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmov.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
   5032     %res3 = add <16 x i8> %res0, %res1
   5033     %res4 = add <16 x i8> %res3, %res2
   5034     ret <16 x i8> %res4
   5035 }
   5036 
   5037 declare void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16>, i16)
   5038 
   5039 define void @test_int_x86_avx512_mask_pmov_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
   5040 ; CHECK-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_256:
   5041 ; CHECK:       ## BB#0:
   5042 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5043 ; CHECK-NEXT:    vpmovwb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x30,0x07]
   5044 ; CHECK-NEXT:    vpmovwb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x30,0x07]
   5045 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5046     call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
   5047     call void @llvm.x86.avx512.mask.pmov.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
   5048     ret void
   5049 }
   5050 
   5051 declare <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16>, <16 x i8>, i16)
   5052 
   5053 define <16 x i8>@test_int_x86_avx512_mask_pmovs_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
   5054 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_256:
   5055 ; CHECK:       ## BB#0:
   5056 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5057 ; CHECK-NEXT:    vpmovswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0xc1]
   5058 ; CHECK-NEXT:    vpmovswb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x20,0xc2]
   5059 ; CHECK-NEXT:    vpmovswb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0xc0]
   5060 ; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1]
   5061 ; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2]
   5062 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5063     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
   5064     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
   5065     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovs.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
   5066     %res3 = add <16 x i8> %res0, %res1
   5067     %res4 = add <16 x i8> %res3, %res2
   5068     ret <16 x i8> %res4
   5069 }
   5070 
   5071 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16>, i16)
   5072 
   5073 define void @test_int_x86_avx512_mask_pmovs_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
   5074 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_256:
   5075 ; CHECK:       ## BB#0:
   5076 ; CHECK-NEXT:    vpmovswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x20,0x07]
   5077 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5078 ; CHECK-NEXT:    vpmovswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x20,0x07]
   5079 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5080     call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
   5081     call void @llvm.x86.avx512.mask.pmovs.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
   5082     ret void
   5083 }
   5084 
   5085 declare <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16>, <16 x i8>, i16)
   5086 
   5087 define <16 x i8>@test_int_x86_avx512_mask_pmovus_wb_256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2) {
   5088 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_256:
   5089 ; CHECK:       ## BB#0:
   5090 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5091 ; CHECK-NEXT:    vpmovuswb %ymm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0xc1]
   5092 ; CHECK-NEXT:    vpmovuswb %ymm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7e,0xa9,0x10,0xc2]
   5093 ; CHECK-NEXT:    vpmovuswb %ymm0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0xc0]
   5094 ; CHECK-NEXT:    vpaddb %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc1]
   5095 ; CHECK-NEXT:    vpaddb %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfc,0xc2]
   5096 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5097     %res0 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 -1)
   5098     %res1 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> %x1, i16 %x2)
   5099     %res2 = call <16 x i8> @llvm.x86.avx512.mask.pmovus.wb.256(<16 x i16> %x0, <16 x i8> zeroinitializer, i16 %x2)
   5100     %res3 = add <16 x i8> %res0, %res1
   5101     %res4 = add <16 x i8> %res3, %res2
   5102     ret <16 x i8> %res4
   5103 }
   5104 
   5105 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16>, i16)
   5106 
   5107 define void @test_int_x86_avx512_mask_pmovus_wb_mem_256(i8* %ptr, <16 x i16> %x1, i16 %x2) {
   5108 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_256:
   5109 ; CHECK:       ## BB#0:
   5110 ; CHECK-NEXT:    vpmovuswb %ymm0, (%rdi) ## encoding: [0x62,0xf2,0x7e,0x28,0x10,0x07]
   5111 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5112 ; CHECK-NEXT:    vpmovuswb %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x10,0x07]
   5113 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5114     call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 -1)
   5115     call void @llvm.x86.avx512.mask.pmovus.wb.mem.256(i8* %ptr, <16 x i16> %x1, i16 %x2)
   5116     ret void
   5117 }
   5118 
   5119 declare <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16>, <8 x i16>, <4 x i32>, i8)
   5120 
   5121 define <4 x i32>@test_int_x86_avx512_mask_pmaddw_d_128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3) {
   5122 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_128:
   5123 ; CHECK:       ## BB#0:
   5124 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5125 ; CHECK-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf5,0xd1]
   5126 ; CHECK-NEXT:    vpmaddwd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf5,0xc1]
   5127 ; CHECK-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc0]
   5128 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5129   %res = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 %x3)
   5130   %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaddw.d.128(<8 x i16> %x0, <8 x i16> %x1, <4 x i32> %x2, i8 -1)
   5131   %res2 = add <4 x i32> %res, %res1
   5132   ret <4 x i32> %res2
   5133 }
   5134 
   5135 declare <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16>, <16 x i16>, <8 x i32>, i8)
   5136 
   5137 define <8 x i32>@test_int_x86_avx512_mask_pmaddw_d_256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3) {
   5138 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddw_d_256:
   5139 ; CHECK:       ## BB#0:
   5140 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5141 ; CHECK-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf5,0xd1]
   5142 ; CHECK-NEXT:    vpmaddwd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf5,0xc1]
   5143 ; CHECK-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc0]
   5144 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5145   %res = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 %x3)
   5146   %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaddw.d.256(<16 x i16> %x0, <16 x i16> %x1, <8 x i32> %x2, i8 -1)
   5147   %res2 = add <8 x i32> %res, %res1
   5148   ret <8 x i32> %res2
   5149 }
   5150 
   5151 declare <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8>, <16 x i8>, <8 x i16>, i8)
   5152 
   5153 define <8 x i16>@test_int_x86_avx512_mask_pmaddubs_w_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3) {
   5154 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_128:
   5155 ; CHECK:       ## BB#0:
   5156 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5157 ; CHECK-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x04,0xd1]
   5158 ; CHECK-NEXT:    vpmaddubsw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x04,0xc1]
   5159 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   5160 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5161   %res = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 %x3)
   5162   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmaddubs.w.128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x2, i8 -1)
   5163   %res2 = add <8 x i16> %res, %res1
   5164   ret <8 x i16> %res2
   5165 }
   5166 
   5167 declare <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8>, <32 x i8>, <16 x i16>, i16)
   5168 
   5169 define <16 x i16>@test_int_x86_avx512_mask_pmaddubs_w_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3) {
   5170 ; CHECK-LABEL: test_int_x86_avx512_mask_pmaddubs_w_256:
   5171 ; CHECK:       ## BB#0:
   5172 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5173 ; CHECK-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x04,0xd1]
   5174 ; CHECK-NEXT:    vpmaddubsw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x04,0xc1]
   5175 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   5176 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5177   %res = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 %x3)
   5178   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmaddubs.w.256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x2, i16 -1)
   5179   %res2 = add <16 x i16> %res, %res1
   5180   ret <16 x i16> %res2
   5181 }
   5182 
   5183 declare <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8>, <16 x i8>, i32, <8 x i16>, i8)
   5184 
   5185 define <8 x i16>@test_int_x86_avx512_mask_dbpsadbw_128(<16 x i8> %x0, <16 x i8> %x1, <8 x i16> %x3, i8 %x4) {
   5186 ; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_128:
   5187 ; CHECK:       ## BB#0:
   5188 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5189 ; CHECK-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x42,0xd1,0x02]
   5190 ; CHECK-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x42,0xd9,0x02]
   5191 ; CHECK-NEXT:    vdbpsadbw $2, %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf3,0x7d,0x08,0x42,0xc1,0x02]
   5192 ; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
   5193 ; CHECK-NEXT:    vpaddw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfd,0xc1]
   5194 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5195   %res = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 %x4)
   5196   %res1 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> zeroinitializer, i8 %x4)
   5197   %res2 = call <8 x i16> @llvm.x86.avx512.mask.dbpsadbw.128(<16 x i8> %x0, <16 x i8> %x1, i32 2, <8 x i16> %x3, i8 -1)
   5198   %res3 = add <8 x i16> %res, %res1
   5199   %res4 = add <8 x i16> %res2, %res3
   5200   ret <8 x i16> %res4
   5201 }
   5202 
   5203 declare <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8>, <32 x i8>, i32, <16 x i16>, i16)
   5204 
   5205 define <16 x i16>@test_int_x86_avx512_mask_dbpsadbw_256(<32 x i8> %x0, <32 x i8> %x1, <16 x i16> %x3, i16 %x4) {
   5206 ; CHECK-LABEL: test_int_x86_avx512_mask_dbpsadbw_256:
   5207 ; CHECK:       ## BB#0:
   5208 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5209 ; CHECK-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x42,0xd1,0x02]
   5210 ; CHECK-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x42,0xd9,0x02]
   5211 ; CHECK-NEXT:    vdbpsadbw $2, %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf3,0x7d,0x28,0x42,0xc1,0x02]
   5212 ; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
   5213 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5214 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5215   %res = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 %x4)
   5216   %res1 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> zeroinitializer, i16 %x4)
   5217   %res2 = call <16 x i16> @llvm.x86.avx512.mask.dbpsadbw.256(<32 x i8> %x0, <32 x i8> %x1, i32 2, <16 x i16> %x3, i16 -1)
   5218   %res3 = add <16 x i16> %res, %res1
   5219   %res4 = add <16 x i16> %res3, %res2
   5220   ret <16 x i16> %res4
   5221 }
   5222 
   5223 declare i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8>)
   5224 
   5225 define i16@test_int_x86_avx512_cvtb2mask_128(<16 x i8> %x0) {
   5226 ; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_128:
   5227 ; CHECK:       ## BB#0:
   5228 ; CHECK-NEXT:    vpmovb2m %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x29,0xc0]
   5229 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5230 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   5231 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5232     %res = call i16 @llvm.x86.avx512.cvtb2mask.128(<16 x i8> %x0)
   5233     ret i16 %res
   5234 }
   5235 
   5236 declare i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8>)
   5237 
   5238 define i32@test_int_x86_avx512_cvtb2mask_256(<32 x i8> %x0) {
   5239 ; CHECK-LABEL: test_int_x86_avx512_cvtb2mask_256:
   5240 ; CHECK:       ## BB#0:
   5241 ; CHECK-NEXT:    vpmovb2m %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x29,0xc0]
   5242 ; CHECK-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
   5243 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5244     %res = call i32 @llvm.x86.avx512.cvtb2mask.256(<32 x i8> %x0)
   5245     ret i32 %res
   5246 }
   5247 
   5248 declare i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16>)
   5249 
   5250 define i8@test_int_x86_avx512_cvtw2mask_128(<8 x i16> %x0) {
   5251 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_128:
   5252 ; CHECK:       ## BB#0:
   5253 ; CHECK-NEXT:    vpmovw2m %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x29,0xc0]
   5254 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5255 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   5256 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5257     %res = call i8 @llvm.x86.avx512.cvtw2mask.128(<8 x i16> %x0)
   5258     ret i8 %res
   5259 }
   5260 
   5261 declare i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16>)
   5262 
   5263 define i16@test_int_x86_avx512_cvtw2mask_256(<16 x i16> %x0) {
   5264 ; CHECK-LABEL: test_int_x86_avx512_cvtw2mask_256:
   5265 ; CHECK:       ## BB#0:
   5266 ; CHECK-NEXT:    vpmovw2m %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x29,0xc0]
   5267 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5268 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   5269 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5270     %res = call i16 @llvm.x86.avx512.cvtw2mask.256(<16 x i16> %x0)
   5271     ret i16 %res
   5272 }
   5273 
   5274 declare <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16)
   5275 
   5276 define <16 x i8>@test_int_x86_avx512_cvtmask2b_128(i16 %x0) {
   5277 ; CHECK-LABEL: test_int_x86_avx512_cvtmask2b_128:
   5278 ; CHECK:       ## BB#0:
   5279 ; CHECK-NEXT:    kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
   5280 ; CHECK-NEXT:    vpmovm2b %k0, %xmm0 ## encoding: [0x62,0xf2,0x7e,0x08,0x28,0xc0]
   5281 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5282   %res = call <16 x i8> @llvm.x86.avx512.cvtmask2b.128(i16 %x0)
   5283   ret <16 x i8> %res
   5284 }
   5285 
   5286 declare <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32)
   5287 
   5288 define <32 x i8>@test_int_x86_avx512_cvtmask2b_256(i32 %x0) {
   5289 ; CHECK-LABEL: test_int_x86_avx512_cvtmask2b_256:
   5290 ; CHECK:       ## BB#0:
   5291 ; CHECK-NEXT:    kmovd %edi, %k0 ## encoding: [0xc5,0xfb,0x92,0xc7]
   5292 ; CHECK-NEXT:    vpmovm2b %k0, %ymm0 ## encoding: [0x62,0xf2,0x7e,0x28,0x28,0xc0]
   5293 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5294   %res = call <32 x i8> @llvm.x86.avx512.cvtmask2b.256(i32 %x0)
   5295   ret <32 x i8> %res
   5296 }
   5297 
   5298 declare <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8)
   5299 
   5300 define <8 x i16>@test_int_x86_avx512_cvtmask2w_128(i8 %x0) {
   5301 ; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_128:
   5302 ; CHECK:       ## BB#0:
   5303 ; CHECK-NEXT:    kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
   5304 ; CHECK-NEXT:    vpmovm2w %k0, %xmm0 ## encoding: [0x62,0xf2,0xfe,0x08,0x28,0xc0]
   5305 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5306   %res = call <8 x i16> @llvm.x86.avx512.cvtmask2w.128(i8 %x0)
   5307   ret <8 x i16> %res
   5308 }
   5309 
   5310 declare <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16)
   5311 
   5312 define <16 x i16>@test_int_x86_avx512_cvtmask2w_256(i16 %x0) {
   5313 ; CHECK-LABEL: test_int_x86_avx512_cvtmask2w_256:
   5314 ; CHECK:       ## BB#0:
   5315 ; CHECK-NEXT:    kmovw %edi, %k0 ## encoding: [0xc5,0xf8,0x92,0xc7]
   5316 ; CHECK-NEXT:    vpmovm2w %k0, %ymm0 ## encoding: [0x62,0xf2,0xfe,0x28,0x28,0xc0]
   5317 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5318   %res = call <16 x i16> @llvm.x86.avx512.cvtmask2w.256(i16 %x0)
   5319   ret <16 x i16> %res
   5320 }
   5321 
   5322 declare <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5323 
   5324 define <8 x i16>@test_int_x86_avx512_mask_psrl_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5325 ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_128:
   5326 ; CHECK:       ## BB#0:
   5327 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5328 ; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xd1,0xd1]
   5329 ; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xd1,0xd9]
   5330 ; CHECK-NEXT:    vpsrlw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xd1,0xc1]
   5331 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   5332 ; CHECK-NEXT:    vpaddw %xmm0, %xmm3, %xmm0 ## encoding: [0x62,0xf1,0x65,0x08,0xfd,0xc0]
   5333 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5334   %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5335   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5336   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   5337   %res3 = add <8 x i16> %res, %res1
   5338   %res4 = add <8 x i16> %res2, %res3
   5339   ret <8 x i16> %res4
   5340 }
   5341 
   5342 declare <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
   5343 
   5344 define <16 x i16>@test_int_x86_avx512_mask_psrl_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5345 ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_w_256:
   5346 ; CHECK:       ## BB#0:
   5347 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5348 ; CHECK-NEXT:    vpsrlw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xd1,0xd1]
   5349 ; CHECK-NEXT:    vpsrlw %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xd1,0xd9]
   5350 ; CHECK-NEXT:    vpsrlw %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xd1,0xc1]
   5351 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   5352 ; CHECK-NEXT:    vpaddw %ymm3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc3]
   5353 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5354   %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5355   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
   5356   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   5357   %res3 = add <16 x i16> %res, %res1
   5358   %res4 = add <16 x i16> %res3, %res2
   5359   ret <16 x i16> %res4
   5360 }
   5361 
   5362 declare <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16>, i32, <8 x i16>, i8)
   5363 
   5364 define <8 x i16>@test_int_x86_avx512_mask_psrl_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
   5365 ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_128:
   5366 ; CHECK:       ## BB#0:
   5367 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5368 ; CHECK-NEXT:    vpsrlw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xd0,0x03]
   5369 ; CHECK-NEXT:    vpsrlw $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x71,0xd0,0x03]
   5370 ; CHECK-NEXT:    vpsrlw $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xd0,0x03]
   5371 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5372 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   5373 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5374   %res = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
   5375   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
   5376   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrl.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
   5377   %res3 = add <8 x i16> %res, %res1
   5378   %res4 = add <8 x i16> %res2, %res3
   5379   ret <8 x i16> %res4
   5380 }
   5381 
   5382 declare <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16>, i32, <16 x i16>, i16)
   5383 
   5384 define <16 x i16>@test_int_x86_avx512_mask_psrl_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
   5385 ; CHECK-LABEL: test_int_x86_avx512_mask_psrl_wi_256:
   5386 ; CHECK:       ## BB#0:
   5387 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5388 ; CHECK-NEXT:    vpsrlw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xd0,0x03]
   5389 ; CHECK-NEXT:    vpsrlw $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xd0,0x03]
   5390 ; CHECK-NEXT:    vpsrlw $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x71,0xd0,0x03]
   5391 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5392 ; CHECK-NEXT:    vpaddw %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfd,0xc2]
   5393 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5394   %res = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
   5395   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
   5396   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrl.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
   5397   %res3 = add <16 x i16> %res, %res1
   5398   %res4 = add <16 x i16> %res3, %res2
   5399   ret <16 x i16> %res4
   5400 }
   5401 
   5402 declare <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   5403 
   5404 define <16 x i16>@test_int_x86_avx512_mask_psrlv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5405 ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv16_hi:
   5406 ; CHECK:       ## BB#0:
   5407 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5408 ; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x10,0xd1]
   5409 ; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x10,0xd9]
   5410 ; CHECK-NEXT:    vpsrlvw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x10,0xc1]
   5411 ; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
   5412 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5413 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5414   %res = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5415   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   5416   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrlv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   5417   %res3 = add <16 x i16> %res, %res1
   5418   %res4 = add <16 x i16> %res3, %res2
   5419   ret <16 x i16> %res4
   5420 }
   5421 
   5422 declare <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5423 
   5424 define <8 x i16>@test_int_x86_avx512_mask_psrlv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5425 ; CHECK-LABEL: test_int_x86_avx512_mask_psrlv8_hi:
   5426 ; CHECK:       ## BB#0:
   5427 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5428 ; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x10,0xd1]
   5429 ; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x10,0xd9]
   5430 ; CHECK-NEXT:    vpsrlvw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x10,0xc1]
   5431 ; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
   5432 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5433 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5434   %res = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5435   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   5436   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrlv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5437   %res3 = add <8 x i16> %res, %res1
   5438   %res4 = add <8 x i16> %res3, %res2
   5439   ret <8 x i16> %res4
   5440 }
   5441 
   5442 declare <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5443 
   5444 define <8 x i16>@test_int_x86_avx512_mask_psra_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5445 ; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_128:
   5446 ; CHECK:       ## BB#0:
   5447 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5448 ; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xe1,0xd1]
   5449 ; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xe1,0xd9]
   5450 ; CHECK-NEXT:    vpsraw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xe1,0xc1]
   5451 ; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
   5452 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5453 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5454   %res = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5455   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   5456   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5457   %res3 = add <8 x i16> %res, %res1
   5458   %res4 = add <8 x i16> %res3, %res2
   5459   ret <8 x i16> %res4
   5460 }
   5461 
   5462 declare <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16>, i32, <8 x i16>, i8)
   5463 
   5464 define <8 x i16>@test_int_x86_avx512_mask_psra_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
   5465 ; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_128:
   5466 ; CHECK:       ## BB#0:
   5467 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5468 ; CHECK-NEXT:    vpsraw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xe0,0x03]
   5469 ; CHECK-NEXT:    vpsraw $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x71,0xe0,0x03]
   5470 ; CHECK-NEXT:    vpsraw $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xe0,0x03]
   5471 ; CHECK-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xca]
   5472 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5473 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5474   %res = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
   5475   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
   5476   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psra.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
   5477   %res3 = add <8 x i16> %res, %res1
   5478   %res4 = add <8 x i16> %res3, %res2
   5479   ret <8 x i16> %res4
   5480 }
   5481 
   5482 declare <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
   5483 
   5484 define <16 x i16>@test_int_x86_avx512_mask_psra_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5485 ; CHECK-LABEL: test_int_x86_avx512_mask_psra_w_256:
   5486 ; CHECK:       ## BB#0:
   5487 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5488 ; CHECK-NEXT:    vpsraw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xe1,0xd1]
   5489 ; CHECK-NEXT:    vpsraw %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xe1,0xd9]
   5490 ; CHECK-NEXT:    vpsraw %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xe1,0xc1]
   5491 ; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
   5492 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5493 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5494   %res = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5495   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   5496   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
   5497   %res3 = add <16 x i16> %res, %res1
   5498   %res4 = add <16 x i16> %res3, %res2
   5499   ret <16 x i16> %res4
   5500 }
   5501 
   5502 declare <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16>, i32, <16 x i16>, i16)
   5503 
   5504 define <16 x i16>@test_int_x86_avx512_mask_psra_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
   5505 ; CHECK-LABEL: test_int_x86_avx512_mask_psra_wi_256:
   5506 ; CHECK:       ## BB#0:
   5507 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5508 ; CHECK-NEXT:    vpsraw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xe0,0x03]
   5509 ; CHECK-NEXT:    vpsraw $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xe0,0x03]
   5510 ; CHECK-NEXT:    vpsraw $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x71,0xe0,0x03]
   5511 ; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xca]
   5512 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5513 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5514   %res = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
   5515   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
   5516   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psra.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
   5517   %res3 = add <16 x i16> %res, %res1
   5518   %res4 = add <16 x i16> %res3, %res2
   5519   ret <16 x i16> %res4
   5520 }
   5521 
   5522 declare <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   5523 
   5524 define <16 x i16>@test_int_x86_avx512_mask_psrav16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5525 ; CHECK-LABEL: test_int_x86_avx512_mask_psrav16_hi:
   5526 ; CHECK:       ## BB#0:
   5527 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5528 ; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x11,0xd1]
   5529 ; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x11,0xd9]
   5530 ; CHECK-NEXT:    vpsravw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x11,0xc1]
   5531 ; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
   5532 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5533 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5534   %res = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5535   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   5536   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psrav16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   5537   %res3 = add <16 x i16> %res, %res1
   5538   %res4 = add <16 x i16> %res3, %res2
   5539   ret <16 x i16> %res4
   5540 }
   5541 
   5542 declare <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5543 
   5544 define <8 x i16>@test_int_x86_avx512_mask_psrav8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5545 ; CHECK-LABEL: test_int_x86_avx512_mask_psrav8_hi:
   5546 ; CHECK:       ## BB#0:
   5547 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5548 ; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x11,0xd1]
   5549 ; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x11,0xd9]
   5550 ; CHECK-NEXT:    vpsravw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x11,0xc1]
   5551 ; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
   5552 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5553 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5554   %res = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5555   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   5556   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psrav8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5557   %res3 = add <8 x i16> %res, %res1
   5558   %res4 = add <8 x i16> %res3, %res2
   5559   ret <8 x i16> %res4
   5560 }
   5561 
   5562 
   5563 declare <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5564 
   5565 define <8 x i16>@test_int_x86_avx512_mask_psll_w_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5566 ; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_128:
   5567 ; CHECK:       ## BB#0:
   5568 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5569 ; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xf1,0xd1]
   5570 ; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xf1,0xd9]
   5571 ; CHECK-NEXT:    vpsllw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xf1,0xc1]
   5572 ; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
   5573 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5574 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5575   %res = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5576   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   5577   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.w.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5578   %res3 = add <8 x i16> %res, %res1
   5579   %res4 = add <8 x i16> %res3, %res2
   5580   ret <8 x i16> %res4
   5581 }
   5582 
   5583 declare <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16>, <8 x i16>, <16 x i16>, i16)
   5584 
   5585 define <16 x i16>@test_int_x86_avx512_mask_psll_w_256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5586 ; CHECK-LABEL: test_int_x86_avx512_mask_psll_w_256:
   5587 ; CHECK:       ## BB#0:
   5588 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5589 ; CHECK-NEXT:    vpsllw %xmm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xf1,0xd1]
   5590 ; CHECK-NEXT:    vpsllw %xmm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xf1,0xd9]
   5591 ; CHECK-NEXT:    vpsllw %xmm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xf1,0xc1]
   5592 ; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
   5593 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5594 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5595   %res = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5596   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   5597   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.w.256(<16 x i16> %x0, <8 x i16> %x1, <16 x i16> %x2, i16 -1)
   5598   %res3 = add <16 x i16> %res, %res1
   5599   %res4 = add <16 x i16> %res3, %res2
   5600   ret <16 x i16> %res4
   5601 }
   5602 
   5603 declare <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16>, i32, <8 x i16>, i8)
   5604 
   5605 define <8 x i16>@test_int_x86_avx512_mask_psll_wi_128(<8 x i16> %x0, i32 %x1, <8 x i16> %x2, i8 %x3) {
   5606 ; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_128:
   5607 ; CHECK:       ## BB#0:
   5608 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5609 ; CHECK-NEXT:    vpsllw $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x09,0x71,0xf0,0x03]
   5610 ; CHECK-NEXT:    vpsllw $3, %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0x89,0x71,0xf0,0x03]
   5611 ; CHECK-NEXT:    vpsllw $3, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x71,0xf0,0x03]
   5612 ; CHECK-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xca]
   5613 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5614 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5615   %res = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 %x3)
   5616   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> zeroinitializer, i8 %x3)
   5617   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psll.wi.128(<8 x i16> %x0, i32 3, <8 x i16> %x2, i8 -1)
   5618   %res3 = add <8 x i16> %res, %res1
   5619   %res4 = add <8 x i16> %res3, %res2
   5620   ret <8 x i16> %res4
   5621 }
   5622 
   5623 declare <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16>, i32, <16 x i16>, i16)
   5624 
   5625 define <16 x i16>@test_int_x86_avx512_mask_psll_wi_256(<16 x i16> %x0, i32 %x1, <16 x i16> %x2, i16 %x3) {
   5626 ; CHECK-LABEL: test_int_x86_avx512_mask_psll_wi_256:
   5627 ; CHECK:       ## BB#0:
   5628 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   5629 ; CHECK-NEXT:    vpsllw $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x75,0x29,0x71,0xf0,0x03]
   5630 ; CHECK-NEXT:    vpsllw $3, %ymm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf1,0x6d,0xa9,0x71,0xf0,0x03]
   5631 ; CHECK-NEXT:    vpsllw $3, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x71,0xf0,0x03]
   5632 ; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xca]
   5633 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5634 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5635   %res = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 %x3)
   5636   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> zeroinitializer, i16 %x3)
   5637   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psll.wi.256(<16 x i16> %x0, i32 3, <16 x i16> %x2, i16 -1)
   5638   %res3 = add <16 x i16> %res, %res1
   5639   %res4 = add <16 x i16> %res3, %res2
   5640   ret <16 x i16> %res4
   5641 }
   5642 
   5643 declare <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   5644 
   5645 define <16 x i16>@test_int_x86_avx512_mask_psllv16_hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5646 ; CHECK-LABEL: test_int_x86_avx512_mask_psllv16_hi:
   5647 ; CHECK:       ## BB#0:
   5648 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5649 ; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x12,0xd1]
   5650 ; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x12,0xd9]
   5651 ; CHECK-NEXT:    vpsllvw %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf2,0xfd,0x28,0x12,0xc1]
   5652 ; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
   5653 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5654 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5655   %res = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5656   %res1 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   5657   %res2 = call <16 x i16> @llvm.x86.avx512.mask.psllv16.hi(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   5658   %res3 = add <16 x i16> %res, %res1
   5659   %res4 = add <16 x i16> %res3, %res2
   5660   ret <16 x i16> %res4
   5661 }
   5662 
   5663 declare <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5664 
   5665 define <8 x i16>@test_int_x86_avx512_mask_psllv8_hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5666 ; CHECK-LABEL: test_int_x86_avx512_mask_psllv8_hi:
   5667 ; CHECK:       ## BB#0:
   5668 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5669 ; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x12,0xd1]
   5670 ; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x12,0xd9]
   5671 ; CHECK-NEXT:    vpsllvw %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf2,0xfd,0x08,0x12,0xc1]
   5672 ; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
   5673 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5674 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5675   %res = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5676   %res1 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   5677   %res2 = call <8 x i16> @llvm.x86.avx512.mask.psllv8.hi(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5678   %res3 = add <8 x i16> %res, %res1
   5679   %res4 = add <8 x i16> %res3, %res2
   5680   ret <8 x i16> %res4
   5681 }
   5682 
   5683 declare <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8>, <8 x i16>, i8)
   5684 
   5685 define <8 x i16>@test_int_x86_avx512_mask_pmovzxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
   5686 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_128:
   5687 ; CHECK:       ## BB#0:
   5688 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5689 ; CHECK-NEXT:    vpmovzxbw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x30,0xc8]
   5690 ; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   5691 ; CHECK-NEXT:    vpmovzxbw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x30,0xd0]
   5692 ; CHECK-NEXT:    ## xmm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   5693 ; CHECK-NEXT:    vpmovzxbw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x30,0xc0]
   5694 ; CHECK-NEXT:    ## xmm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
   5695 ; CHECK-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xca]
   5696 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5697 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5698   %res = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
   5699   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
   5700   %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovzxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
   5701   %res3 = add <8 x i16> %res, %res1
   5702   %res4 = add <8 x i16> %res3, %res2
   5703   ret <8 x i16> %res4
   5704 }
   5705 
   5706 declare <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8>, <16 x i16>, i16)
   5707 
   5708 define <16 x i16>@test_int_x86_avx512_mask_pmovzxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
   5709 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovzxb_w_256:
   5710 ; CHECK:       ## BB#0:
   5711 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5712 ; CHECK-NEXT:    vpmovzxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x30,0xc8]
   5713 ; CHECK-NEXT:    ## ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   5714 ; CHECK-NEXT:    vpmovzxbw %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x30,0xd0]
   5715 ; CHECK-NEXT:    ## ymm2 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   5716 ; CHECK-NEXT:    vpmovzxbw %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x30,0xc0]
   5717 ; CHECK-NEXT:    ## ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero,xmm0[8],zero,xmm0[9],zero,xmm0[10],zero,xmm0[11],zero,xmm0[12],zero,xmm0[13],zero,xmm0[14],zero,xmm0[15],zero
   5718 ; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xca]
   5719 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5720 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5721   %res = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
   5722   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
   5723   %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovzxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
   5724   %res3 = add <16 x i16> %res, %res1
   5725   %res4 = add <16 x i16> %res3, %res2
   5726   ret <16 x i16> %res4
   5727 }
   5728 
   5729 
   5730 declare <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8>, <8 x i16>, i8)
   5731 
   5732 define <8 x i16>@test_int_x86_avx512_mask_pmovsxb_w_128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2) {
   5733 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_128:
   5734 ; CHECK:       ## BB#0:
   5735 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5736 ; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x20,0xc8]
   5737 ; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x20,0xd0]
   5738 ; CHECK-NEXT:    vpmovsxbw %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x20,0xc0]
   5739 ; CHECK-NEXT:    vpaddw %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xca]
   5740 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5741 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5742   %res = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 %x2)
   5743   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> zeroinitializer, i8 %x2)
   5744   %res2 = call <8 x i16> @llvm.x86.avx512.mask.pmovsxb.w.128(<16 x i8> %x0, <8 x i16> %x1, i8 -1)
   5745   %res3 = add <8 x i16> %res, %res1
   5746   %res4 = add <8 x i16> %res3, %res2
   5747   ret <8 x i16> %res4
   5748 }
   5749 
   5750 declare <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8>, <16 x i16>, i16)
   5751 
   5752 define <16 x i16>@test_int_x86_avx512_mask_pmovsxb_w_256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2) {
   5753 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxb_w_256:
   5754 ; CHECK:       ## BB#0:
   5755 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5756 ; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x20,0xc8]
   5757 ; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x20,0xd0]
   5758 ; CHECK-NEXT:    vpmovsxbw %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x20,0xc0]
   5759 ; CHECK-NEXT:    vpaddw %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xca]
   5760 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5761 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5762   %res = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 %x2)
   5763   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> zeroinitializer, i16 %x2)
   5764   %res2 = call <16 x i16> @llvm.x86.avx512.mask.pmovsxb.w.256(<16 x i8> %x0, <16 x i16> %x1, i16 -1)
   5765   %res3 = add <16 x i16> %res, %res1
   5766   %res4 = add <16 x i16> %res3, %res2
   5767   ret <16 x i16> %res4
   5768 }
   5769 
   5770 declare <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32>, <2 x i64>, i8)
   5771 
   5772 define <2 x i64>@test_int_x86_avx512_mask_pmovsxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
   5773 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_128:
   5774 ; CHECK:       ## BB#0:
   5775 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5776 ; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x25,0xc8]
   5777 ; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x25,0xd0]
   5778 ; CHECK-NEXT:    vpmovsxdq %xmm0, %xmm0 ## encoding: [0x62,0xf2,0x7d,0x08,0x25,0xc0]
   5779 ; CHECK-NEXT:    vpaddq %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xca]
   5780 ; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
   5781 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5782   %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
   5783   %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
   5784   %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
   5785   %res3 = add <2 x i64> %res, %res1
   5786   %res4 = add <2 x i64> %res3, %res2
   5787   ret <2 x i64> %res4
   5788 }
   5789 
   5790 declare <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32>, <4 x i64>, i8)
   5791 
   5792 define <4 x i64>@test_int_x86_avx512_mask_pmovsxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
   5793 ; CHECK-LABEL: test_int_x86_avx512_mask_pmovsxd_q_256:
   5794 ; CHECK:       ## BB#0:
   5795 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5796 ; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x25,0xc8]
   5797 ; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm2 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x25,0xd0]
   5798 ; CHECK-NEXT:    vpmovsxdq %xmm0, %ymm0 ## encoding: [0x62,0xf2,0x7d,0x28,0x25,0xc0]
   5799 ; CHECK-NEXT:    vpaddq %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xca]
   5800 ; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
   5801 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5802   %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
   5803   %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
   5804   %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
   5805   %res3 = add <4 x i64> %res, %res1
   5806   %res4 = add <4 x i64> %res3, %res2
   5807   ret <4 x i64> %res4
   5808 }
   5809 
   5810 declare <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16>, <8 x i16>, <8 x i16>, i8)
   5811 
   5812 define <8 x i16>@test_int_x86_avx512_mask_permvar_hi_128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3) {
   5813 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_128:
   5814 ; CHECK:       ## BB#0:
   5815 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5816 ; CHECK-NEXT:    vpermw %xmm0, %xmm1, %xmm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x09,0x8d,0xd0]
   5817 ; CHECK-NEXT:    vpermw %xmm0, %xmm1, %xmm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0x89,0x8d,0xd8]
   5818 ; CHECK-NEXT:    vpermw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf2,0xf5,0x08,0x8d,0xc0]
   5819 ; CHECK-NEXT:    vpaddw %xmm3, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xcb]
   5820 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   5821 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5822   %res = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 %x3)
   5823   %res1 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> zeroinitializer, i8 %x3)
   5824   %res2 = call <8 x i16> @llvm.x86.avx512.mask.permvar.hi.128(<8 x i16> %x0, <8 x i16> %x1, <8 x i16> %x2, i8 -1)
   5825   %res3 = add <8 x i16> %res, %res1
   5826   %res4 = add <8 x i16> %res3, %res2
   5827   ret <8 x i16> %res4
   5828 }
   5829 
   5830 declare <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16>, <16 x i16>, <16 x i16>, i16)
   5831 
   5832 define <16 x i16>@test_int_x86_avx512_mask_permvar_hi_256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3) {
   5833 ; CHECK-LABEL: test_int_x86_avx512_mask_permvar_hi_256:
   5834 ; CHECK:       ## BB#0:
   5835 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5836 ; CHECK-NEXT:    vpermw %ymm0, %ymm1, %ymm2 {%k1} ## encoding: [0x62,0xf2,0xf5,0x29,0x8d,0xd0]
   5837 ; CHECK-NEXT:    vpermw %ymm0, %ymm1, %ymm3 {%k1} {z} ## encoding: [0x62,0xf2,0xf5,0xa9,0x8d,0xd8]
   5838 ; CHECK-NEXT:    vpermw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf2,0xf5,0x28,0x8d,0xc0]
   5839 ; CHECK-NEXT:    vpaddw %ymm3, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xcb]
   5840 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   5841 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5842   %res = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 %x3)
   5843   %res1 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> zeroinitializer, i16 %x3)
   5844   %res2 = call <16 x i16> @llvm.x86.avx512.mask.permvar.hi.256(<16 x i16> %x0, <16 x i16> %x1, <16 x i16> %x2, i16 -1)
   5845   %res3 = add <16 x i16> %res, %res1
   5846   %res4 = add <16 x i16> %res3, %res2
   5847   ret <16 x i16> %res4
   5848 }
   5849 
   5850 declare i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8>, <16 x i8>, i16)
   5851 
   5852 define i16@test_int_x86_avx512_ptestm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
   5853 ; CHECK-LABEL: test_int_x86_avx512_ptestm_b_128:
   5854 ; CHECK:       ## BB#0:
   5855 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5856 ; CHECK-NEXT:    vptestmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x26,0xc1]
   5857 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5858 ; CHECK-NEXT:    vptestmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x08,0x26,0xc1]
   5859 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5860 ; CHECK-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
   5861 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   5862 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5863   %res = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
   5864   %res1 = call i16 @llvm.x86.avx512.ptestm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
   5865   %res2 = add i16 %res, %res1
   5866   ret i16 %res2
   5867 }
   5868 
   5869 declare i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8>, <32 x i8>, i32)
   5870 
   5871 define i32@test_int_x86_avx512_ptestm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
   5872 ; CHECK-LABEL: test_int_x86_avx512_ptestm_b_256:
   5873 ; CHECK:       ## BB#0:
   5874 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   5875 ; CHECK-NEXT:    vptestmb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x26,0xc1]
   5876 ; CHECK-NEXT:    kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
   5877 ; CHECK-NEXT:    vptestmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7d,0x28,0x26,0xc1]
   5878 ; CHECK-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
   5879 ; CHECK-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
   5880 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5881   %res = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
   5882   %res1 = call i32 @llvm.x86.avx512.ptestm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1)
   5883   %res2 = add i32 %res, %res1
   5884   ret i32 %res2
   5885 }
   5886 
   5887 declare i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16>, <8 x i16>, i8)
   5888 
   5889 define i8@test_int_x86_avx512_ptestm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
   5890 ; CHECK-LABEL: test_int_x86_avx512_ptestm_w_128:
   5891 ; CHECK:       ## BB#0:
   5892 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5893 ; CHECK-NEXT:    vptestmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x26,0xc1]
   5894 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5895 ; CHECK-NEXT:    vptestmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x26,0xc1]
   5896 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5897 ; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
   5898 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   5899 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5900   %res = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
   5901   %res1 = call i8 @llvm.x86.avx512.ptestm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
   5902   %res2 = add i8 %res, %res1
   5903   ret i8 %res2
   5904 }
   5905 
   5906 declare i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16>, <16 x i16>, i16)
   5907 
   5908 define i16@test_int_x86_avx512_ptestm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
   5909 ; CHECK-LABEL: test_int_x86_avx512_ptestm_w_256:
   5910 ; CHECK:       ## BB#0:
   5911 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5912 ; CHECK-NEXT:    vptestmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x26,0xc1]
   5913 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5914 ; CHECK-NEXT:    vptestmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x26,0xc1]
   5915 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5916 ; CHECK-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
   5917 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   5918 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5919   %res = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
   5920   %res1 = call i16 @llvm.x86.avx512.ptestm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)
   5921   %res2 = add i16 %res, %res1
   5922   ret i16 %res2
   5923 }
   5924 
   5925 declare i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8>, <16 x i8>, i16)
   5926 
   5927 define i16@test_int_x86_avx512_ptestnm_b_128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2) {
   5928 ; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_128:
   5929 ; CHECK:       ## BB#0:
   5930 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5931 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x09,0x26,0xc1]
   5932 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5933 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x08,0x26,0xc1]
   5934 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5935 ; CHECK-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
   5936 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   5937 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5938   %res = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16 %x2)
   5939   %res1 = call i16 @llvm.x86.avx512.ptestnm.b.128(<16 x i8> %x0, <16 x i8> %x1, i16-1)
   5940   %res2 = add i16 %res, %res1
   5941   ret i16 %res2
   5942 }
   5943 
   5944 declare i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8>, <32 x i8>, i32)
   5945 
   5946 define i32@test_int_x86_avx512_ptestnm_b_256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2) {
   5947 ; CHECK-LABEL: test_int_x86_avx512_ptestnm_b_256:
   5948 ; CHECK:       ## BB#0:
   5949 ; CHECK-NEXT:    kmovd %edi, %k1 ## encoding: [0xc5,0xfb,0x92,0xcf]
   5950 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0x7e,0x29,0x26,0xc1]
   5951 ; CHECK-NEXT:    kmovd %k0, %ecx ## encoding: [0xc5,0xfb,0x93,0xc8]
   5952 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0x7e,0x28,0x26,0xc1]
   5953 ; CHECK-NEXT:    kmovd %k0, %eax ## encoding: [0xc5,0xfb,0x93,0xc0]
   5954 ; CHECK-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
   5955 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5956   %res = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32 %x2)
   5957   %res1 = call i32 @llvm.x86.avx512.ptestnm.b.256(<32 x i8> %x0, <32 x i8> %x1, i32-1)
   5958   %res2 = add i32 %res, %res1
   5959   ret i32 %res2
   5960 }
   5961 
   5962 declare i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16>, <8 x i16>, i8 %x2)
   5963 
   5964 define i8@test_int_x86_avx512_ptestnm_w_128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2) {
   5965 ; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_128:
   5966 ; CHECK:       ## BB#0:
   5967 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5968 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x09,0x26,0xc1]
   5969 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5970 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x08,0x26,0xc1]
   5971 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5972 ; CHECK-NEXT:    addb %cl, %al ## encoding: [0x00,0xc8]
   5973 ; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
   5974 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5975   %res = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8 %x2)
   5976   %res1 = call i8 @llvm.x86.avx512.ptestnm.w.128(<8 x i16> %x0, <8 x i16> %x1, i8-1)
   5977   %res2 = add i8 %res, %res1
   5978   ret i8 %res2
   5979 }
   5980 
   5981 declare i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16>, <16 x i16>, i16 %x2)
   5982 
   5983 define i16@test_int_x86_avx512_ptestnm_w_256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2) {
   5984 ; CHECK-LABEL: test_int_x86_avx512_ptestnm_w_256:
   5985 ; CHECK:       ## BB#0:
   5986 ; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
   5987 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfe,0x29,0x26,0xc1]
   5988 ; CHECK-NEXT:    kmovw %k0, %ecx ## encoding: [0xc5,0xf8,0x93,0xc8]
   5989 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfe,0x28,0x26,0xc1]
   5990 ; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
   5991 ; CHECK-NEXT:    addl %ecx, %eax ## encoding: [0x01,0xc8]
   5992 ; CHECK-NEXT:    ## kill: %AX<def> %AX<kill> %EAX<kill>
   5993 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   5994   %res = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16 %x2)
   5995   %res1 = call i16 @llvm.x86.avx512.ptestnm.w.256(<16 x i16> %x0, <16 x i16> %x1, i16-1)
   5996   %res2 = add i16 %res, %res1
   5997   ret i16 %res2
   5998 }
   5999 
   6000 declare <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8, <32 x i8>, i32)
   6001 
   6002 define <32 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_256(i8 %x0, <32 x i8> %x1, i32 %mask) {
   6003 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_256:
   6004 ; CHECK:       ## BB#0:
   6005 ; CHECK-NEXT:    kmovd %esi, %k1 ## encoding: [0xc5,0xfb,0x92,0xce]
   6006 ; CHECK-NEXT:    vpbroadcastb %dil, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7a,0xc7]
   6007 ; CHECK-NEXT:    vpbroadcastb %dil, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7a,0xcf]
   6008 ; CHECK-NEXT:    vpbroadcastb %dil, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7a,0xd7]
   6009 ; CHECK-NEXT:    vpaddb %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfc,0xc0]
   6010 ; CHECK-NEXT:    vpaddb %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfc,0xc0]
   6011 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   6012   %res = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 -1)
   6013   %res1 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> %x1, i32 %mask)
   6014   %res2 = call <32 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.256(i8 %x0, <32 x i8> zeroinitializer, i32 %mask)
   6015   %res3 = add <32 x i8> %res, %res1
   6016   %res4 = add <32 x i8> %res2, %res3
   6017   ret <32 x i8> %res4
   6018 }
   6019 
   6020 declare <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8, <16 x i8>, i16)
   6021 
   6022 define <16 x i8>@test_int_x86_avx512_mask_pbroadcast_b_gpr_128(i8 %x0, <16 x i8> %x1, i16 %mask) {
   6023 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_b_gpr_128:
   6024 ; CHECK:       ## BB#0:
   6025 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6026 ; CHECK-NEXT:    vpbroadcastb %dil, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7a,0xc7]
   6027 ; CHECK-NEXT:    vpbroadcastb %dil, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7a,0xcf]
   6028 ; CHECK-NEXT:    vpbroadcastb %dil, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7a,0xd7]
   6029 ; CHECK-NEXT:    vpaddb %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfc,0xc0]
   6030 ; CHECK-NEXT:    vpaddb %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfc,0xc0]
   6031 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   6032   %res = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 -1)
   6033   %res1 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> %x1, i16 %mask)
   6034   %res2 = call <16 x i8> @llvm.x86.avx512.mask.pbroadcast.b.gpr.128(i8 %x0, <16 x i8> zeroinitializer, i16 %mask)
   6035   %res3 = add <16 x i8> %res, %res1
   6036   %res4 = add <16 x i8> %res2, %res3
   6037   ret <16 x i8> %res4
   6038 }
   6039 
   6040 declare <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16, <16 x i16>, i16)
   6041 
   6042 define <16 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_256(i16 %x0, <16 x i16> %x1, i16 %mask) {
   6043 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_256:
   6044 ; CHECK:       ## BB#0:
   6045 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6046 ; CHECK-NEXT:    vpbroadcastw %di, %ymm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x7b,0xc7]
   6047 ; CHECK-NEXT:    vpbroadcastw %di, %ymm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x7b,0xcf]
   6048 ; CHECK-NEXT:    vpbroadcastw %di, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x7b,0xd7]
   6049 ; CHECK-NEXT:    vpaddw %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfd,0xc0]
   6050 ; CHECK-NEXT:    vpaddw %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfd,0xc0]
   6051 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   6052   %res = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 -1)
   6053   %res1 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> %x1, i16 %mask)
   6054   %res2 = call <16 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.256(i16 %x0, <16 x i16> zeroinitializer, i16 %mask)
   6055   %res3 = add <16 x i16> %res, %res1
   6056   %res4 = add <16 x i16> %res2, %res3
   6057   ret <16 x i16> %res4
   6058 }
   6059 
   6060 declare <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16, <8 x i16>, i8)
   6061 
   6062 define <8 x i16>@test_int_x86_avx512_mask_pbroadcast_w_gpr_128(i16 %x0, <8 x i16> %x1, i8 %mask) {
   6063 ; CHECK-LABEL: test_int_x86_avx512_mask_pbroadcast_w_gpr_128:
   6064 ; CHECK:       ## BB#0:
   6065 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
   6066 ; CHECK-NEXT:    vpbroadcastw %di, %xmm0 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x7b,0xc7]
   6067 ; CHECK-NEXT:    vpbroadcastw %di, %xmm1 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x7b,0xcf]
   6068 ; CHECK-NEXT:    vpbroadcastw %di, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x7b,0xd7]
   6069 ; CHECK-NEXT:    vpaddw %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfd,0xc0]
   6070 ; CHECK-NEXT:    vpaddw %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfd,0xc0]
   6071 ; CHECK-NEXT:    retq ## encoding: [0xc3]
   6072   %res = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 -1)
   6073   %res1 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> %x1, i8 %mask)
   6074   %res2 = call <8 x i16> @llvm.x86.avx512.mask.pbroadcast.w.gpr.128(i16 %x0, <8 x i16> zeroinitializer, i8 %mask)
   6075   %res3 = add <8 x i16> %res, %res1
   6076   %res4 = add <8 x i16> %res2, %res3
   6077   ret <8 x i16> %res4
   6078 }
   6079