; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=knl -mattr=+avx512vl --show-mc-encoding | FileCheck %s

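; Integer broadcast intrinsics (vpbroadcastd/vpbroadcastq). Each test below
; exercises the unmasked, merge-masked ({%k1}), and zero-masked ({%k1} {z})
; forms of the intrinsic and sums the three results so that none of the calls
; can be folded away. The 256-bit dword test also broadcasts from memory,
; which is expected to fold into an embedded-broadcast operand, (%rsi){1to8}.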
declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32* %y_ptr) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; CHECK-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; CHECK-NEXT:    vpaddd (%rsi){1to8}, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x75,0x38,0xfe,0x0e]
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %y_32 = load i32, i32* %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x58,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; CHECK-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; CHECK-NEXT:    vpaddd %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc9]
; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x59,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; CHECK-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; CHECK-NEXT:    vpaddq %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc9]
; CHECK-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; CHECK-LABEL: test_int_x86_avx512_pbroadcastq_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm2 ## encoding: [0x62,0xf2,0xfd,0x08,0x59,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; CHECK-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; CHECK-NEXT:    vpaddq %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc9]
; CHECK-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xd4,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

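; Floating-point broadcast intrinsics (vbroadcastsd/vbroadcastss), following
; the same unmasked/merge-masked/zero-masked pattern, with fadd combining the
; results.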
declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_sd_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm2 ## encoding: [0x62,0xf2,0xfd,0x28,0x19,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; CHECK-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc9]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm2 ## encoding: [0x62,0xf2,0x7d,0x28,0x18,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; CHECK-NEXT:    vaddps %ymm1, %ymm2, %ymm1 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc9]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_x86_vbroadcast_ss_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm2 ## encoding: [0x62,0xf2,0x7d,0x08,0x18,0xd0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; CHECK-NEXT:    vaddps %xmm1, %xmm2, %xmm1 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc9]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

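; VMOVSLDUP duplicates the even-indexed single-precision elements, so
; result[2i] = result[2i+1] = src[2i]; the expected shuffle masks (e.g.
; xmm0[0,0,2,2]) appear in the ## comments next to each instruction.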
declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsldup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x12,0xd0]
; CHECK-NEXT:    ## xmm2 = xmm0[0,0,2,2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovsldup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0,0,2,2]
; CHECK-NEXT:    vmovsldup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[0,0,2,2]
; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movsldup_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovsldup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x12,0xd0]
; CHECK-NEXT:    ## ymm2 = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovsldup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    vmovsldup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

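; VMOVSHDUP is the odd-element counterpart of VMOVSLDUP:
; result[2i] = result[2i+1] = src[2i+1], i.e. xmm0[1,1,3,3].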
declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovshdup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7e,0x08,0x16,0xd0]
; CHECK-NEXT:    ## xmm2 = xmm0[1,1,3,3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovshdup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[1,1,3,3]
; CHECK-NEXT:    vmovshdup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[1,1,3,3]
; CHECK-NEXT:    vaddps %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xca]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movshdup_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovshdup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7e,0x28,0x16,0xd0]
; CHECK-NEXT:    ## ymm2 = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovshdup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    vmovshdup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; CHECK-NEXT:    vaddps %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xca]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}
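
; VMOVDDUP duplicates the even-indexed double-precision elements:
; xmm0[0,0] for the 128-bit form and ymm0[0,0,2,2] for the 256-bit form.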
declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovddup %xmm0, %xmm2 ## encoding: [0x62,0xf1,0xff,0x08,0x12,0xd0]
; CHECK-NEXT:    ## xmm2 = xmm0[0,0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovddup %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[0,0]
; CHECK-NEXT:    vmovddup %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[0,0]
; CHECK-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xca]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_movddup_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovddup %ymm0, %ymm2 ## encoding: [0x62,0xf1,0xff,0x28,0x12,0xd0]
; CHECK-NEXT:    ## ymm2 = ymm0[0,0,2,2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vmovddup %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,0,2,2]
; CHECK-NEXT:    vmovddup %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[0,0,2,2]
; CHECK-NEXT:    vaddpd %ymm2, %ymm1, %ymm1 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xca]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

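; VPERMILPD/VPERMILPS with an immediate control. Only the control bits that
; matter are encoded: for the 4-element vpermilpd, i32 22 (0b10110) is
; emitted as $6 and selects ymm0[0,1,3,2] (one select bit per element within
; each 128-bit lane), while vpermilps uses two bits per element, so $22
; selects xmm0[2,1,1,0].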
declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x05,0xd0,0x06]
; CHECK-NEXT:    ## ymm2 = ymm0[0,1,3,2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[0,1,3,2]
; CHECK-NEXT:    vpermilpd $6, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[0,1,3,2]
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)

define <2 x double> @test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0xfd,0x08,0x05,0xd0,0x01]
; CHECK-NEXT:    ## xmm2 = xmm0[1,0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[1,0]
; CHECK-NEXT:    vpermilpd $1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[1,0]
; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
; CHECK-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)

define <8 x float> @test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0x7d,0x28,0x04,0xd0,0x16]
; CHECK-NEXT:    ## ymm2 = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    vpermilps $22, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
; CHECK-NEXT:    vaddps %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)

define <4 x float> @test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm2 ## encoding: [0x62,0xf3,0x7d,0x08,0x04,0xd0,0x16]
; CHECK-NEXT:    ## xmm2 = xmm0[2,1,1,0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[2,1,1,0]
; CHECK-NEXT:    vpermilps $22, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[2,1,1,0]
; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
; CHECK-NEXT:    vaddps %xmm0, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

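; VPERMPD/VPERMQ (perm.df/perm.di) permute across the whole 256-bit register
; using a 2-bit index per element; imm 3 selects ymm0[3,0,0,0].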
declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)

define <4 x double> @test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_df_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x01,0xd0,0x03]
; CHECK-NEXT:    ## ymm2 = ymm0[3,0,0,0]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[3,0,0,0]
; CHECK-NEXT:    vpermpd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[3,0,0,0]
; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
; CHECK-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x58,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res3, %res2
  ret <4 x double> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_perm_di_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpermq $3, %ymm0, %ymm2 ## encoding: [0x62,0xf3,0xfd,0x28,0x00,0xd0,0x03]
; CHECK-NEXT:    ## ymm2 = ymm0[3,0,0,0]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpermq $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[3,0,0,0]
; CHECK-NEXT:    vpermq $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[3,0,0,0]
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xd4,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

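; Masked store intrinsics. Each test performs a masked store through %ptr1
; and an all-ones-mask store through %ptr2; the latter is expected to lower
; to a plain unmasked store. The vmovaps/vmovapd/vmovdqa* forms assume an
; aligned pointer, while the vmovups/vmovupd/vmovdqu* forms do not.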
declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)

define void @test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovapd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
; CHECK-NEXT:    vmovapd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)

define void @test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovapd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
; CHECK-NEXT:    vmovapd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)

define void @test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovupd %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
; CHECK-NEXT:    vmovupd %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)

define void @test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovupd %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
; CHECK-NEXT:    vmovupd %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)

define void @test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovaps %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
; CHECK-NEXT:    vmovaps %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)

define void @test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovaps %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
; CHECK-NEXT:    vmovaps %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x29,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)

define void @test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovups %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07]
; CHECK-NEXT:    vmovups %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x08,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)

define void @test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovups %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07]
; CHECK-NEXT:    vmovups %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7c,0x28,0x11,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqu64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqu64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfe,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqu32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_storeu_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqu32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7e,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa64 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqa64 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa64 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqa64 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0xfd,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa32 %xmm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
; CHECK-NEXT:    vmovdqa32 %xmm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x08,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; CHECK-LABEL: test_int_x86_avx512_mask_store_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqa32 %ymm0, (%rdi) {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
; CHECK-NEXT:    vmovdqa32 %ymm0, (%rsi) ## encoding: [0x62,0xf1,0x7d,0x28,0x7f,0x06]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

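; Masked load intrinsics, covering aligned (vmovap*/vmovdqa*) and unaligned
; (vmovup*/vmovdqu*) variants. Each test performs an unmasked load, reuses
; that result as the pass-through of a merge-masked load, and finally does a
; zero-masked load, combining the results with an add.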
    644 define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
    645 ; CHECK-LABEL: test_mask_load_aligned_ps_256:
    646 ; CHECK:       ## BB#0:
    647 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x28,0x07]
    648 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    649 ; CHECK-NEXT:    vmovaps (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
    650 ; CHECK-NEXT:    vmovaps (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f]
    651 ; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
    652 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    653   %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
    654   %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
    655   %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
    656   %res4 = fadd <8 x float> %res2, %res1
    657   ret <8 x float> %res4
    658 }
    659 
    660 declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)
    661 
    662 define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
    663 ; CHECK-LABEL: test_mask_load_unaligned_ps_256:
    664 ; CHECK:       ## BB#0:
    665 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7c,0x28,0x10,0x07]
    666 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    667 ; CHECK-NEXT:    vmovups (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
    668 ; CHECK-NEXT:    vmovups (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f]
    669 ; CHECK-NEXT:    vaddps %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x74,0x28,0x58,0xc0]
    670 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    671   %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
    672   %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
    673   %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
    674   %res4 = fadd <8 x float> %res2, %res1
    675   ret <8 x float> %res4
    676 }
    677 
    678 declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)
    679 
    680 define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
    681 ; CHECK-LABEL: test_mask_load_aligned_pd_256:
    682 ; CHECK:       ## BB#0:
    683 ; CHECK-NEXT:    vmovapd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x28,0x07]
    684 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    685 ; CHECK-NEXT:    vmovapd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
    686 ; CHECK-NEXT:    vmovapd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f]
    687 ; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
    688 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    689   %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
    690   %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
    691   %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
    692   %res4 = fadd <4 x double> %res2, %res1
    693   ret <4 x double> %res4
    694 }
    695 
    696 declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)
    697 
    698 define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
    699 ; CHECK-LABEL: test_mask_load_unaligned_pd_256:
    700 ; CHECK:       ## BB#0:
    701 ; CHECK-NEXT:    vmovupd (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x10,0x07]
    702 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    703 ; CHECK-NEXT:    vmovupd (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
    704 ; CHECK-NEXT:    vmovupd (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f]
    705 ; CHECK-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0x58,0xc0]
    706 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    707   %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
    708   %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
    709   %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
    710   %res4 = fadd <4 x double> %res2, %res1
    711   ret <4 x double> %res4
    712 }
    713 
    714 declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)
    715 
    716 define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
    717 ; CHECK-LABEL: test_mask_load_aligned_ps_128:
    718 ; CHECK:       ## BB#0:
    719 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x28,0x07]
    720 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    721 ; CHECK-NEXT:    vmovaps (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
    722 ; CHECK-NEXT:    vmovaps (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f]
    723 ; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
    724 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    725   %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
    726   %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
    727   %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
    728   %res4 = fadd <4 x float> %res2, %res1
    729   ret <4 x float> %res4
    730 }
    731 
    732 declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)
    733 
    734 define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
    735 ; CHECK-LABEL: test_mask_load_unaligned_ps_128:
    736 ; CHECK:       ## BB#0:
    737 ; CHECK-NEXT:    vmovups (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7c,0x08,0x10,0x07]
    738 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    739 ; CHECK-NEXT:    vmovups (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
    740 ; CHECK-NEXT:    vmovups (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f]
    741 ; CHECK-NEXT:    vaddps %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x74,0x08,0x58,0xc0]
    742 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    743   %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
    744   %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
    745   %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
    746   %res4 = fadd <4 x float> %res2, %res1
    747   ret <4 x float> %res4
    748 }
    749 
    750 declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)
    751 
    752 define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
    753 ; CHECK-LABEL: test_mask_load_aligned_pd_128:
    754 ; CHECK:       ## BB#0:
    755 ; CHECK-NEXT:    vmovapd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x28,0x07]
    756 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    757 ; CHECK-NEXT:    vmovapd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
    758 ; CHECK-NEXT:    vmovapd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f]
    759 ; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
    760 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    761   %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
    762   %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
    763   %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
    764   %res4 = fadd <2 x double> %res2, %res1
    765   ret <2 x double> %res4
    766 }
    767 
    768 declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)
    769 
    770 define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
    771 ; CHECK-LABEL: test_mask_load_unaligned_pd_128:
    772 ; CHECK:       ## BB#0:
    773 ; CHECK-NEXT:    vmovupd (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x10,0x07]
    774 ; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
    775 ; CHECK-NEXT:    vmovupd (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
    776 ; CHECK-NEXT:    vmovupd (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f]
    777 ; CHECK-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0x58,0xc0]
    778 ; CHECK-NEXT:    retq ## encoding: [0xc3]
    779   %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
    780   %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
    781   %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
    782   %res4 = fadd <2 x double> %res2, %res1
    783   ret <2 x double> %res4
    784 }
    785 
    786 declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)
    787 
    788 declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8)
    789 
    790 define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) {
    791 ; CHECK-LABEL: test_mask_load_unaligned_d_128:
    792 ; CHECK:       ## BB#0:
    793 ; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7e,0x08,0x6f,0x07]
    794 ; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
    795 ; CHECK-NEXT:    vmovdqu32 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06]
    796 ; CHECK-NEXT:    vmovdqu32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
  %res4 = add <4 x i32> %res2, %res1
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8)

define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7e,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu32 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06]
; CHECK-NEXT:    vmovdqu32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res2, %res1
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfe,0x08,0x6f,0x07]
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 (%rsi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06]
; CHECK-NEXT:    vmovdqu64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) {
; CHECK-LABEL: test_mask_load_unaligned_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfe,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %edx, %k1 ## encoding: [0xc5,0xf8,0x92,0xca]
; CHECK-NEXT:    vmovdqu64 (%rsi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06]
; CHECK-NEXT:    vmovdqu64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}

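; The aligned-load tests below mirror the unaligned ones but use vmovdqa32/vmovdqa64.
; Each test exercises the unmasked, merge-masked ({%k1}) and zero-masked ({%k1} {z})
; forms and sums the results so none of the loads can be eliminated as dead. As a
; rough sketch (not part of the test), a merge-masked load corresponds to the
; generic pattern:
;   %v = load <4 x i32>, <4 x i32>* %p
;   %r = select <4 x i1> %m, <4 x i32> %v, <4 x i32> %passthru
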
declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)

define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
; CHECK-NEXT:    vmovdqa32 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
  %res4 = add <4 x i32> %res2, %res1
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)

define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
; CHECK-NEXT:    vmovdqa32 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res2, %res1
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; CHECK-NEXT:    vmovdqa64 (%rdi), %xmm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0xf5,0x08,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
; CHECK-LABEL: test_mask_load_aligned_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0x6f,0x07]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; CHECK-NEXT:    vmovdqa64 (%rdi), %ymm1 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f]
; CHECK-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0xf5,0x28,0xd4,0xc0]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}

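; vpshufd with an immediate: the shuffle comments emitted by llc spell out the lane
; permutation. Immediate $3 (0b00000011) selects element 3 and then element 0 three
; times per 128-bit lane, hence [3,0,0,0] for xmm and [3,0,0,0,7,4,4,4] for ymm.
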
declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm2 ## encoding: [0x62,0xf1,0x7d,0x08,0x70,0xd0,0x03]
; CHECK-NEXT:    ## xmm2 = xmm0[3,0,0,0]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
; CHECK-NEXT:    ## xmm1 {%k1} = xmm0[3,0,0,0]
; CHECK-NEXT:    vpshufd $3, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
; CHECK-NEXT:    ## xmm0 {%k1} {z} = xmm0[3,0,0,0]
; CHECK-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 ## encoding: [0x62,0xf1,0x75,0x08,0xfe,0xc0]
; CHECK-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xfe,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm2 ## encoding: [0x62,0xf1,0x7d,0x28,0x70,0xd0,0x03]
; CHECK-NEXT:    ## ymm2 = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
; CHECK-NEXT:    ## ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    vpshufd $3, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
; CHECK-NEXT:    ## ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
; CHECK-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 ## encoding: [0x62,0xf1,0x75,0x28,0xfe,0xc0]
; CHECK-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xfe,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

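; Compare-to-mask tests: vpcmpeqd/vpcmpgtd and their q variants write a mask
; register, which is then copied to a GPR with kmovw. For vectors narrower than
; 8 elements the compiler emits kshiftlw/kshiftrw pairs to clear the mask bits
; beyond the vector width. As a rough sketch (not part of the test), the unmasked
; d.256 case is equivalent to:
;   %c = icmp eq <8 x i32> %a, %b
;   %r = bitcast <8 x i1> %c to i8
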
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT:    kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT:    kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpeq_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; CHECK-NEXT:    kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT:    kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_d_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 ## encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
; CHECK-NEXT:    kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT:    kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_pcmpgt_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1} ## encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; CHECK-NEXT:    kshiftlw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0e]
; CHECK-NEXT:    kshiftrw $14, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0e]
; CHECK-NEXT:    kshiftlw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x32,0xc0,0x0c]
; CHECK-NEXT:    kshiftrw $12, %k0, %k0 ## encoding: [0xc4,0xe3,0xf9,0x30,0xc0,0x0c]
; CHECK-NEXT:    kmovw %k0, %eax ## encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    ## kill: %AL<def> %AL<kill> %EAX<kill>
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)

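; Floating-point unpack tests: vunpckhpd/vunpcklpd and the ps variants interleave
; the high or low elements of the two sources; each test adds the masked result to
; the unmasked one so both forms are checked.
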
declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x15,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[1],xmm1[1]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[1],xmm1[1]
; CHECK-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x15,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpckhps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x15,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpckhps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpckhps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x15,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpckhps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x14,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[0],xmm1[0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[0],xmm1[0]
; CHECK-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x14,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpcklps %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7c,0x08,0x14,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpcklps %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    vaddps %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6c,0x08,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vunpcklps %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7c,0x28,0x14,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vunpcklps %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    vaddps %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6c,0x28,0x58,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

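; Integer unpack tests: vpunpckhdq/vpunpckldq and the qdq variants, again checking
; the merge-masked form against the unmasked one.
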
declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x6a,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0x7d,0x08,0x62,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
; CHECK-NEXT:    ## xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; CHECK-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0x6d,0x08,0xfe,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x6a,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0x7d,0x28,0x62,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
; CHECK-NEXT:    ## ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; CHECK-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0x6d,0x28,0xfe,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6d,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[1],xmm1[1]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
; CHECK-NEXT:    ## xmm2 = xmm0[1],xmm1[1]
; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm3 ## encoding: [0x62,0xf1,0xfd,0x08,0x6c,0xd9]
; CHECK-NEXT:    ## xmm3 = xmm0[0],xmm1[0]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
; CHECK-NEXT:    ## xmm2 = xmm0[0],xmm1[0]
; CHECK-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 ## encoding: [0x62,0xf1,0xed,0x08,0xd4,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6c,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
; CHECK-NEXT:    ## ymm2 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; CHECK-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm3 ## encoding: [0x62,0xf1,0xfd,0x28,0x6d,0xd9]
; CHECK-NEXT:    ## ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
; CHECK-NEXT:    ## ymm2 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; CHECK-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 ## encoding: [0x62,0xf1,0xed,0x28,0xd4,0xc3]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

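; Masked logic ops. The test-name suffixes encode the operand/masking mix:
; rr = reg,reg; rm = reg,mem; rmb = reg,broadcast-from-memory ({1toN}); a trailing
; k merges into a pass-through operand under %mask, and kz zero-masks. The rmb
; tests build the splat with insertelement + shufflevector, which the backend
; folds into the {1toN} broadcast memory operand.
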
define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_and_epi32_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_and_epi32_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

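; The vpord tests repeat the same rr/rm/rmb and k/kz matrix for the or intrinsic.
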
define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

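; 256-bit masked OR (vpord); broadcast forms use {1to8}.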
define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_or_epi32_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_or_epi32_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

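; 128-bit masked XOR (vpxord).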
define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpxord %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_xor_epi32_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_xor_epi32_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

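; 256-bit masked XOR (vpxord).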
define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpxord %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_xor_epi32_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_xor_epi32_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_xor_epi32_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

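; 128-bit masked ANDN (vpandnd computes (~a) & b per dword lane).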
define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x08,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

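; 256-bit masked ANDN (vpandnd).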
define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x28,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi32_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi32_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

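; 128-bit masked ANDN on qword lanes (vpandnq); the rmb tests splat a
; single i64 via the {1to2} broadcast operand.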
define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %xmm1, %xmm0, %xmm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi), %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x08,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <2 x i64>, <2 x i64>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rmb_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 ## encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbk_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbkz_128:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

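; 256-bit masked ANDN on qword lanes (vpandnq); broadcasts use {1to4}.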
define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %ymm1, %ymm0, %ymm2 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
; CHECK-NEXT:    vmovaps %zmm2, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc2]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rrkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %edi, %k1 ## encoding: [0xc5,0xf8,0x92,0xcf]
; CHECK-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rm_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi), %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x28,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %b = load <4 x i64>, <4 x i64>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
; CHECK-LABEL: test_mask_andnot_epi64_rmb_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 ## encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbk_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} ## encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
; CHECK-NEXT:    vmovaps %zmm1, %zmm0 ## encoding: [0x62,0xf1,0x7c,0x48,0x28,0xc1]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
; CHECK-LABEL: test_mask_andnot_epi64_rmbkz_256:
; CHECK:       ## BB#0:
; CHECK-NEXT:    kmovw %esi, %k1 ## encoding: [0xc5,0xf8,0x92,0xce]
; CHECK-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} ## encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
; CHECK-NEXT:    retq ## encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)