; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64

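; This file covers the legacy AVX512VL masked broadcast/shuffle/store
; intrinsics. Each intrinsic is exercised three ways -- unmasked (i8 -1),
; merge-masked, and zero-masked -- and the results are added together so all
; three forms feed a single checked return value. A sketch of how the CHECK
; lines would typically be regenerated after a codegen change (the path and
; build directory are assumptions, not part of this test):
;   python utils/update_llc_test_checks.py --llc-binary=build/bin/llc <this-test>.ll
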
declare <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_128(i32 %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastd %eax, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xc8]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpbroadcastd %eax, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc0]
; X86-NEXT:    vpbroadcastd %eax, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd0]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %xmm1 # encoding: [0x62,0xf2,0x7d,0x08,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7c,0xd7]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.128(i32 %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

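; A minimal sketch of the generic IR the legacy mask intrinsic above is
; assumed to be auto-upgraded to (names are illustrative, not part of the
; test):
;   %vec   = insertelement <4 x i32> undef, i32 %x0, i32 0
;   %bcast = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> zeroinitializer
;   %bits  = bitcast i8 %mask to <8 x i1>
;   %mlow  = shufflevector <8 x i1> %bits, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %sel   = select <4 x i1> %mlow, <4 x i32> %bcast, <4 x i32> %x1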

declare <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_128(i64 %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT:    # xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc1]
; X86-NEXT:    vpbroadcastq %xmm1, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc9]
; X86-NEXT:    vpaddq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc1]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %xmm1 # encoding: [0x62,0xf2,0xfd,0x08,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %xmm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7c,0xd7]
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> %x1, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.128(i64 %x0, <2 x i64> zeroinitializer, i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pbroadcast_d_gpr_256(i32 %x0, <8 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastd %eax, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xc8]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpbroadcastd %eax, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc0]
; X86-NEXT:    vpbroadcastd %eax, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd0]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_d_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %edi, %ymm1 # encoding: [0x62,0xf2,0x7d,0x28,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7c,0xc7]
; X64-NEXT:    vpbroadcastd %edi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7c,0xd7]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pbroadcast.d.gpr.256(i32 %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pbroadcast_q_gpr_256(i64 %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovq {{[0-9]+}}(%esp), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x4c,0x24,0x04]
; X86-NEXT:    # xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x0c]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc1]
; X86-NEXT:    vpbroadcastq %xmm1, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc9]
; X86-NEXT:    vpaddq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc1]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pbroadcast_q_gpr_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %rdi, %ymm1 # encoding: [0x62,0xf2,0xfd,0x28,0x7c,0xcf]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7c,0xc7]
; X64-NEXT:    vpbroadcastq %rdi, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7c,0xd7]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> %x1, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pbroadcast.q.gpr.256(i64 %x0, <4 x i64> zeroinitializer, i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

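; A note on the "EVEX TO VEX Compression" annotations above and below: only
; the unmasked forms can be re-encoded with the shorter VEX prefix; the {%k1}
; and {%k1} {z} forms must keep the EVEX encoding because masking is
; EVEX-only, as are the (%eax){1to8}-style embedded-broadcast memory operands.
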
declare <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_pbroadcastd_256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask, i32* %y_ptr) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x04]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X86-NEXT:    vpaddd (%eax){1to8}, %ymm1, %ymm1 # encoding: [0x62,0xf1,0x75,0x38,0xfe,0x08]
; X86-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x58,0xc8]
; X64-NEXT:    vpbroadcastd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x58,0xc0]
; X64-NEXT:    vpaddd (%rsi){1to8}, %ymm1, %ymm1 # encoding: [0x62,0xf1,0x75,0x38,0xfe,0x0e]
; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %y_32 = load i32, i32* %y_ptr
  %y = insertelement <4 x i32> undef, i32 %y_32, i32 0
  %res = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %y, <8 x i32> %x1, i8 -1)
  %res1 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> %x1, i8 %mask)
  %res2 = call <8 x i32> @llvm.x86.avx512.pbroadcastd.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_pbroadcastd_128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastd_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X86-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastd_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x58,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x58,0xc8]
; X64-NEXT:    vpbroadcastd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x58,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res1 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> %x1, i8 %mask)
  %res2 = call <4 x i32> @llvm.x86.avx512.pbroadcastd.128(<4 x i32> %x0, <4 x i32> zeroinitializer, i8 %mask)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_pbroadcastq_256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_256:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_256:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x59,0xc8]
; X64-NEXT:    vpbroadcastq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x59,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> %x1, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.pbroadcastq.256(<2 x i64> %x0, <4 x i64> zeroinitializer, i8 %mask)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_pbroadcastq_128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_pbroadcastq_128:
; X86:       # %bb.0:
; X86-NEXT:    vpbroadcastq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X86-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pbroadcastq_128:
; X64:       # %bb.0:
; X64-NEXT:    vpbroadcastq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpbroadcastq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x59,0xc8]
; X64-NEXT:    vpbroadcastq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x59,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> %x1, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.pbroadcastq.128(<2 x i64> %x0, <2 x i64> zeroinitializer, i8 %mask)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

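; The floating-point broadcast tests below repeat the three-call pattern with
; vbroadcastss/vbroadcastsd, accumulating with fadd and passing a
; zeroinitializer passthrough together with the all-ones mask for the
; unmasked case.
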
declare <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double>, <4 x double>, i8) nounwind readonly

define <4 x double> @test_x86_vbroadcast_sd_pd_256(<2 x double> %a0, <4 x double> %a1, i8 %mask) {
; X86-LABEL: test_x86_vbroadcast_sd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastsd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X86-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X86-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X86-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_sd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastsd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x19,0xc8]
; X64-NEXT:    vaddpd %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc9]
; X64-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x19,0xc0]
; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> %a1, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.broadcast.sd.pd.256(<2 x double> %a0, <4 x double> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float>, <8 x float>, i8) nounwind readonly

define <8 x float> @test_x86_vbroadcast_ss_ps_256(<4 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X86-NEXT:    vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X86-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x18,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x18,0xc8]
; X64-NEXT:    vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X64-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x18,0xc0]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> %a1, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.256(<4 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float>, <4 x float>, i8) nounwind readonly

define <4 x float> @test_x86_vbroadcast_ss_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_x86_vbroadcast_ss_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vbroadcastss %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X86-NEXT:    vaddps %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc9]
; X86-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_vbroadcast_ss_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vbroadcastss %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x18,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x18,0xc8]
; X64-NEXT:    vaddps %xmm1, %xmm2, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc9]
; X64-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x18,0xc0]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> %a1, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.broadcast.ss.ps.128(<4 x float> %a0, <4 x float> zeroinitializer, i8 %mask)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

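; The movsldup/movshdup/movddup tests also check the disassembler's shuffle
; annotations (e.g. "xmm2 = xmm0[0,0,2,2]") for the unmasked, merge-masked,
; and zero-masked forms.
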
declare <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movsldup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovsldup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0,0,2,2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0,0,2,2]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X86-NEXT:    vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovsldup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x12,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0,0,2,2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x12,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0,0,2,2]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X64-NEXT:    vmovsldup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x12,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0,2,2]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movsldup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movsldup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovsldup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xd0]
; X86-NEXT:    # ymm2 = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT:    vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movsldup_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovsldup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x12,0xd0]
; X64-NEXT:    # ymm2 = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovsldup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x12,0xc8]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT:    vmovsldup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x12,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movsldup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_movshdup_128(<4 x float> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovshdup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xd0]
; X86-NEXT:    # xmm2 = xmm0[1,1,3,3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[1,1,3,3]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X86-NEXT:    vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovshdup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x16,0xd0]
; X64-NEXT:    # xmm2 = xmm0[1,1,3,3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x16,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[1,1,3,3]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xca]
; X64-NEXT:    vmovshdup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x16,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[1,1,3,3]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> %x1, i8 -1)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.movshdup.128(<4 x float> %x0, <4 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_movshdup_256(<8 x float> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovshdup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xd0]
; X86-NEXT:    # ymm2 = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X86-NEXT:    # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X86-NEXT:    vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movshdup_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovshdup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x16,0xd0]
; X64-NEXT:    # ymm2 = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovshdup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x16,0xc8]
; X64-NEXT:    # ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    vaddps %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xca]
; X64-NEXT:    vmovshdup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x16,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> %x1, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.movshdup.256(<8 x float> %x0, <8 x float> zeroinitializer, i8 %x2)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_movddup_128(<2 x double> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovddup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0,0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0,0]
; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X86-NEXT:    vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0]
; X86-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x12,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0,0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x09,0x12,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0,0]
; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xca]
; X64-NEXT:    vmovddup %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0x89,0x12,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0,0]
; X64-NEXT:    vaddpd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> %x1, i8 -1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.movddup.128(<2 x double> %x0, <2 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_movddup_256(<4 x double> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_movddup_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovddup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xd0]
; X86-NEXT:    # ymm2 = ymm0[0,0,2,2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X86-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2]
; X86-NEXT:    vaddpd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xca]
; X86-NEXT:    vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X86-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_movddup_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovddup %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0x12,0xd0]
; X64-NEXT:    # ymm2 = ymm0[0,0,2,2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovddup %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0x12,0xc8]
; X64-NEXT:    # ymm1 {%k1} = ymm0[0,0,2,2]
; X64-NEXT:    vaddpd %ymm2, %ymm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xca]
; X64-NEXT:    vmovddup %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xff,0xa9,0x12,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,0,2,2]
; X64-NEXT:    vaddpd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> %x1, i8 -1)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.movddup.256(<4 x double> %x0, <4 x double> zeroinitializer, i8 %x2)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

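; Both the vpermilpd.256 and vpermilps tests pass an i32 22 immediate; note
; how the printed form differs: vpermilpd consumes one control bit per
; element (4 bits for ymm), so 22 (0b10110) is emitted as $6, while vpermilps
; consumes two bits per element and keeps $22.
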
    578 declare <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double>, i32, <4 x double>, i8)
    579 
    580 define <4 x double>@test_int_x86_avx512_mask_vpermil_pd_256(<4 x double> %x0, <4 x double> %x2, i8 %x3) {
    581 ; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
    582 ; X86:       # %bb.0:
    583 ; X86-NEXT:    vpermilpd $6, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xd0,0x06]
    584 ; X86-NEXT:    # ymm2 = ymm0[0,1,3,2]
    585 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    586 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    587 ; X86-NEXT:    vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
    588 ; X86-NEXT:    # ymm1 {%k1} = ymm0[0,1,3,2]
    589 ; X86-NEXT:    vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
    590 ; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,3,2]
    591 ; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
    592 ; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
    593 ; X86-NEXT:    retl # encoding: [0xc3]
    594 ;
    595 ; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_256:
    596 ; X64:       # %bb.0:
    597 ; X64-NEXT:    vpermilpd $6, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x05,0xd0,0x06]
    598 ; X64-NEXT:    # ymm2 = ymm0[0,1,3,2]
    599 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    600 ; X64-NEXT:    vpermilpd $6, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x05,0xc8,0x06]
    601 ; X64-NEXT:    # ymm1 {%k1} = ymm0[0,1,3,2]
    602 ; X64-NEXT:    vpermilpd $6, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x05,0xc0,0x06]
    603 ; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[0,1,3,2]
    604 ; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
    605 ; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
    606 ; X64-NEXT:    retq # encoding: [0xc3]
    607   %res = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 %x3)
    608   %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> zeroinitializer, i8 %x3)
    609   %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermil.pd.256(<4 x double> %x0, i32 22, <4 x double> %x2, i8 -1)
    610   %res3 = fadd <4 x double> %res, %res1
    611   %res4 = fadd <4 x double> %res2, %res3
    612   ret <4 x double> %res4
    613 }
    614 
    615 declare <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double>, i32, <2 x double>, i8)
    616 
    617 define <2 x double>@test_int_x86_avx512_mask_vpermil_pd_128(<2 x double> %x0, <2 x double> %x2, i8 %x3) {
    618 ; X86-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
    619 ; X86:       # %bb.0:
    620 ; X86-NEXT:    vpermilpd $1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xd0,0x01]
    621 ; X86-NEXT:    # xmm2 = xmm0[1,0]
    622 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    623 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    624 ; X86-NEXT:    vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
    625 ; X86-NEXT:    # xmm1 {%k1} = xmm0[1,0]
    626 ; X86-NEXT:    vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
    627 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[1,0]
    628 ; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
    629 ; X86-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
    630 ; X86-NEXT:    retl # encoding: [0xc3]
    631 ;
    632 ; X64-LABEL: test_int_x86_avx512_mask_vpermil_pd_128:
    633 ; X64:       # %bb.0:
    634 ; X64-NEXT:    vpermilpd $1, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x05,0xd0,0x01]
    635 ; X64-NEXT:    # xmm2 = xmm0[1,0]
    636 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    637 ; X64-NEXT:    vpermilpd $1, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x05,0xc8,0x01]
    638 ; X64-NEXT:    # xmm1 {%k1} = xmm0[1,0]
    639 ; X64-NEXT:    vpermilpd $1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0x05,0xc0,0x01]
    640 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[1,0]
    641 ; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
    642 ; X64-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
    643 ; X64-NEXT:    retq # encoding: [0xc3]
    644   %res = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 %x3)
    645   %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> zeroinitializer, i8 %x3)
    646   %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermil.pd.128(<2 x double> %x0, i32 1, <2 x double> %x2, i8 -1)
    647   %res3 = fadd <2 x double> %res, %res1
    648   %res4 = fadd <2 x double> %res3, %res2
    649   ret <2 x double> %res4
    650 }
    651 
    652 declare <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float>, i32, <8 x float>, i8)
    653 
    654 define <8 x float>@test_int_x86_avx512_mask_vpermil_ps_256(<8 x float> %x0, <8 x float> %x2, i8 %x3) {
    655 ; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
    656 ; X86:       # %bb.0:
    657 ; X86-NEXT:    vpermilps $22, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xd0,0x16]
    658 ; X86-NEXT:    # ymm2 = ymm0[2,1,1,0,6,5,5,4]
    659 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    660 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    661 ; X86-NEXT:    vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
    662 ; X86-NEXT:    # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
    663 ; X86-NEXT:    vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
    664 ; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
    665 ; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
    666 ; X86-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
    667 ; X86-NEXT:    retl # encoding: [0xc3]
    668 ;
    669 ; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_256:
    670 ; X64:       # %bb.0:
    671 ; X64-NEXT:    vpermilps $22, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x04,0xd0,0x16]
    672 ; X64-NEXT:    # ymm2 = ymm0[2,1,1,0,6,5,5,4]
    673 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    674 ; X64-NEXT:    vpermilps $22, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x04,0xc8,0x16]
    675 ; X64-NEXT:    # ymm1 {%k1} = ymm0[2,1,1,0,6,5,5,4]
    676 ; X64-NEXT:    vpermilps $22, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x04,0xc0,0x16]
    677 ; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[2,1,1,0,6,5,5,4]
    678 ; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
    679 ; X64-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
    680 ; X64-NEXT:    retq # encoding: [0xc3]
    681   %res = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 %x3)
    682   %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> zeroinitializer, i8 %x3)
    683   %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermil.ps.256(<8 x float> %x0, i32 22, <8 x float> %x2, i8 -1)
    684   %res3 = fadd <8 x float> %res, %res1
    685   %res4 = fadd <8 x float> %res3, %res2
    686   ret <8 x float> %res4
    687 }
    688 
    689 declare <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float>, i32, <4 x float>, i8)
    690 
    691 define <4 x float>@test_int_x86_avx512_mask_vpermil_ps_128(<4 x float> %x0, <4 x float> %x2, i8 %x3) {
    692 ; X86-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
    693 ; X86:       # %bb.0:
    694 ; X86-NEXT:    vpermilps $22, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xd0,0x16]
    695 ; X86-NEXT:    # xmm2 = xmm0[2,1,1,0]
    696 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
    697 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    698 ; X86-NEXT:    vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
    699 ; X86-NEXT:    # xmm1 {%k1} = xmm0[2,1,1,0]
    700 ; X86-NEXT:    vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
    701 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[2,1,1,0]
    702 ; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
    703 ; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
    704 ; X86-NEXT:    retl # encoding: [0xc3]
    705 ;
    706 ; X64-LABEL: test_int_x86_avx512_mask_vpermil_ps_128:
    707 ; X64:       # %bb.0:
    708 ; X64-NEXT:    vpermilps $22, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x04,0xd0,0x16]
    709 ; X64-NEXT:    # xmm2 = xmm0[2,1,1,0]
    710 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
    711 ; X64-NEXT:    vpermilps $22, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x04,0xc8,0x16]
    712 ; X64-NEXT:    # xmm1 {%k1} = xmm0[2,1,1,0]
    713 ; X64-NEXT:    vpermilps $22, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x04,0xc0,0x16]
    714 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[2,1,1,0]
    715 ; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
    716 ; X64-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
    717 ; X64-NEXT:    retq # encoding: [0xc3]
    718   %res = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 %x3)
    719   %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> zeroinitializer, i8 %x3)
    720   %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermil.ps.128(<4 x float> %x0, i32 22, <4 x float> %x2, i8 -1)
    721   %res3 = fadd <4 x float> %res, %res1
    722   %res4 = fadd <4 x float> %res2, %res3
    723   ret <4 x float> %res4
    724 }
    725 
    726 declare <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double>, i32, <4 x double>, i8)
    727 
    728 define <4 x double>@test_int_x86_avx512_mask_perm_df_256(<4 x double> %x0, i32 %x1, <4 x double> %x2, i8 %x3) {
    729 ; X86-LABEL: test_int_x86_avx512_mask_perm_df_256:
    730 ; X86:       # %bb.0:
    731 ; X86-NEXT:    vpermpd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xd0,0x03]
    732 ; X86-NEXT:    # ymm2 = ymm0[3,0,0,0]
    733 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
    734 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
    735 ; X86-NEXT:    vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
    736 ; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
    737 ; X86-NEXT:    vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
    738 ; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
    739 ; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
    740 ; X86-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
    741 ; X86-NEXT:    retl # encoding: [0xc3]
    742 ;
    743 ; X64-LABEL: test_int_x86_avx512_mask_perm_df_256:
    744 ; X64:       # %bb.0:
    745 ; X64-NEXT:    vpermpd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x01,0xd0,0x03]
    746 ; X64-NEXT:    # ymm2 = ymm0[3,0,0,0]
    747 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
    748 ; X64-NEXT:    vpermpd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x01,0xc8,0x03]
    749 ; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
    750 ; X64-NEXT:    vpermpd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x01,0xc0,0x03]
    751 ; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
    752 ; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
    753 ; X64-NEXT:    vaddpd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc2]
    754 ; X64-NEXT:    retq # encoding: [0xc3]
    755   %res = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 %x3)
    756   %res1 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> zeroinitializer, i8 %x3)
    757   %res2 = call <4 x double> @llvm.x86.avx512.mask.perm.df.256(<4 x double> %x0, i32 3, <4 x double> %x2, i8 -1)
    758   %res3 = fadd <4 x double> %res, %res1
    759   %res4 = fadd <4 x double> %res3, %res2
    760   ret <4 x double> %res4
    761 }
    762 
declare <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_mask_perm_di_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x00,0xd0,0x03]
; X86-NEXT:    # ymm2 = ymm0[3,0,0,0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X86-NEXT:    vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_perm_di_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0x00,0xd0,0x03]
; X64-NEXT:    # ymm2 = ymm0[3,0,0,0]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpermq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x00,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0]
; X64-NEXT:    vpermq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0x00,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.perm.di.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

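; Masked store tests. Each test performs a merge-masked store to %ptr1 and an
; unmasked (all-ones mask) store to %ptr2. Only the unmasked form can be
; compressed from the EVEX prefix to the shorter VEX encoding, since masking
; requires EVEX.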
declare void @llvm.x86.avx512.mask.store.pd.128(i8*, <2 x double>, i8)

define void @test_int_x86_avx512_mask_store_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x01]
; X86-NEXT:    vmovapd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x29,0x07]
; X64-NEXT:    vmovapd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x29,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.pd.256(i8*, <4 x double>, i8)

define void @test_int_x86_avx512_mask_store_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovapd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x01]
; X86-NEXT:    vmovapd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovapd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x29,0x07]
; X64-NEXT:    vmovapd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x29,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.128(i8*, <2 x double>, i8)

define void @test_int_x86_avx512_mask_storeu_pd_128(i8* %ptr1, i8* %ptr2, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x01]
; X86-NEXT:    vmovupd %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x11,0x07]
; X64-NEXT:    vmovupd %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x11,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr1, <2 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.128(i8* %ptr2, <2 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.pd.256(i8*, <4 x double>, i8)

define void @test_int_x86_avx512_mask_storeu_pd_256(i8* %ptr1, i8* %ptr2, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovupd %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x01]
; X86-NEXT:    vmovupd %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovupd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x11,0x07]
; X64-NEXT:    vmovupd %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x11,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr1, <4 x double> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.pd.256(i8* %ptr2, <4 x double> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.128(i8*, <4 x float>, i8)

define void @test_int_x86_avx512_mask_store_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovaps %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x01]
; X86-NEXT:    vmovaps %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x29,0x07]
; X64-NEXT:    vmovaps %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x29,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.ps.256(i8*, <8 x float>, i8)

define void @test_int_x86_avx512_mask_store_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovaps %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x01]
; X86-NEXT:    vmovaps %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovaps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x29,0x07]
; X64-NEXT:    vmovaps %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x29,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.128(i8*, <4 x float>, i8)

define void @test_int_x86_avx512_mask_storeu_ps_128(i8* %ptr1, i8* %ptr2, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovups %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x01]
; X86-NEXT:    vmovups %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovups %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x11,0x07]
; X64-NEXT:    vmovups %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x11,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr1, <4 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.128(i8* %ptr2, <4 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.ps.256(i8*, <8 x float>, i8)

define void @test_int_x86_avx512_mask_storeu_ps_256(i8* %ptr1, i8* %ptr2, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovups %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x01]
; X86-NEXT:    vmovups %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovups %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x11,0x07]
; X64-NEXT:    vmovups %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x11,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr1, <8 x float> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.ps.256(i8* %ptr2, <8 x float> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.128(i8*, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_storeu_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.q.256(i8*, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_storeu_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.128(i8*, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_storeu_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqu %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqu %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.storeu.d.256(i8*, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_storeu_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_storeu_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqu32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqu %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_storeu_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqu32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqu %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.storeu.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.128(i8*, <2 x i64>, i8)

define void @test_int_x86_avx512_mask_store_q_128(i8* %ptr1, i8* %ptr2, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa64 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa64 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr1, <2 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.128(i8* %ptr2, <2 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.q.256(i8*, <4 x i64>, i8)

define void @test_int_x86_avx512_mask_store_q_256(i8* %ptr1, i8* %ptr2, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa64 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa64 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr1, <4 x i64> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.q.256(i8* %ptr2, <4 x i64> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.128(i8*, <4 x i32>, i8)

define void @test_int_x86_avx512_mask_store_d_128(i8* %ptr1, i8* %ptr2, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa32 %xmm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x01]
; X86-NEXT:    vmovdqa %xmm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa32 %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x7f,0x07]
; X64-NEXT:    vmovdqa %xmm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x7f,0x06]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr1, <4 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.128(i8* %ptr2, <4 x i32> %x1, i8 -1)
  ret void
}

declare void @llvm.x86.avx512.mask.store.d.256(i8*, <8 x i32>, i8)

define void @test_int_x86_avx512_mask_store_d_256(i8* %ptr1, i8* %ptr2, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_store_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X86-NEXT:    vmovdqa32 %ymm0, (%ecx) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x01]
; X86-NEXT:    vmovdqa %ymm0, (%eax) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_store_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
; X64-NEXT:    vmovdqa32 %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x7f,0x07]
; X64-NEXT:    vmovdqa %ymm0, (%rsi) # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x7f,0x06]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr1, <8 x i32> %x1, i8 %x2)
  call void @llvm.x86.avx512.mask.store.d.256(i8* %ptr2, <8 x i32> %x1, i8 -1)
  ret void
}

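; Masked load tests. Each test does an unmasked load (all-ones mask), a
; merge-masked load using that result as the pass-through, and a zero-masked
; load, then adds the two masked results so every form stays live.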
define <8 x float> @test_mask_load_aligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovaps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x00]
; X86-NEXT:    vmovaps (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x08]
; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0x07]
; X64-NEXT:    vmovaps (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0x0f]
; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x float> %res2, %res1
  ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.load.ps.256(i8*, <8 x float>, i8)

define <8 x float> @test_mask_load_unaligned_ps_256(<8 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovups (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovups (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x00]
; X86-NEXT:    vmovups (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x08]
; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovups (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovups (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x10,0x07]
; X64-NEXT:    vmovups (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x10,0x0f]
; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 -1)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> %res, i8 %mask)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8* %ptr, <8 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x float> %res2, %res1
  ret <8 x float> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.loadu.ps.256(i8*, <8 x float>, i8)

define <4 x double> @test_mask_load_aligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovapd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovapd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x00]
; X86-NEXT:    vmovapd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x08]
; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovapd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovapd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0x07]
; X64-NEXT:    vmovapd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0x0f]
; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x double> %res2, %res1
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.load.pd.256(i8*, <4 x double>, i8)

define <4 x double> @test_mask_load_unaligned_pd_256(<4 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovupd (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovupd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x00]
; X86-NEXT:    vmovupd (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x08]
; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovupd (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovupd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x10,0x07]
; X64-NEXT:    vmovupd (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x10,0x0f]
; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 -1)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> %res, i8 %mask)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8* %ptr, <4 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x double> %res2, %res1
  ret <4 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.loadu.pd.256(i8*, <4 x double>, i8)

define <4 x float> @test_mask_load_aligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovaps (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovaps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x00]
; X86-NEXT:    vmovaps (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x08]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovaps (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovaps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x28,0x07]
; X64-NEXT:    vmovaps (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x28,0x0f]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x float> %res2, %res1
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.load.ps.128(i8*, <4 x float>, i8)

define <4 x float> @test_mask_load_unaligned_ps_128(<4 x float> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovups (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovups (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x00]
; X86-NEXT:    vmovups (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x08]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovups (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovups (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x10,0x07]
; X64-NEXT:    vmovups (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x10,0x0f]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 -1)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> %res, i8 %mask)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8* %ptr, <4 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <4 x float> %res2, %res1
  ret <4 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.loadu.ps.128(i8*, <4 x float>, i8)

define <2 x double> @test_mask_load_aligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovapd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovapd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x00]
; X86-NEXT:    vmovapd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x08]
; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovapd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovapd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x28,0x07]
; X64-NEXT:    vmovapd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x28,0x0f]
; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <2 x double> %res2, %res1
  ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.load.pd.128(i8*, <2 x double>, i8)

define <2 x double> @test_mask_load_unaligned_pd_128(<2 x double> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_unaligned_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovupd (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovupd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x00]
; X86-NEXT:    vmovupd (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x08]
; X86-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_unaligned_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovupd (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x10,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovupd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x10,0x07]
; X64-NEXT:    vmovupd (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x10,0x0f]
; X64-NEXT:    vaddpd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 -1)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> %res, i8 %mask)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8* %ptr, <2 x double> zeroinitializer, i8 %mask)
  %res4 = fadd <2 x double> %res2, %res1
  ret <2 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.loadu.pd.128(i8*, <2 x double>, i8)

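; For the unaligned integer loads below, the merge-masked load reads from
; %ptr2 while the unmasked and zero-masked loads read from %ptr, keeping both
; pointer arguments live.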
   1432 declare <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8*, <4 x i32>, i8)
   1433 
   1434 define <4 x i32> @test_mask_load_unaligned_d_128(i8* %ptr, i8* %ptr2, <4 x i32> %data, i8 %mask) {
   1435 ; X86-LABEL: test_mask_load_unaligned_d_128:
   1436 ; X86:       # %bb.0:
   1437 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
   1438 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
   1439 ; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
   1440 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
   1441 ; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
   1442 ; X86-NEXT:    vmovdqu32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x00]
   1443 ; X86-NEXT:    vmovdqu32 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x09]
   1444 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
   1445 ; X86-NEXT:    retl # encoding: [0xc3]
   1446 ;
   1447 ; X64-LABEL: test_mask_load_unaligned_d_128:
   1448 ; X64:       # %bb.0:
   1449 ; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
   1450 ; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
   1451 ; X64-NEXT:    vmovdqu32 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x6f,0x06]
   1452 ; X64-NEXT:    vmovdqu32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0x89,0x6f,0x0f]
   1453 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
   1454 ; X64-NEXT:    retq # encoding: [0xc3]
   1455   %res = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
   1456   %res1 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr2, <4 x i32> %res, i8 %mask)
   1457   %res2 = call <4 x i32> @llvm.x86.avx512.mask.loadu.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
   1458   %res4 = add <4 x i32> %res2, %res1
   1459   ret <4 x i32> %res4
   1460 }
   1461 
   1462 declare <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8*, <8 x i32>, i8)
   1463 
   1464 define <8 x i32> @test_mask_load_unaligned_d_256(i8* %ptr, i8* %ptr2, <8 x i32> %data, i8 %mask) {
   1465 ; X86-LABEL: test_mask_load_unaligned_d_256:
   1466 ; X86:       # %bb.0:
   1467 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
   1468 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
   1469 ; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
   1470 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
   1471 ; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
   1472 ; X86-NEXT:    vmovdqu32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x00]
   1473 ; X86-NEXT:    vmovdqu32 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x09]
   1474 ; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
   1475 ; X86-NEXT:    retl # encoding: [0xc3]
   1476 ;
   1477 ; X64-LABEL: test_mask_load_unaligned_d_256:
   1478 ; X64:       # %bb.0:
   1479 ; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
   1480 ; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
   1481 ; X64-NEXT:    vmovdqu32 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x6f,0x06]
   1482 ; X64-NEXT:    vmovdqu32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7e,0xa9,0x6f,0x0f]
   1483 ; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
   1484 ; X64-NEXT:    retq # encoding: [0xc3]
   1485   %res = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
   1486   %res1 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr2, <8 x i32> %res, i8 %mask)
   1487   %res2 = call <8 x i32> @llvm.x86.avx512.mask.loadu.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
   1488   %res4 = add <8 x i32> %res2, %res1
   1489   ret <8 x i32> %res4
   1490 }
   1491 
   1492 declare <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8*, <2 x i64>, i8)
   1493 
   1494 define <2 x i64> @test_mask_load_unaligned_q_128(i8* %ptr, i8* %ptr2, <2 x i64> %data, i8 %mask) {
   1495 ; X86-LABEL: test_mask_load_unaligned_q_128:
   1496 ; X86:       # %bb.0:
   1497 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
   1498 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
   1499 ; X86-NEXT:    vmovdqu (%ecx), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x01]
   1500 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
   1501 ; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
   1502 ; X86-NEXT:    vmovdqu64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x00]
   1503 ; X86-NEXT:    vmovdqu64 (%ecx), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x09]
   1504 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
   1505 ; X86-NEXT:    retl # encoding: [0xc3]
   1506 ;
   1507 ; X64-LABEL: test_mask_load_unaligned_q_128:
   1508 ; X64:       # %bb.0:
   1509 ; X64-NEXT:    vmovdqu (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x6f,0x07]
   1510 ; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
   1511 ; X64-NEXT:    vmovdqu64 (%rsi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x09,0x6f,0x06]
   1512 ; X64-NEXT:    vmovdqu64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0x89,0x6f,0x0f]
   1513 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
   1514 ; X64-NEXT:    retq # encoding: [0xc3]
   1515   %res = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
   1516   %res1 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr2, <2 x i64> %res, i8 %mask)
   1517   %res2 = call <2 x i64> @llvm.x86.avx512.mask.loadu.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
   1518   %res4 = add <2 x i64> %res2, %res1
   1519   ret <2 x i64> %res4
   1520 }
   1521 
   1522 declare <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8*, <4 x i64>, i8)
   1523 
   1524 define <4 x i64> @test_mask_load_unaligned_q_256(i8* %ptr, i8* %ptr2, <4 x i64> %data, i8 %mask) {
   1525 ; X86-LABEL: test_mask_load_unaligned_q_256:
   1526 ; X86:       # %bb.0:
   1527 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x08]
   1528 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %ecx # encoding: [0x8b,0x4c,0x24,0x04]
   1529 ; X86-NEXT:    vmovdqu (%ecx), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x01]
   1530 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %edx # encoding: [0x0f,0xb6,0x54,0x24,0x0c]
   1531 ; X86-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
   1532 ; X86-NEXT:    vmovdqu64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x00]
   1533 ; X86-NEXT:    vmovdqu64 (%ecx), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x09]
   1534 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   1535 ; X86-NEXT:    retl # encoding: [0xc3]
   1536 ;
   1537 ; X64-LABEL: test_mask_load_unaligned_q_256:
   1538 ; X64:       # %bb.0:
   1539 ; X64-NEXT:    vmovdqu (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x6f,0x07]
   1540 ; X64-NEXT:    kmovw %edx, %k1 # encoding: [0xc5,0xf8,0x92,0xca]
   1541 ; X64-NEXT:    vmovdqu64 (%rsi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfe,0x29,0x6f,0x06]
   1542 ; X64-NEXT:    vmovdqu64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfe,0xa9,0x6f,0x0f]
   1543 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   1544 ; X64-NEXT:    retq # encoding: [0xc3]
   1545   %res = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
   1546   %res1 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr2, <4 x i64> %res, i8 %mask)
   1547   %res2 = call <4 x i64> @llvm.x86.avx512.mask.loadu.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
   1548   %res4 = add <4 x i64> %res2, %res1
   1549   ret <4 x i64> %res4
   1550 }
   1551 
   1552 declare <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8*, <4 x i32>, i8)
   1553 
   1554 define <4 x i32> @test_mask_load_aligned_d_128(<4 x i32> %data, i8* %ptr, i8 %mask) {
   1555 ; X86-LABEL: test_mask_load_aligned_d_128:
   1556 ; X86:       # %bb.0:
   1557 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1558 ; X86-NEXT:    vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
   1559 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   1560 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   1561 ; X86-NEXT:    vmovdqa32 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x00]
   1562 ; X86-NEXT:    vmovdqa32 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x08]
   1563 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
   1564 ; X86-NEXT:    retl # encoding: [0xc3]
   1565 ;
   1566 ; X64-LABEL: test_mask_load_aligned_d_128:
   1567 ; X64:       # %bb.0:
   1568 ; X64-NEXT:    vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
   1569 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1570 ; X64-NEXT:    vmovdqa32 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6f,0x07]
   1571 ; X64-NEXT:    vmovdqa32 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x6f,0x0f]
   1572 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
   1573 ; X64-NEXT:    retq # encoding: [0xc3]
   1574   %res = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 -1)
   1575   %res1 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> %res, i8 %mask)
   1576   %res2 = call <4 x i32> @llvm.x86.avx512.mask.load.d.128(i8* %ptr, <4 x i32> zeroinitializer, i8 %mask)
   1577   %res4 = add <4 x i32> %res2, %res1
   1578   ret <4 x i32> %res4
   1579 }
   1580 
   1581 declare <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8*, <8 x i32>, i8)
   1582 
   1583 define <8 x i32> @test_mask_load_aligned_d_256(<8 x i32> %data, i8* %ptr, i8 %mask) {
   1584 ; X86-LABEL: test_mask_load_aligned_d_256:
   1585 ; X86:       # %bb.0:
   1586 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   1587 ; X86-NEXT:    vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
   1588 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   1589 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   1590 ; X86-NEXT:    vmovdqa32 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x00]
   1591 ; X86-NEXT:    vmovdqa32 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x08]
   1592 ; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
   1593 ; X86-NEXT:    retl # encoding: [0xc3]
   1594 ;
   1595 ; X64-LABEL: test_mask_load_aligned_d_256:
   1596 ; X64:       # %bb.0:
   1597 ; X64-NEXT:    vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
   1598 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   1599 ; X64-NEXT:    vmovdqa32 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0x07]
   1600 ; X64-NEXT:    vmovdqa32 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x6f,0x0f]
   1601 ; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
   1602 ; X64-NEXT:    retq # encoding: [0xc3]
   1603   %res = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 -1)
   1604   %res1 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> %res, i8 %mask)
   1605   %res2 = call <8 x i32> @llvm.x86.avx512.mask.load.d.256(i8* %ptr, <8 x i32> zeroinitializer, i8 %mask)
   1606   %res4 = add <8 x i32> %res2, %res1
   1607   ret <8 x i32> %res4
   1608 }
   1609 
declare <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8*, <2 x i64>, i8)

define <2 x i64> @test_mask_load_aligned_q_128(<2 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa (%eax), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa64 (%eax), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x00]
; X86-NEXT:    vmovdqa64 (%eax), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x08]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6f,0x07]
; X64-NEXT:    vmovdqa64 (%rdi), %xmm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x6f,0x0f]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 -1)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> %res, i8 %mask)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.load.q.128(i8* %ptr, <2 x i64> zeroinitializer, i8 %mask)
  %res4 = add <2 x i64> %res2, %res1
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8*, <4 x i64>, i8)

define <4 x i64> @test_mask_load_aligned_q_256(<4 x i64> %data, i8* %ptr, i8 %mask) {
; X86-LABEL: test_mask_load_aligned_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovdqa (%eax), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x00]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vmovdqa64 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x00]
; X86-NEXT:    vmovdqa64 (%eax), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x08]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_load_aligned_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa (%rdi), %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0x07]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vmovdqa64 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0x07]
; X64-NEXT:    vmovdqa64 (%rdi), %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x6f,0x0f]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 -1)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> %res, i8 %mask)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.load.q.256(i8* %ptr, <4 x i64> zeroinitializer, i8 %mask)
  %res4 = add <4 x i64> %res2, %res1
  ret <4 x i64> %res4
}

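; The vpshufd tests encode the shuffle immediate directly: imm8 3 selects
; element 3 for position 0 and element 0 for the remaining positions, i.e.
; [3,0,0,0] within each 128-bit lane, which is why the 256-bit form decodes
; as [3,0,0,0,7,4,4,4].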
declare <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pshuf_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshufd $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x70,0xd0,0x03]
; X86-NEXT:    # xmm2 = xmm0[3,0,0,0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
; X86-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0]
; X86-NEXT:    vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x70,0xd0,0x03]
; X64-NEXT:    # xmm2 = xmm0[3,0,0,0]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpshufd $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x70,0xc8,0x03]
; X64-NEXT:    # xmm1 {%k1} = xmm0[3,0,0,0]
; X64-NEXT:    vpshufd $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x70,0xc0,0x03]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[3,0,0,0]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pshuf.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pshuf_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshufd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x70,0xd0,0x03]
; X86-NEXT:    # ymm2 = ymm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
; X86-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
; X86-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pshuf_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshufd $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x70,0xd0,0x03]
; X64-NEXT:    # ymm2 = ymm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpshufd $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x70,0xc8,0x03]
; X64-NEXT:    # ymm1 {%k1} = ymm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpshufd $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x70,0xc0,0x03]
; X64-NEXT:    # ymm0 {%k1} {z} = ymm0[3,0,0,0,7,4,4,4]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pshuf.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

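; The compare tests return the k-register result as an i8 mask. Two lowerings
; are visible below: the 8 x i32 forms apply the mask argument with a scalar
; andb after reading the compare result out of k0, while the narrower forms
; fold it into the compare as a {%k1} write mask. A minimal IR sketch of the
; masked-compare semantics (an assumed equivalent form, not part of this
; test):
;   %cmp  = icmp eq <8 x i32> %a, %b
;   %bits = bitcast i8 %mask to <8 x i1>
;   %and  = and <8 x i1> %cmp, %bits
;   %res  = bitcast <8 x i1> %and to i8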
define i8 @test_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x29,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    andb {{[0-9]+}}(%esp), %al # encoding: [0x22,0x44,0x24,0x04]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpcmpgtd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %dil, %al # encoding: [0x40,0x20,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32> %a, <8 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpgtq %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x37,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64> %a, <4 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpeq_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x76,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpeq_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpeq_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpeq_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpeq_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x29,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpeq.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_pcmpgt_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_d_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpgtd %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x66,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32> %a, <4 x i32> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b) {
; CHECK-LABEL: test_pcmpgt_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xc1]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 -1)
  ret i8 %res
}

define i8 @test_mask_pcmpgt_q_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
; X86-LABEL: test_mask_pcmpgt_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_pcmpgt_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpgtq %xmm1, %xmm0, %k0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x37,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64> %a, <2 x i64> %b, i8 %mask)
  ret i8 %res
}

declare i8 @llvm.x86.avx512.mask.pcmpgt.q.128(<2 x i64>, <2 x i64>, i8)

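; The FP unpack tests check the shuffle decoding alongside the masked
; encodings: unpckh interleaves the high elements of each 128-bit lane from
; the two sources, unpckl the low elements. Each test adds the merge-masked
; result to the unmasked one so both instruction forms stay live.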
declare <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckh_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xd9]
; X86-NEXT:    # xmm3 = xmm0[1],xmm1[1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X86-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x15,0xd9]
; X64-NEXT:    # xmm3 = xmm0[1],xmm1[1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x15,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X64-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckh.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckh_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xd9]
; X86-NEXT:    # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x15,0xd9]
; X64-NEXT:    # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x15,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckh.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckh_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vunpckhps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xd9]
; X86-NEXT:    # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vunpckhps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x15,0xd9]
; X64-NEXT:    # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x15,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckh.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckh_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vunpckhps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xd9]
; X86-NEXT:    # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckh_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vunpckhps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x15,0xd9]
; X64-NEXT:    # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpckhps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x15,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckh.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_unpckl_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x14,0xd9]
; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X86-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x14,0xd9]
; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x14,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X64-NEXT:    vaddpd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.unpckl.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_unpckl_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xd9]
; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x14,0xd9]
; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x14,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.unpckl.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_unpckl_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vunpcklps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xd9]
; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vunpcklps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x14,0xd9]
; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x14,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.unpckl.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_unpckl_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vunpcklps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xd9]
; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_unpckl_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vunpcklps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x14,0xd9]
; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vunpcklps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x14,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.unpckl.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

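; The integer punpck tests mirror the FP unpack tests above, with vpaddd or
; vpaddq combining the masked and unmasked results instead of vaddps/vaddpd.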
declare <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckhd_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6a,0xd9]
; X86-NEXT:    # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X86-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6a,0xd9]
; X64-NEXT:    # xmm3 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x6a,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
; X64-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckhd.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_punpckld_q_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x62,0xd9]
; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X86-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x62,0xd9]
; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0x62,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
; X64-NEXT:    vpaddd %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.punpckld.q.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckhd_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6a,0xd9]
; X86-NEXT:    # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6a,0xd9]
; X64-NEXT:    # ymm3 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6a,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckhd.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_punpckld_q_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x62,0xd9]
; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckld_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x62,0xd9]
; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x62,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.punpckld.q.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpckhqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6d,0xd9]
; X86-NEXT:    # xmm3 = xmm0[1],xmm1[1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6d,0xd9]
; X64-NEXT:    # xmm3 = xmm0[1],xmm1[1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6d,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[1]
; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpckhqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_punpcklqd_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xd9]
; X86-NEXT:    # xmm3 = xmm0[0],xmm1[0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
; X86-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6c,0xd9]
; X64-NEXT:    # xmm3 = xmm0[0],xmm1[0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpcklqdq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0x6c,0xd1]
; X64-NEXT:    # xmm2 {%k1} = xmm0[0],xmm1[0]
; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.punpcklqd.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpcklqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6c,0xd9]
; X86-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpcklqd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6c,0xd9]
; X64-NEXT:    # ymm3 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpcklqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6c,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpcklqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_punpckhqd_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6d,0xd9]
; X86-NEXT:    # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
; X86-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_punpckhqd_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6d,0xd9]
; X64-NEXT:    # ymm3 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpunpckhqdq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6d,0xd1]
; X64-NEXT:    # ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.punpckhqd.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

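; The logic tests that follow cover register/register, register/memory, and
; (in the _rmb_ variants) broadcast-from-memory forms, each unmasked, merge-
; masked, and zero-masked. Without a mask the bitwise op degrades to a plain
; VEX vandps; only the masked forms need the EVEX vpandd encoding. A minimal
; IR sketch of what the masked intrinsic computes (an assumed and+select
; equivalent, not part of this test):
;   %and  = and <4 x i32> %a, %b
;   %bits = bitcast i8 %mask to <8 x i1>
;   %m    = shufflevector <8 x i1> %bits, <8 x i1> undef,
;           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %res  = select <4 x i1> %m, <4 x i32> %and, <4 x i32> %passThru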
define <4 x i32> @test_mask_and_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_and_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pand.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_and_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_and_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vandps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x54,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_and_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpandd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_and_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_and_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_and_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpandd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pand.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <4 x i32> @test_mask_or_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x56,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xeb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xeb,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_or_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.por.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_or_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_or_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x56,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xeb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_or_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xeb,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_or_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_or_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_or_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xeb,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.por.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <4 x i32> @test_mask_xor_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpxord %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpxord %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vxorps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vxorps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x57,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xef,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpxord (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xef,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_xor_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.pxor.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <8 x i32> @test_mask_xor_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_xor_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vxorps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpxord %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpxord %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vxorps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vxorps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x57,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xef,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_xor_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpxord (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xef,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_xor_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_xor_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpxord (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_xor_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpxord (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xef,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.pxor.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <4 x i32> @test_mask_andnot_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_andnot_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandnd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpandnd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_andnot_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_andnot_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_andnot_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_andnot_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpandnd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x08]
   3546 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3547 ; X86-NEXT:    retl # encoding: [0xc3]
   3548 ;
   3549 ; X64-LABEL: test_mask_andnot_epi32_rmk_128:
   3550 ; X64:       # %bb.0:
   3551 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3552 ; X64-NEXT:    vpandnd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xdf,0x0f]
   3553 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3554 ; X64-NEXT:    retq # encoding: [0xc3]
   3555   %b = load <4 x i32>, <4 x i32>* %ptr_b
   3556   %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
   3557   ret <4 x i32> %res
   3558 }
   3559 
   3560 define <4 x i32> @test_mask_andnot_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
   3561 ; X86-LABEL: test_mask_andnot_epi32_rmkz_128:
   3562 ; X86:       # %bb.0:
   3563 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3564 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3565 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   3566 ; X86-NEXT:    vpandnd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x00]
   3567 ; X86-NEXT:    retl # encoding: [0xc3]
   3568 ;
   3569 ; X64-LABEL: test_mask_andnot_epi32_rmkz_128:
   3570 ; X64:       # %bb.0:
   3571 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3572 ; X64-NEXT:    vpandnd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xdf,0x07]
   3573 ; X64-NEXT:    retq # encoding: [0xc3]
   3574   %b = load <4 x i32>, <4 x i32>* %ptr_b
   3575   %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
   3576   ret <4 x i32> %res
   3577 }
   3578 
   3579 define <4 x i32> @test_mask_andnot_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
   3580 ; X86-LABEL: test_mask_andnot_epi32_rmb_128:
   3581 ; X86:       # %bb.0:
   3582 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3583 ; X86-NEXT:    vpandnd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x00]
   3584 ; X86-NEXT:    retl # encoding: [0xc3]
   3585 ;
   3586 ; X64-LABEL: test_mask_andnot_epi32_rmb_128:
   3587 ; X64:       # %bb.0:
   3588 ; X64-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xdf,0x07]
   3589 ; X64-NEXT:    retq # encoding: [0xc3]
   3590   %q = load i32, i32* %ptr_b
   3591   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3592   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3593   %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
   3594   ret <4 x i32> %res
   3595 }
   3596 
   3597 define <4 x i32> @test_mask_andnot_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
   3598 ; X86-LABEL: test_mask_andnot_epi32_rmbk_128:
   3599 ; X86:       # %bb.0:
   3600 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3601 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3602 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   3603 ; X86-NEXT:    vpandnd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x08]
   3604 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3605 ; X86-NEXT:    retl # encoding: [0xc3]
   3606 ;
   3607 ; X64-LABEL: test_mask_andnot_epi32_rmbk_128:
   3608 ; X64:       # %bb.0:
   3609 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3610 ; X64-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xdf,0x0f]
   3611 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3612 ; X64-NEXT:    retq # encoding: [0xc3]
   3613   %q = load i32, i32* %ptr_b
   3614   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3615   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3616   %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
   3617   ret <4 x i32> %res
   3618 }
   3619 
   3620 define <4 x i32> @test_mask_andnot_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
   3621 ; X86-LABEL: test_mask_andnot_epi32_rmbkz_128:
   3622 ; X86:       # %bb.0:
   3623 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3624 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3625 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   3626 ; X86-NEXT:    vpandnd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x00]
   3627 ; X86-NEXT:    retl # encoding: [0xc3]
   3628 ;
   3629 ; X64-LABEL: test_mask_andnot_epi32_rmbkz_128:
   3630 ; X64:       # %bb.0:
   3631 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3632 ; X64-NEXT:    vpandnd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xdf,0x07]
   3633 ; X64-NEXT:    retq # encoding: [0xc3]
   3634   %q = load i32, i32* %ptr_b
   3635   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   3636   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   3637   %res = call <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
   3638   ret <4 x i32> %res
   3639 }
   3640 
   3641 declare <4 x i32> @llvm.x86.avx512.mask.pandn.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
   3642 
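; Same matrix at 256 bits: ymm operands, {1to8} broadcasts, and EVEX.L'L = 01
; in the P2 byte (0x29/0xa9 here vs. 0x09/0x89 in the 128-bit group).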
   3643 define <8 x i32> @test_mask_andnot_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
   3644 ; CHECK-LABEL: test_mask_andnot_epi32_rr_256:
   3645 ; CHECK:       # %bb.0:
   3646 ; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
   3647 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   3648   %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
   3649   ret <8 x i32> %res
   3650 }
   3651 
   3652 define <8 x i32> @test_mask_andnot_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
   3653 ; X86-LABEL: test_mask_andnot_epi32_rrk_256:
   3654 ; X86:       # %bb.0:
   3655 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3656 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   3657 ; X86-NEXT:    vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
   3658 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3659 ; X86-NEXT:    retl # encoding: [0xc3]
   3660 ;
   3661 ; X64-LABEL: test_mask_andnot_epi32_rrk_256:
   3662 ; X64:       # %bb.0:
   3663 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   3664 ; X64-NEXT:    vpandnd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0xd1]
   3665 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3666 ; X64-NEXT:    retq # encoding: [0xc3]
   3667   %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
   3668   ret <8 x i32> %res
   3669 }
   3670 
   3671 define <8 x i32> @test_mask_andnot_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
   3672 ; X86-LABEL: test_mask_andnot_epi32_rrkz_256:
   3673 ; X86:       # %bb.0:
   3674 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3675 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   3676 ; X86-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
   3677 ; X86-NEXT:    retl # encoding: [0xc3]
   3678 ;
   3679 ; X64-LABEL: test_mask_andnot_epi32_rrkz_256:
   3680 ; X64:       # %bb.0:
   3681 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   3682 ; X64-NEXT:    vpandnd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0xc1]
   3683 ; X64-NEXT:    retq # encoding: [0xc3]
   3684   %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
   3685   ret <8 x i32> %res
   3686 }
   3687 
   3688 define <8 x i32> @test_mask_andnot_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
   3689 ; X86-LABEL: test_mask_andnot_epi32_rm_256:
   3690 ; X86:       # %bb.0:
   3691 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3692 ; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
   3693 ; X86-NEXT:    retl # encoding: [0xc3]
   3694 ;
   3695 ; X64-LABEL: test_mask_andnot_epi32_rm_256:
   3696 ; X64:       # %bb.0:
   3697 ; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
   3698 ; X64-NEXT:    retq # encoding: [0xc3]
   3699   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3700   %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
   3701   ret <8 x i32> %res
   3702 }
   3703 
   3704 define <8 x i32> @test_mask_andnot_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
   3705 ; X86-LABEL: test_mask_andnot_epi32_rmk_256:
   3706 ; X86:       # %bb.0:
   3707 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3708 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3709 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   3710 ; X86-NEXT:    vpandnd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x08]
   3711 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3712 ; X86-NEXT:    retl # encoding: [0xc3]
   3713 ;
   3714 ; X64-LABEL: test_mask_andnot_epi32_rmk_256:
   3715 ; X64:       # %bb.0:
   3716 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3717 ; X64-NEXT:    vpandnd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xdf,0x0f]
   3718 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3719 ; X64-NEXT:    retq # encoding: [0xc3]
   3720   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3721   %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
   3722   ret <8 x i32> %res
   3723 }
   3724 
   3725 define <8 x i32> @test_mask_andnot_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
   3726 ; X86-LABEL: test_mask_andnot_epi32_rmkz_256:
   3727 ; X86:       # %bb.0:
   3728 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3729 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3730 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   3731 ; X86-NEXT:    vpandnd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x00]
   3732 ; X86-NEXT:    retl # encoding: [0xc3]
   3733 ;
   3734 ; X64-LABEL: test_mask_andnot_epi32_rmkz_256:
   3735 ; X64:       # %bb.0:
   3736 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3737 ; X64-NEXT:    vpandnd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xdf,0x07]
   3738 ; X64-NEXT:    retq # encoding: [0xc3]
   3739   %b = load <8 x i32>, <8 x i32>* %ptr_b
   3740   %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
   3741   ret <8 x i32> %res
   3742 }
   3743 
   3744 define <8 x i32> @test_mask_andnot_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
   3745 ; X86-LABEL: test_mask_andnot_epi32_rmb_256:
   3746 ; X86:       # %bb.0:
   3747 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3748 ; X86-NEXT:    vpandnd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x00]
   3749 ; X86-NEXT:    retl # encoding: [0xc3]
   3750 ;
   3751 ; X64-LABEL: test_mask_andnot_epi32_rmb_256:
   3752 ; X64:       # %bb.0:
   3753 ; X64-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xdf,0x07]
   3754 ; X64-NEXT:    retq # encoding: [0xc3]
   3755   %q = load i32, i32* %ptr_b
   3756   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   3757   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   3758   %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
   3759   ret <8 x i32> %res
   3760 }
   3761 
   3762 define <8 x i32> @test_mask_andnot_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
   3763 ; X86-LABEL: test_mask_andnot_epi32_rmbk_256:
   3764 ; X86:       # %bb.0:
   3765 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3766 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3767 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   3768 ; X86-NEXT:    vpandnd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x08]
   3769 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3770 ; X86-NEXT:    retl # encoding: [0xc3]
   3771 ;
   3772 ; X64-LABEL: test_mask_andnot_epi32_rmbk_256:
   3773 ; X64:       # %bb.0:
   3774 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3775 ; X64-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xdf,0x0f]
   3776 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   3777 ; X64-NEXT:    retq # encoding: [0xc3]
   3778   %q = load i32, i32* %ptr_b
   3779   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   3780   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   3781   %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
   3782   ret <8 x i32> %res
   3783 }
   3784 
   3785 define <8 x i32> @test_mask_andnot_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
   3786 ; X86-LABEL: test_mask_andnot_epi32_rmbkz_256:
   3787 ; X86:       # %bb.0:
   3788 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3789 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3790 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   3791 ; X86-NEXT:    vpandnd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x00]
   3792 ; X86-NEXT:    retl # encoding: [0xc3]
   3793 ;
   3794 ; X64-LABEL: test_mask_andnot_epi32_rmbkz_256:
   3795 ; X64:       # %bb.0:
   3796 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3797 ; X64-NEXT:    vpandnd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xdf,0x07]
   3798 ; X64-NEXT:    retq # encoding: [0xc3]
   3799   %q = load i32, i32* %ptr_b
   3800   %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
   3801   %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
   3802   %res = call <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
   3803   ret <8 x i32> %res
   3804 }
   3805 
   3806 declare <8 x i32> @llvm.x86.avx512.mask.pandn.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
   3807 
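; 64-bit element (q) variants. Note the X86/X64 asymmetry in the rmb tests:
; on the 32-bit target the i64 splat is materialized first (vpbroadcastq or
; vmovsd+vbroadcastsd) and a plain andnot is used, presumably because i64 is
; not a legal scalar type there, while X64 folds the scalar load straight
; into the {1to2} embedded broadcast of vpandnq.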
   3808 define <2 x i64> @test_mask_andnot_epi64_rr_128(<2 x i64> %a, <2 x i64> %b) {
   3809 ; CHECK-LABEL: test_mask_andnot_epi64_rr_128:
   3810 ; CHECK:       # %bb.0:
   3811 ; CHECK-NEXT:    vandnps %xmm1, %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0xc1]
   3812 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   3813   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
   3814   ret <2 x i64> %res
   3815 }
   3816 
   3817 define <2 x i64> @test_mask_andnot_epi64_rrk_128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask) {
   3818 ; X86-LABEL: test_mask_andnot_epi64_rrk_128:
   3819 ; X86:       # %bb.0:
   3820 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3821 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   3822 ; X86-NEXT:    vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
   3823 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   3824 ; X86-NEXT:    retl # encoding: [0xc3]
   3825 ;
   3826 ; X64-LABEL: test_mask_andnot_epi64_rrk_128:
   3827 ; X64:       # %bb.0:
   3828 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   3829 ; X64-NEXT:    vpandnq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xd1]
   3830 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   3831 ; X64-NEXT:    retq # encoding: [0xc3]
   3832   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
   3833   ret <2 x i64> %res
   3834 }
   3835 
   3836 define <2 x i64> @test_mask_andnot_epi64_rrkz_128(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
   3837 ; X86-LABEL: test_mask_andnot_epi64_rrkz_128:
   3838 ; X86:       # %bb.0:
   3839 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3840 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   3841 ; X86-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
   3842 ; X86-NEXT:    retl # encoding: [0xc3]
   3843 ;
   3844 ; X64-LABEL: test_mask_andnot_epi64_rrkz_128:
   3845 ; X64:       # %bb.0:
   3846 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   3847 ; X64-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
   3848 ; X64-NEXT:    retq # encoding: [0xc3]
   3849   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
   3850   ret <2 x i64> %res
   3851 }
   3852 
   3853 define <2 x i64> @test_mask_andnot_epi64_rm_128(<2 x i64> %a, <2 x i64>* %ptr_b) {
   3854 ; X86-LABEL: test_mask_andnot_epi64_rm_128:
   3855 ; X86:       # %bb.0:
   3856 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3857 ; X86-NEXT:    vandnps (%eax), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x00]
   3858 ; X86-NEXT:    retl # encoding: [0xc3]
   3859 ;
   3860 ; X64-LABEL: test_mask_andnot_epi64_rm_128:
   3861 ; X64:       # %bb.0:
   3862 ; X64-NEXT:    vandnps (%rdi), %xmm0, %xmm0 # encoding: [0xc5,0xf8,0x55,0x07]
   3863 ; X64-NEXT:    retq # encoding: [0xc3]
   3864   %b = load <2 x i64>, <2 x i64>* %ptr_b
   3865   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
   3866   ret <2 x i64> %res
   3867 }
   3868 
   3869 define <2 x i64> @test_mask_andnot_epi64_rmk_128(<2 x i64> %a, <2 x i64>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
   3870 ; X86-LABEL: test_mask_andnot_epi64_rmk_128:
   3871 ; X86:       # %bb.0:
   3872 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3873 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3874 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   3875 ; X86-NEXT:    vpandnq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x08]
   3876 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3877 ; X86-NEXT:    retl # encoding: [0xc3]
   3878 ;
   3879 ; X64-LABEL: test_mask_andnot_epi64_rmk_128:
   3880 ; X64:       # %bb.0:
   3881 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3882 ; X64-NEXT:    vpandnq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0x0f]
   3883 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3884 ; X64-NEXT:    retq # encoding: [0xc3]
   3885   %b = load <2 x i64>, <2 x i64>* %ptr_b
   3886   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
   3887   ret <2 x i64> %res
   3888 }
   3889 
   3890 define <2 x i64> @test_mask_andnot_epi64_rmkz_128(<2 x i64> %a, <2 x i64>* %ptr_b, i8 %mask) {
   3891 ; X86-LABEL: test_mask_andnot_epi64_rmkz_128:
   3892 ; X86:       # %bb.0:
   3893 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3894 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   3895 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   3896 ; X86-NEXT:    vpandnq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x00]
   3897 ; X86-NEXT:    retl # encoding: [0xc3]
   3898 ;
   3899 ; X64-LABEL: test_mask_andnot_epi64_rmkz_128:
   3900 ; X64:       # %bb.0:
   3901 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3902 ; X64-NEXT:    vpandnq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0x07]
   3903 ; X64-NEXT:    retq # encoding: [0xc3]
   3904   %b = load <2 x i64>, <2 x i64>* %ptr_b
   3905   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
   3906   ret <2 x i64> %res
   3907 }
   3908 
   3909 define <2 x i64> @test_mask_andnot_epi64_rmb_128(<2 x i64> %a, i64* %ptr_b) {
   3910 ; X86-LABEL: test_mask_andnot_epi64_rmb_128:
   3911 ; X86:       # %bb.0:
   3912 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3913 ; X86-NEXT:    vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
   3914 ; X86-NEXT:    vpandn %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xdf,0xc1]
   3915 ; X86-NEXT:    retl # encoding: [0xc3]
   3916 ;
   3917 ; X64-LABEL: test_mask_andnot_epi64_rmb_128:
   3918 ; X64:       # %bb.0:
   3919 ; X64-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xdf,0x07]
   3920 ; X64-NEXT:    retq # encoding: [0xc3]
   3921   %q = load i64, i64* %ptr_b
   3922   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
   3923   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
   3924   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 -1)
   3925   ret <2 x i64> %res
   3926 }
   3927 
   3928 define <2 x i64> @test_mask_andnot_epi64_rmbk_128(<2 x i64> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
   3929 ; X86-LABEL: test_mask_andnot_epi64_rmbk_128:
   3930 ; X86:       # %bb.0:
   3931 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3932 ; X86-NEXT:    vpbroadcastq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x10]
   3933 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
   3934 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   3935 ; X86-NEXT:    vpandnq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xdf,0xca]
   3936 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3937 ; X86-NEXT:    retl # encoding: [0xc3]
   3938 ;
   3939 ; X64-LABEL: test_mask_andnot_epi64_rmbk_128:
   3940 ; X64:       # %bb.0:
   3941 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3942 ; X64-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xdf,0x0f]
   3943 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   3944 ; X64-NEXT:    retq # encoding: [0xc3]
   3945   %q = load i64, i64* %ptr_b
   3946   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
   3947   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
   3948   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passThru, i8 %mask)
   3949   ret <2 x i64> %res
   3950 }
   3951 
   3952 define <2 x i64> @test_mask_andnot_epi64_rmbkz_128(<2 x i64> %a, i64* %ptr_b, i8 %mask) {
   3953 ; X86-LABEL: test_mask_andnot_epi64_rmbkz_128:
   3954 ; X86:       # %bb.0:
   3955 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   3956 ; X86-NEXT:    vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
   3957 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
   3958 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   3959 ; X86-NEXT:    vpandnq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xdf,0xc1]
   3960 ; X86-NEXT:    retl # encoding: [0xc3]
   3961 ;
   3962 ; X64-LABEL: test_mask_andnot_epi64_rmbkz_128:
   3963 ; X64:       # %bb.0:
   3964 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   3965 ; X64-NEXT:    vpandnq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xdf,0x07]
   3966 ; X64-NEXT:    retq # encoding: [0xc3]
   3967   %q = load i64, i64* %ptr_b
   3968   %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
   3969   %b = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
   3970   %res = call <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64> %a, <2 x i64> %b, <2 x i64> zeroinitializer, i8 %mask)
   3971   ret <2 x i64> %res
   3972 }
   3973 
   3974 declare <2 x i64> @llvm.x86.avx512.mask.pandn.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   3975 
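; The 256-bit q variants repeat the pattern with ymm operands; the X86 rmb
; cases again pre-broadcast the i64 instead of using an embedded broadcast.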
   3976 define <4 x i64> @test_mask_andnot_epi64_rr_256(<4 x i64> %a, <4 x i64> %b) {
   3977 ; CHECK-LABEL: test_mask_andnot_epi64_rr_256:
   3978 ; CHECK:       # %bb.0:
   3979 ; CHECK-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
   3980 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   3981   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
   3982   ret <4 x i64> %res
   3983 }
   3984 
   3985 define <4 x i64> @test_mask_andnot_epi64_rrk_256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask) {
   3986 ; X86-LABEL: test_mask_andnot_epi64_rrk_256:
   3987 ; X86:       # %bb.0:
   3988 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   3989 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   3990 ; X86-NEXT:    vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
   3991 ; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3992 ; X86-NEXT:    retl # encoding: [0xc3]
   3993 ;
   3994 ; X64-LABEL: test_mask_andnot_epi64_rrk_256:
   3995 ; X64:       # %bb.0:
   3996 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   3997 ; X64-NEXT:    vpandnq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xd1]
   3998 ; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
   3999 ; X64-NEXT:    retq # encoding: [0xc3]
   4000   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
   4001   ret <4 x i64> %res
   4002 }
   4003 
   4004 define <4 x i64> @test_mask_andnot_epi64_rrkz_256(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
   4005 ; X86-LABEL: test_mask_andnot_epi64_rrkz_256:
   4006 ; X86:       # %bb.0:
   4007 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4008 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   4009 ; X86-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
   4010 ; X86-NEXT:    retl # encoding: [0xc3]
   4011 ;
   4012 ; X64-LABEL: test_mask_andnot_epi64_rrkz_256:
   4013 ; X64:       # %bb.0:
   4014 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   4015 ; X64-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
   4016 ; X64-NEXT:    retq # encoding: [0xc3]
   4017   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
   4018   ret <4 x i64> %res
   4019 }
   4020 
   4021 define <4 x i64> @test_mask_andnot_epi64_rm_256(<4 x i64> %a, <4 x i64>* %ptr_b) {
   4022 ; X86-LABEL: test_mask_andnot_epi64_rm_256:
   4023 ; X86:       # %bb.0:
   4024 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4025 ; X86-NEXT:    vandnps (%eax), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x00]
   4026 ; X86-NEXT:    retl # encoding: [0xc3]
   4027 ;
   4028 ; X64-LABEL: test_mask_andnot_epi64_rm_256:
   4029 ; X64:       # %bb.0:
   4030 ; X64-NEXT:    vandnps (%rdi), %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0x07]
   4031 ; X64-NEXT:    retq # encoding: [0xc3]
   4032   %b = load <4 x i64>, <4 x i64>* %ptr_b
   4033   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
   4034   ret <4 x i64> %res
   4035 }
   4036 
   4037 define <4 x i64> @test_mask_andnot_epi64_rmk_256(<4 x i64> %a, <4 x i64>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
   4038 ; X86-LABEL: test_mask_andnot_epi64_rmk_256:
   4039 ; X86:       # %bb.0:
   4040 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4041 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4042 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   4043 ; X86-NEXT:    vpandnq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x08]
   4044 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   4045 ; X86-NEXT:    retl # encoding: [0xc3]
   4046 ;
   4047 ; X64-LABEL: test_mask_andnot_epi64_rmk_256:
   4048 ; X64:       # %bb.0:
   4049 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   4050 ; X64-NEXT:    vpandnq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0x0f]
   4051 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   4052 ; X64-NEXT:    retq # encoding: [0xc3]
   4053   %b = load <4 x i64>, <4 x i64>* %ptr_b
   4054   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
   4055   ret <4 x i64> %res
   4056 }
   4057 
   4058 define <4 x i64> @test_mask_andnot_epi64_rmkz_256(<4 x i64> %a, <4 x i64>* %ptr_b, i8 %mask) {
   4059 ; X86-LABEL: test_mask_andnot_epi64_rmkz_256:
   4060 ; X86:       # %bb.0:
   4061 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4062 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4063 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   4064 ; X86-NEXT:    vpandnq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x00]
   4065 ; X86-NEXT:    retl # encoding: [0xc3]
   4066 ;
   4067 ; X64-LABEL: test_mask_andnot_epi64_rmkz_256:
   4068 ; X64:       # %bb.0:
   4069 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   4070 ; X64-NEXT:    vpandnq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0x07]
   4071 ; X64-NEXT:    retq # encoding: [0xc3]
   4072   %b = load <4 x i64>, <4 x i64>* %ptr_b
   4073   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
   4074   ret <4 x i64> %res
   4075 }
   4076 
   4077 define <4 x i64> @test_mask_andnot_epi64_rmb_256(<4 x i64> %a, i64* %ptr_b) {
   4078 ; X86-LABEL: test_mask_andnot_epi64_rmb_256:
   4079 ; X86:       # %bb.0:
   4080 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4081 ; X86-NEXT:    vmovsd (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfb,0x10,0x08]
   4082 ; X86-NEXT:    # xmm1 = mem[0],zero
   4083 ; X86-NEXT:    vbroadcastsd %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x19,0xc9]
   4084 ; X86-NEXT:    vandnps %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfc,0x55,0xc1]
   4085 ; X86-NEXT:    retl # encoding: [0xc3]
   4086 ;
   4087 ; X64-LABEL: test_mask_andnot_epi64_rmb_256:
   4088 ; X64:       # %bb.0:
   4089 ; X64-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xdf,0x07]
   4090 ; X64-NEXT:    retq # encoding: [0xc3]
   4091   %q = load i64, i64* %ptr_b
   4092   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
   4093   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
   4094   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 -1)
   4095   ret <4 x i64> %res
   4096 }
   4097 
   4098 define <4 x i64> @test_mask_andnot_epi64_rmbk_256(<4 x i64> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
   4099 ; X86-LABEL: test_mask_andnot_epi64_rmbk_256:
   4100 ; X86:       # %bb.0:
   4101 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4102 ; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
   4103 ; X86-NEXT:    # xmm2 = mem[0],zero
   4104 ; X86-NEXT:    vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
   4105 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
   4106 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   4107 ; X86-NEXT:    vpandnq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xdf,0xca]
   4108 ; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   4109 ; X86-NEXT:    retl # encoding: [0xc3]
   4110 ;
   4111 ; X64-LABEL: test_mask_andnot_epi64_rmbk_256:
   4112 ; X64:       # %bb.0:
   4113 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   4114 ; X64-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xdf,0x0f]
   4115 ; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
   4116 ; X64-NEXT:    retq # encoding: [0xc3]
   4117   %q = load i64, i64* %ptr_b
   4118   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
   4119   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
   4120   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passThru, i8 %mask)
   4121   ret <4 x i64> %res
   4122 }
   4123 
   4124 define <4 x i64> @test_mask_andnot_epi64_rmbkz_256(<4 x i64> %a, i64* %ptr_b, i8 %mask) {
   4125 ; X86-LABEL: test_mask_andnot_epi64_rmbkz_256:
   4126 ; X86:       # %bb.0:
   4127 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4128 ; X86-NEXT:    vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
   4129 ; X86-NEXT:    # xmm1 = mem[0],zero
   4130 ; X86-NEXT:    vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
   4131 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
   4132 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   4133 ; X86-NEXT:    vpandnq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xdf,0xc1]
   4134 ; X86-NEXT:    retl # encoding: [0xc3]
   4135 ;
   4136 ; X64-LABEL: test_mask_andnot_epi64_rmbkz_256:
   4137 ; X64:       # %bb.0:
   4138 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   4139 ; X64-NEXT:    vpandnq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xdf,0x07]
   4140 ; X64-NEXT:    retq # encoding: [0xc3]
   4141   %q = load i64, i64* %ptr_b
   4142   %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
   4143   %b = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
   4144   %res = call <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64> %a, <4 x i64> %b, <4 x i64> zeroinitializer, i8 %mask)
   4145   ret <4 x i64> %res
   4146 }
   4147 
   4148 declare <4 x i64> @llvm.x86.avx512.mask.pandn.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
   4149 
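; Masked add tests. vpaddd has a VEX form, so the unmasked reg/reg and
; reg/mem variants are printed with an "EVEX TO VEX Compression" remark,
; while the masked and embedded-broadcast variants keep the 4-byte EVEX
; prefix.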
   4150 define <4 x i32> @test_mask_add_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
   4151 ; CHECK-LABEL: test_mask_add_epi32_rr_128:
   4152 ; CHECK:       # %bb.0:
   4153 ; CHECK-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc1]
   4154 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4155   %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
   4156   ret <4 x i32> %res
   4157 }
   4158 
   4159 define <4 x i32> @test_mask_add_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
   4160 ; X86-LABEL: test_mask_add_epi32_rrk_128:
   4161 ; X86:       # %bb.0:
   4162 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4163 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   4164 ; X86-NEXT:    vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
   4165 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   4166 ; X86-NEXT:    retl # encoding: [0xc3]
   4167 ;
   4168 ; X64-LABEL: test_mask_add_epi32_rrk_128:
   4169 ; X64:       # %bb.0:
   4170 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   4171 ; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0xd1]
   4172 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   4173 ; X64-NEXT:    retq # encoding: [0xc3]
   4174   %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
   4175   ret <4 x i32> %res
   4176 }
   4177 
   4178 define <4 x i32> @test_mask_add_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
   4179 ; X86-LABEL: test_mask_add_epi32_rrkz_128:
   4180 ; X86:       # %bb.0:
   4181 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4182 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   4183 ; X86-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
   4184 ; X86-NEXT:    retl # encoding: [0xc3]
   4185 ;
   4186 ; X64-LABEL: test_mask_add_epi32_rrkz_128:
   4187 ; X64:       # %bb.0:
   4188 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   4189 ; X64-NEXT:    vpaddd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0xc1]
   4190 ; X64-NEXT:    retq # encoding: [0xc3]
   4191   %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
   4192   ret <4 x i32> %res
   4193 }
   4194 
   4195 define <4 x i32> @test_mask_add_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
   4196 ; X86-LABEL: test_mask_add_epi32_rm_128:
   4197 ; X86:       # %bb.0:
   4198 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4199 ; X86-NEXT:    vpaddd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x00]
   4200 ; X86-NEXT:    retl # encoding: [0xc3]
   4201 ;
   4202 ; X64-LABEL: test_mask_add_epi32_rm_128:
   4203 ; X64:       # %bb.0:
   4204 ; X64-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0x07]
   4205 ; X64-NEXT:    retq # encoding: [0xc3]
   4206   %b = load <4 x i32>, <4 x i32>* %ptr_b
   4207   %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
   4208   ret <4 x i32> %res
   4209 }
   4210 
   4211 define <4 x i32> @test_mask_add_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
   4212 ; X86-LABEL: test_mask_add_epi32_rmk_128:
   4213 ; X86:       # %bb.0:
   4214 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4215 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4216 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   4217 ; X86-NEXT:    vpaddd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x08]
   4218 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   4219 ; X86-NEXT:    retl # encoding: [0xc3]
   4220 ;
   4221 ; X64-LABEL: test_mask_add_epi32_rmk_128:
   4222 ; X64:       # %bb.0:
   4223 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   4224 ; X64-NEXT:    vpaddd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfe,0x0f]
   4225 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   4226 ; X64-NEXT:    retq # encoding: [0xc3]
   4227   %b = load <4 x i32>, <4 x i32>* %ptr_b
   4228   %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
   4229   ret <4 x i32> %res
   4230 }
   4231 
   4232 define <4 x i32> @test_mask_add_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
   4233 ; X86-LABEL: test_mask_add_epi32_rmkz_128:
   4234 ; X86:       # %bb.0:
   4235 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4236 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4237 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   4238 ; X86-NEXT:    vpaddd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x00]
   4239 ; X86-NEXT:    retl # encoding: [0xc3]
   4240 ;
   4241 ; X64-LABEL: test_mask_add_epi32_rmkz_128:
   4242 ; X64:       # %bb.0:
   4243 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   4244 ; X64-NEXT:    vpaddd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfe,0x07]
   4245 ; X64-NEXT:    retq # encoding: [0xc3]
   4246   %b = load <4 x i32>, <4 x i32>* %ptr_b
   4247   %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
   4248   ret <4 x i32> %res
   4249 }
   4250 
   4251 define <4 x i32> @test_mask_add_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
   4252 ; X86-LABEL: test_mask_add_epi32_rmb_128:
   4253 ; X86:       # %bb.0:
   4254 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4255 ; X86-NEXT:    vpaddd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x00]
   4256 ; X86-NEXT:    retl # encoding: [0xc3]
   4257 ;
   4258 ; X64-LABEL: test_mask_add_epi32_rmb_128:
   4259 ; X64:       # %bb.0:
   4260 ; X64-NEXT:    vpaddd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfe,0x07]
   4261 ; X64-NEXT:    retq # encoding: [0xc3]
   4262   %q = load i32, i32* %ptr_b
   4263   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   4264   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   4265   %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
   4266   ret <4 x i32> %res
   4267 }
   4268 
   4269 define <4 x i32> @test_mask_add_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
   4270 ; X86-LABEL: test_mask_add_epi32_rmbk_128:
   4271 ; X86:       # %bb.0:
   4272 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4273 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4274 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   4275 ; X86-NEXT:    vpaddd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x08]
   4276 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   4277 ; X86-NEXT:    retl # encoding: [0xc3]
   4278 ;
   4279 ; X64-LABEL: test_mask_add_epi32_rmbk_128:
   4280 ; X64:       # %bb.0:
   4281 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   4282 ; X64-NEXT:    vpaddd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfe,0x0f]
   4283 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   4284 ; X64-NEXT:    retq # encoding: [0xc3]
   4285   %q = load i32, i32* %ptr_b
   4286   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   4287   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   4288   %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
   4289   ret <4 x i32> %res
   4290 }
   4291 
   4292 define <4 x i32> @test_mask_add_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
   4293 ; X86-LABEL: test_mask_add_epi32_rmbkz_128:
   4294 ; X86:       # %bb.0:
   4295 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4296 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   4297 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   4298 ; X86-NEXT:    vpaddd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x00]
   4299 ; X86-NEXT:    retl # encoding: [0xc3]
   4300 ;
   4301 ; X64-LABEL: test_mask_add_epi32_rmbkz_128:
   4302 ; X64:       # %bb.0:
   4303 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   4304 ; X64-NEXT:    vpaddd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfe,0x07]
   4305 ; X64-NEXT:    retq # encoding: [0xc3]
   4306   %q = load i32, i32* %ptr_b
   4307   %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
   4308   %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
   4309   %res = call <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
   4310   ret <4 x i32> %res
   4311 }
   4312 
   4313 declare <4 x i32> @llvm.x86.avx512.mask.padd.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
   4314 
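; Subtract mirrors the add matrix with vpsubd (opcode 0xfa rather than 0xfe).
; Since subtraction is not commutative, these tests also fix the operand
; order: the loaded or broadcast operand %b is always the subtrahend.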
   4315 define <4 x i32> @test_mask_sub_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
   4316 ; CHECK-LABEL: test_mask_sub_epi32_rr_128:
   4317 ; CHECK:       # %bb.0:
   4318 ; CHECK-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0xc1]
   4319 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   4320   %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
   4321   ret <4 x i32> %res
   4322 }
   4323 
   4324 define <4 x i32> @test_mask_sub_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask) {
   4325 ; X86-LABEL: test_mask_sub_epi32_rrk_128:
   4326 ; X86:       # %bb.0:
   4327 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4328 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   4329 ; X86-NEXT:    vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
   4330 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   4331 ; X86-NEXT:    retl # encoding: [0xc3]
   4332 ;
   4333 ; X64-LABEL: test_mask_sub_epi32_rrk_128:
   4334 ; X64:       # %bb.0:
   4335 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   4336 ; X64-NEXT:    vpsubd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0xd1]
   4337 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   4338 ; X64-NEXT:    retq # encoding: [0xc3]
   4339   %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
   4340   ret <4 x i32> %res
   4341 }
   4342 
   4343 define <4 x i32> @test_mask_sub_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
   4344 ; X86-LABEL: test_mask_sub_epi32_rrkz_128:
   4345 ; X86:       # %bb.0:
   4346 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   4347 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   4348 ; X86-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
   4349 ; X86-NEXT:    retl # encoding: [0xc3]
   4350 ;
   4351 ; X64-LABEL: test_mask_sub_epi32_rrkz_128:
   4352 ; X64:       # %bb.0:
   4353 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   4354 ; X64-NEXT:    vpsubd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0xc1]
   4355 ; X64-NEXT:    retq # encoding: [0xc3]
   4356   %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
   4357   ret <4 x i32> %res
   4358 }
   4359 
   4360 define <4 x i32> @test_mask_sub_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
   4361 ; X86-LABEL: test_mask_sub_epi32_rm_128:
   4362 ; X86:       # %bb.0:
   4363 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   4364 ; X86-NEXT:    vpsubd (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x00]
   4365 ; X86-NEXT:    retl # encoding: [0xc3]
   4366 ;
   4367 ; X64-LABEL: test_mask_sub_epi32_rm_128:
   4368 ; X64:       # %bb.0:
   4369 ; X64-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfa,0x07]
   4370 ; X64-NEXT:    retq # encoding: [0xc3]
   4371   %b = load <4 x i32>, <4 x i32>* %ptr_b
   4372   %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
   4373   ret <4 x i32> %res
   4374 }

define <4 x i32> @test_mask_sub_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xfa,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xfa,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rmb_128(<4 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_sub_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsubd (%eax){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpsubd (%rdi){1to4}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0x7d,0x18,0xfa,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 -1)
  ret <4 x i32> %res
}
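
; Note: the {1toN} embedded-broadcast form exists only in the EVEX
; encoding, so the rmb tests stay EVEX-encoded instead of being
; compressed to VEX like the plain register/memory forms.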

define <4 x i32> @test_mask_sub_epi32_rmbk_128(<4 x i32> %a, i32* %ptr_b, <4 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi){1to4}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x19,0xfa,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passThru, i8 %mask)
  ret <4 x i32> %res
}

define <4 x i32> @test_mask_sub_epi32_rmbkz_128(<4 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi){1to4}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x99,0xfa,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <4 x i32> undef, i32 %q, i32 0
  %b = shufflevector <4 x i32> %vecinit.i, <4 x i32> undef, <4 x i32> zeroinitializer
  %res = call <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32> %a, <4 x i32> %b, <4 x i32> zeroinitializer, i8 %mask)
  ret <4 x i32> %res
}

declare <4 x i32> @llvm.x86.avx512.mask.psub.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
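
; A minimal IR sketch of what these legacy masked intrinsics compute (the
; auto-upgrader rewrites them into this generic form; names here are
; illustrative): the third operand is the pass-through and the i8 is a
; per-lane bitmask, of which the 128-bit case uses only the low 4 bits:
;   %sub = sub <4 x i32> %a, %b
;   %bc  = bitcast i8 %mask to <8 x i1>
;   %m   = shufflevector <8 x i1> %bc, <8 x i1> %bc, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
;   %res = select <4 x i1> %m, <4 x i32> %sub, <4 x i32> %passThru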

define <8 x i32> @test_mask_sub_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_sub_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsubd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsubd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_sub_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsubd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsubd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfa,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfa,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfa,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_sub_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpsubd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsubd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfa,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfa,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_sub_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_sub_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpsubd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_sub_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsubd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfa,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.psub.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
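
; The add tests below mirror the sub tests above one-for-one; in the
; expected encodings only the opcode byte changes (0xfe for vpaddd vs.
; 0xfa for vpsubd).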

define <8 x i32> @test_mask_add_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_add_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpaddd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_add_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpaddd (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpaddd (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpaddd (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xfe,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpaddd (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpaddd (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xfe,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmb_256(<8 x i32> %a, i32* %ptr_b) {
; X86-LABEL: test_mask_add_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpaddd (%eax){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpaddd (%rdi){1to8}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0x7d,0x38,0xfe,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmbk_256(<8 x i32> %a, i32* %ptr_b, <8 x i32> %passThru, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpaddd (%eax){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpaddd (%rdi){1to8}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x39,0xfe,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passThru, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_mask_add_epi32_rmbkz_256(<8 x i32> %a, i32* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_add_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpaddd (%eax){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_add_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpaddd (%rdi){1to8}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xb9,0xfe,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i32, i32* %ptr_b
  %vecinit.i = insertelement <8 x i32> undef, i32 %q, i32 0
  %b = shufflevector <8 x i32> %vecinit.i, <8 x i32> undef, <8 x i32> zeroinitializer
  %res = call <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32> %a, <8 x i32> %b, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.padd.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
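
; In all the masked tests the i8 mask reaches %k1 via kmovw: x86-64
; receives it in a GPR argument register per the calling convention,
; while 32-bit x86 first zero-extends it from the stack with movzbl.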

define <8 x float> @test_mm512_maskz_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_add_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_add_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_add_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x58,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_add_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_add_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.add.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)
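
; The floating-point tests keep their historical test_mm512_* names even
; though they exercise the 128/256-bit AVX512VL forms; only the unmasked
; variants can be compressed to the shorter VEX encoding.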

define <4 x float> @test_mm512_maskz_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_add_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_add_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_add_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_add_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_add_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vaddps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x58,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_add_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_add_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x58,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.add.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_sub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_sub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_sub_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_sub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5c,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_sub_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_sub_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.sub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_sub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_sub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5c,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_sub_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_sub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_sub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsubps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5c,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_sub_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_sub_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vsubps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5c,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.sub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_mul_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_mul_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x59,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_mul_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_mul_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_mul_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x59,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mul_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_mul_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.mul.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_mul_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_mul_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x59,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_mul_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_mul_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_mul_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmulps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x59,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mul_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_mul_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmulps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x59,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.mul.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)
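
; As with the integer tests, the fp groups differ only in the opcode
; byte of the expected encodings: 0x58 (vaddps), 0x5c (vsubps),
; 0x59 (vmulps), and 0x5e (vdivps).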

define <8 x float> @test_mm512_maskz_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_div_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_div_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_div_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5e,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_div_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_div_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5e,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.div.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_div_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_div_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5e,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_div_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_div_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_div_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vdivps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5e,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_div_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_div_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vdivps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5e,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.div.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

declare <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
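
; For the 256-bit lane shuffles below the immediate picks one 128-bit
; lane per source: bit 0 selects the low result lane from x0 and bit 1
; the high lane from x1. With imm 22 (0b10110) that is x0 lane 0 and
; x1 lane 1, so the shuffle folds to a plain vblendps/vpblendd of the
; two halves.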

define <8 x float>@test_int_x86_avx512_mask_shuf_f32x4_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; X86-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovaps %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0]
; X86-NEXT:    vmovaps %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8]
; X86-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X86-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    vblendps $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0c,0xc1,0xf0]
; X64-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovaps %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x28,0xd0]
; X64-NEXT:    vmovaps %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x28,0xc8]
; X64-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X64-NEXT:    vaddps %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
    %res = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
    %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
    %res2 = call <8 x float> @llvm.x86.avx512.mask.shuf.f32x4.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> zeroinitializer, i8 %x4)
    %res3 = fadd <8 x float> %res, %res1
    %res4 = fadd <8 x float> %res2, %res3
    ret <8 x float> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_shuf_f64x2_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
; X86:       # %bb.0:
; X86-NEXT:    vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X86-NEXT:    # ymm0 = ymm0[0,1],ymm1[2,3]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovapd %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0xd0]
; X86-NEXT:    vmovapd %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc8]
; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_f64x2_256:
; X64:       # %bb.0:
; X64-NEXT:    vblendpd $12, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x0d,0xc1,0x0c]
; X64-NEXT:    # ymm0 = ymm0[0,1],ymm1[2,3]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovapd %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x28,0xd0]
; X64-NEXT:    vmovapd %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x28,0xc8]
; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT:    vaddpd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
    %res = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 %x4)
    %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> %x3, i8 -1)
    %res2 = call <4 x double> @llvm.x86.avx512.mask.shuf.f64x2.256(<4 x double> %x0, <4 x double> %x1, i32 22, <4 x double> zeroinitializer, i8 %x4)
    %res3 = fadd <4 x double> %res, %res1
    %res4 = fadd <4 x double> %res2, %res3
    ret <4 x double> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_shuf_i32x4_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X86-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa32 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xd0]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X64-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa32 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x6f,0xd0]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
    %res = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 %x4)
    %res1 = call <8 x i32> @llvm.x86.avx512.mask.shuf.i32x4.256(<8 x i32> %x0, <8 x i32> %x1, i32 22, <8 x i32> %x3, i8 -1)
    %res2 = add <8 x i32> %res, %res1
    ret <8 x i32> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_shuf_i64x2_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
; X86:       # %bb.0:
; X86-NEXT:    vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X86-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmovdqa64 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xd0]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_i64x2_256:
; X64:       # %bb.0:
; X64-NEXT:    vpblendd $240, %ymm1, %ymm0, %ymm0 # encoding: [0xc4,0xe3,0x7d,0x02,0xc1,0xf0]
; X64-NEXT:    # ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmovdqa64 %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x6f,0xd0]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
    %res = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 %x4)
    %res1 = call <4 x i64> @llvm.x86.avx512.mask.shuf.i64x2.256(<4 x i64> %x0, <4 x i64> %x1, i32 22, <4 x i64> %x3, i8 -1)
    %res2 = add <4 x i64> %res, %res1
    ret <4 x i64> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double>, <2 x double>, i32, <2 x double>, i8)
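
; For the 128-bit vshufpd, result element 0 comes from the first source
; and element 1 from the second, each selected by the matching immediate
; bit; imm 1 yields xmm0[1],xmm1[0], as the CHECK comments below spell out.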

define <2 x double>@test_int_x86_avx512_mask_shuf_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xd9,0x01]
; X86-NEXT:    # xmm3 = xmm0[1],xmm1[0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
; X86-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[0]
; X86-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
; X86-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; X86-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xc6,0xd9,0x01]
; X64-NEXT:    # xmm3 = xmm0[1],xmm1[0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xc6,0xd1,0x01]
; X64-NEXT:    # xmm2 {%k1} = xmm0[1],xmm1[0]
; X64-NEXT:    vaddpd %xmm3, %xmm2, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xd3]
; X64-NEXT:    vshufpd $1, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xc6,0xc1,0x01]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[1],xmm1[0]
; X64-NEXT:    vaddpd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 %x4)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> %x3, i8 -1)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.shuf.pd.128(<2 x double> %x0, <2 x double> %x1, i32 1, <2 x double> zeroinitializer, i8 %x4)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res2, %res3
  ret <2 x double> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double>, <4 x double>, i32, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_shuf_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vshufpd $6, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xd9,0x06]
; X86-NEXT:    # ymm3 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
; X86-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X86-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_shuf_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vshufpd $6, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xc6,0xd9,0x06]
; X64-NEXT:    # ymm3 = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vshufpd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xc6,0xd1,0x06]
; X64-NEXT:    # ymm2 {%k1} = ymm0[0],ymm1[1],ymm0[3],ymm1[2]
; X64-NEXT:    vaddpd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 %x4)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.shuf.pd.256(<4 x double> %x0, <4 x double> %x1, i32 6, <4 x double> %x3, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float>, <4 x float>, i32, <4 x float>, i8)
   5365 
   5366 define <4 x float>@test_int_x86_avx512_mask_shuf_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x3, i8 %x4) {
   5367 ; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
   5368 ; X86:       # %bb.0:
   5369 ; X86-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xd9,0x16]
   5370 ; X86-NEXT:    # xmm3 = xmm0[2,1],xmm1[1,0]
   5371 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5372 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5373 ; X86-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
   5374 ; X86-NEXT:    # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
   5375 ; X86-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
   5376 ; X86-NEXT:    retl # encoding: [0xc3]
   5377 ;
   5378 ; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_128:
   5379 ; X64:       # %bb.0:
   5380 ; X64-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0xc6,0xd9,0x16]
   5381 ; X64-NEXT:    # xmm3 = xmm0[2,1],xmm1[1,0]
   5382 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5383 ; X64-NEXT:    vshufps $22, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0xc6,0xd1,0x16]
   5384 ; X64-NEXT:    # xmm2 {%k1} = xmm0[2,1],xmm1[1,0]
   5385 ; X64-NEXT:    vaddps %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc3]
   5386 ; X64-NEXT:    retq # encoding: [0xc3]
   5387   %res = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 %x4)
   5388   %res1 = call <4 x float> @llvm.x86.avx512.mask.shuf.ps.128(<4 x float> %x0, <4 x float> %x1, i32 22, <4 x float> %x3, i8 -1)
   5389   %res2 = fadd <4 x float> %res, %res1
   5390   ret <4 x float> %res2
   5391 }
   5392 
   5393 declare <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float>, <8 x float>, i32, <8 x float>, i8)
   5394 
   5395 define <8 x float>@test_int_x86_avx512_mask_shuf_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x3, i8 %x4) {
   5396 ; X86-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
   5397 ; X86:       # %bb.0:
   5398 ; X86-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xd9,0x16]
   5399 ; X86-NEXT:    # ymm3 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
   5400 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5401 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5402 ; X86-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
   5403 ; X86-NEXT:    # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
   5404 ; X86-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
   5405 ; X86-NEXT:    retl # encoding: [0xc3]
   5406 ;
   5407 ; X64-LABEL: test_int_x86_avx512_mask_shuf_ps_256:
   5408 ; X64:       # %bb.0:
   5409 ; X64-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0xc6,0xd9,0x16]
   5410 ; X64-NEXT:    # ymm3 = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
   5411 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5412 ; X64-NEXT:    vshufps $22, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0xc6,0xd1,0x16]
   5413 ; X64-NEXT:    # ymm2 {%k1} = ymm0[2,1],ymm1[1,0],ymm0[6,5],ymm1[5,4]
   5414 ; X64-NEXT:    vaddps %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc3]
   5415 ; X64-NEXT:    retq # encoding: [0xc3]
   5416   %res = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 %x4)
   5417   %res1 = call <8 x float> @llvm.x86.avx512.mask.shuf.ps.256(<8 x float> %x0, <8 x float> %x1, i32 22, <8 x float> %x3, i8 -1)
   5418   %res2 = fadd <8 x float> %res, %res1
   5419   ret <8 x float> %res2
   5420 }
   5421 
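        ; Masked integer min/max tests (pmaxs/pmaxu/pmins/pminu). Each intrinsic is called twice, with a real mask and with either an all-ones mask or a zeroinitializer passthru, and the results are summed so neither lowering is dead.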
   5422 declare <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
   5423 
   5424 define <4 x i32>@test_int_x86_avx512_mask_pmaxs_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
   5425 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
   5426 ; X86:       # %bb.0:
   5427 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5428 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5429 ; X86-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
   5430 ; X86-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
   5431 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5432 ; X86-NEXT:    retl # encoding: [0xc3]
   5433 ;
   5434 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_128:
   5435 ; X64:       # %bb.0:
   5436 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5437 ; X64-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3d,0xd1]
   5438 ; X64-NEXT:    vpmaxsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3d,0xc1]
   5439 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5440 ; X64-NEXT:    retq # encoding: [0xc3]
   5441   %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
   5442   %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxs.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
   5443   %res2 = add <4 x i32> %res, %res1
   5444   ret <4 x i32> %res2
   5445 }
   5446 
   5447 declare <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
   5448 
   5449 define <8 x i32>@test_int_x86_avx512_mask_pmaxs_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   5450 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
   5451 ; X86:       # %bb.0:
   5452 ; X86-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xd9]
   5453 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5454 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5455 ; X86-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
   5456 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   5457 ; X86-NEXT:    retl # encoding: [0xc3]
   5458 ;
   5459 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_d_256:
   5460 ; X64:       # %bb.0:
   5461 ; X64-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3d,0xd9]
   5462 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5463 ; X64-NEXT:    vpmaxsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3d,0xd1]
   5464 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   5465 ; X64-NEXT:    retq # encoding: [0xc3]
   5466   %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
   5467   %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxs.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
   5468   %res2 = add <8 x i32> %res, %res1
   5469   ret <8 x i32> %res2
   5470 }
   5471 
   5472 declare <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   5473 
   5474 define <2 x i64>@test_int_x86_avx512_mask_pmaxs_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
   5475 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
   5476 ; X86:       # %bb.0:
   5477 ; X86-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xd9]
   5478 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5479 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5480 ; X86-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
   5481 ; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   5482 ; X86-NEXT:    retl # encoding: [0xc3]
   5483 ;
   5484 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_128:
   5485 ; X64:       # %bb.0:
   5486 ; X64-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3d,0xd9]
   5487 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5488 ; X64-NEXT:    vpmaxsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3d,0xd1]
   5489 ; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   5490 ; X64-NEXT:    retq # encoding: [0xc3]
   5491   %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
   5492   %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxs.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
   5493   %res2 = add <2 x i64> %res, %res1
   5494   ret <2 x i64> %res2
   5495 }
   5496 
   5497 declare <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
   5498 
   5499 define <4 x i64>@test_int_x86_avx512_mask_pmaxs_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
   5500 ; X86-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
   5501 ; X86:       # %bb.0:
   5502 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5503 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5504 ; X86-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
   5505 ; X86-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
   5506 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5507 ; X86-NEXT:    retl # encoding: [0xc3]
   5508 ;
   5509 ; X64-LABEL: test_int_x86_avx512_mask_pmaxs_q_256:
   5510 ; X64:       # %bb.0:
   5511 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5512 ; X64-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3d,0xd1]
   5513 ; X64-NEXT:    vpmaxsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3d,0xc1]
   5514 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5515 ; X64-NEXT:    retq # encoding: [0xc3]
   5516   %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
   5517   %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxs.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
   5518   %res2 = add <4 x i64> %res, %res1
   5519   ret <4 x i64> %res2
   5520 }
   5521 
   5522 declare <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
   5523 
   5524 define <4 x i32>@test_int_x86_avx512_mask_pmaxu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
   5525 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
   5526 ; X86:       # %bb.0:
   5527 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5528 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5529 ; X86-NEXT:    vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
   5530 ; X86-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
   5531 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5532 ; X86-NEXT:    retl # encoding: [0xc3]
   5533 ;
   5534 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_128:
   5535 ; X64:       # %bb.0:
   5536 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5537 ; X64-NEXT:    vpmaxud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3f,0xd1]
   5538 ; X64-NEXT:    vpmaxud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3f,0xc1]
   5539 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5540 ; X64-NEXT:    retq # encoding: [0xc3]
   5541   %res = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
   5542   %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmaxu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
   5543   %res2 = add <4 x i32> %res, %res1
   5544   ret <4 x i32> %res2
   5545 }
   5546 
   5547 declare <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
   5548 
   5549 define <8 x i32>@test_int_x86_avx512_mask_pmaxu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   5550 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
   5551 ; X86:       # %bb.0:
   5552 ; X86-NEXT:    vpmaxud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xd9]
   5553 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5554 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5555 ; X86-NEXT:    vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
   5556 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   5557 ; X86-NEXT:    retl # encoding: [0xc3]
   5558 ;
   5559 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_d_256:
   5560 ; X64:       # %bb.0:
   5561 ; X64-NEXT:    vpmaxud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3f,0xd9]
   5562 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5563 ; X64-NEXT:    vpmaxud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3f,0xd1]
   5564 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   5565 ; X64-NEXT:    retq # encoding: [0xc3]
   5566   %res = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
   5567   %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmaxu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
   5568   %res2 = add <8 x i32> %res, %res1
   5569   ret <8 x i32> %res2
   5570 }
   5571 
   5572 declare <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   5573 
   5574 define <2 x i64>@test_int_x86_avx512_mask_pmaxu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
   5575 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
   5576 ; X86:       # %bb.0:
   5577 ; X86-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xd9]
   5578 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5579 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5580 ; X86-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
   5581 ; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   5582 ; X86-NEXT:    retl # encoding: [0xc3]
   5583 ;
   5584 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_128:
   5585 ; X64:       # %bb.0:
   5586 ; X64-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3f,0xd9]
   5587 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5588 ; X64-NEXT:    vpmaxuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3f,0xd1]
   5589 ; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   5590 ; X64-NEXT:    retq # encoding: [0xc3]
   5591   %res = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
   5592   %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmaxu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
   5593   %res2 = add <2 x i64> %res, %res1
   5594   ret <2 x i64> %res2
   5595 }
   5596 
   5597 declare <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
   5598 
   5599 define <4 x i64>@test_int_x86_avx512_mask_pmaxu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
   5600 ; X86-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
   5601 ; X86:       # %bb.0:
   5602 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5603 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5604 ; X86-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
   5605 ; X86-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
   5606 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5607 ; X86-NEXT:    retl # encoding: [0xc3]
   5608 ;
   5609 ; X64-LABEL: test_int_x86_avx512_mask_pmaxu_q_256:
   5610 ; X64:       # %bb.0:
   5611 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5612 ; X64-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3f,0xd1]
   5613 ; X64-NEXT:    vpmaxuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3f,0xc1]
   5614 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5615 ; X64-NEXT:    retq # encoding: [0xc3]
   5616   %res = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
   5617   %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmaxu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
   5618   %res2 = add <4 x i64> %res, %res1
   5619   ret <4 x i64> %res2
   5620 }
   5621 
   5622 declare <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
   5623 
   5624 define <4 x i32>@test_int_x86_avx512_mask_pmins_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
   5625 ; X86-LABEL: test_int_x86_avx512_mask_pmins_d_128:
   5626 ; X86:       # %bb.0:
   5627 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5628 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5629 ; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
   5630 ; X86-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
   5631 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5632 ; X86-NEXT:    retl # encoding: [0xc3]
   5633 ;
   5634 ; X64-LABEL: test_int_x86_avx512_mask_pmins_d_128:
   5635 ; X64:       # %bb.0:
   5636 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5637 ; X64-NEXT:    vpminsd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x39,0xd1]
   5638 ; X64-NEXT:    vpminsd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x39,0xc1]
   5639 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5640 ; X64-NEXT:    retq # encoding: [0xc3]
   5641   %res = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
   5642   %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmins.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
   5643   %res2 = add <4 x i32> %res, %res1
   5644   ret <4 x i32> %res2
   5645 }
   5646 
   5647 declare <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
   5648 
   5649 define <8 x i32>@test_int_x86_avx512_mask_pmins_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   5650 ; X86-LABEL: test_int_x86_avx512_mask_pmins_d_256:
   5651 ; X86:       # %bb.0:
   5652 ; X86-NEXT:    vpminsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xd9]
   5653 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5654 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5655 ; X86-NEXT:    vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
   5656 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   5657 ; X86-NEXT:    retl # encoding: [0xc3]
   5658 ;
   5659 ; X64-LABEL: test_int_x86_avx512_mask_pmins_d_256:
   5660 ; X64:       # %bb.0:
   5661 ; X64-NEXT:    vpminsd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x39,0xd9]
   5662 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5663 ; X64-NEXT:    vpminsd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x39,0xd1]
   5664 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   5665 ; X64-NEXT:    retq # encoding: [0xc3]
   5666   %res = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
   5667   %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmins.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
   5668   %res2 = add <8 x i32> %res, %res1
   5669   ret <8 x i32> %res2
   5670 }
   5671 
   5672 declare <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   5673 
   5674 define <2 x i64>@test_int_x86_avx512_mask_pmins_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
   5675 ; X86-LABEL: test_int_x86_avx512_mask_pmins_q_128:
   5676 ; X86:       # %bb.0:
   5677 ; X86-NEXT:    vpminsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xd9]
   5678 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5679 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5680 ; X86-NEXT:    vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
   5681 ; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   5682 ; X86-NEXT:    retl # encoding: [0xc3]
   5683 ;
   5684 ; X64-LABEL: test_int_x86_avx512_mask_pmins_q_128:
   5685 ; X64:       # %bb.0:
   5686 ; X64-NEXT:    vpminsq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x39,0xd9]
   5687 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5688 ; X64-NEXT:    vpminsq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x39,0xd1]
   5689 ; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   5690 ; X64-NEXT:    retq # encoding: [0xc3]
   5691   %res = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
   5692   %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmins.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
   5693   %res2 = add <2 x i64> %res, %res1
   5694   ret <2 x i64> %res2
   5695 }
   5696 
   5697 declare <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
   5698 
   5699 define <4 x i64>@test_int_x86_avx512_mask_pmins_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
   5700 ; X86-LABEL: test_int_x86_avx512_mask_pmins_q_256:
   5701 ; X86:       # %bb.0:
   5702 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5703 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5704 ; X86-NEXT:    vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
   5705 ; X86-NEXT:    vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
   5706 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5707 ; X86-NEXT:    retl # encoding: [0xc3]
   5708 ;
   5709 ; X64-LABEL: test_int_x86_avx512_mask_pmins_q_256:
   5710 ; X64:       # %bb.0:
   5711 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5712 ; X64-NEXT:    vpminsq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x39,0xd1]
   5713 ; X64-NEXT:    vpminsq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x39,0xc1]
   5714 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5715 ; X64-NEXT:    retq # encoding: [0xc3]
   5716   %res = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
   5717   %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmins.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
   5718   %res2 = add <4 x i64> %res, %res1
   5719   ret <4 x i64> %res2
   5720 }
   5721 
   5722 declare <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
   5723 
   5724 define <4 x i32>@test_int_x86_avx512_mask_pminu_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask) {
   5725 ; X86-LABEL: test_int_x86_avx512_mask_pminu_d_128:
   5726 ; X86:       # %bb.0:
   5727 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5728 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5729 ; X86-NEXT:    vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
   5730 ; X86-NEXT:    vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
   5731 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5732 ; X86-NEXT:    retl # encoding: [0xc3]
   5733 ;
   5734 ; X64-LABEL: test_int_x86_avx512_mask_pminu_d_128:
   5735 ; X64:       # %bb.0:
   5736 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5737 ; X64-NEXT:    vpminud %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x3b,0xd1]
   5738 ; X64-NEXT:    vpminud %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x3b,0xc1]
   5739 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5740 ; X64-NEXT:    retq # encoding: [0xc3]
   5741   %res = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %mask)
   5742   %res1 = call <4 x i32> @llvm.x86.avx512.mask.pminu.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %mask)
   5743   %res2 = add <4 x i32> %res, %res1
   5744   ret <4 x i32> %res2
   5745 }
   5746 
   5747 declare <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
   5748 
   5749 define <8 x i32>@test_int_x86_avx512_mask_pminu_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   5750 ; X86-LABEL: test_int_x86_avx512_mask_pminu_d_256:
   5751 ; X86:       # %bb.0:
   5752 ; X86-NEXT:    vpminud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xd9]
   5753 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5754 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5755 ; X86-NEXT:    vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
   5756 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   5757 ; X86-NEXT:    retl # encoding: [0xc3]
   5758 ;
   5759 ; X64-LABEL: test_int_x86_avx512_mask_pminu_d_256:
   5760 ; X64:       # %bb.0:
   5761 ; X64-NEXT:    vpminud %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x3b,0xd9]
   5762 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5763 ; X64-NEXT:    vpminud %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x3b,0xd1]
   5764 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   5765 ; X64-NEXT:    retq # encoding: [0xc3]
   5766   %res = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
   5767   %res1 = call <8 x i32> @llvm.x86.avx512.mask.pminu.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
   5768   %res2 = add <8 x i32> %res, %res1
   5769   ret <8 x i32> %res2
   5770 }
   5771 
   5772 declare <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   5773 
   5774 define <2 x i64>@test_int_x86_avx512_mask_pminu_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
   5775 ; X86-LABEL: test_int_x86_avx512_mask_pminu_q_128:
   5776 ; X86:       # %bb.0:
   5777 ; X86-NEXT:    vpminuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xd9]
   5778 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5779 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5780 ; X86-NEXT:    vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
   5781 ; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   5782 ; X86-NEXT:    retl # encoding: [0xc3]
   5783 ;
   5784 ; X64-LABEL: test_int_x86_avx512_mask_pminu_q_128:
   5785 ; X64:       # %bb.0:
   5786 ; X64-NEXT:    vpminuq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x3b,0xd9]
   5787 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5788 ; X64-NEXT:    vpminuq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x3b,0xd1]
   5789 ; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   5790 ; X64-NEXT:    retq # encoding: [0xc3]
   5791   %res = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
   5792   %res1 = call <2 x i64> @llvm.x86.avx512.mask.pminu.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
   5793   %res2 = add <2 x i64> %res, %res1
   5794   ret <2 x i64> %res2
   5795 }
   5796 
   5797 declare <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
   5798 
   5799 define <4 x i64>@test_int_x86_avx512_mask_pminu_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask) {
   5800 ; X86-LABEL: test_int_x86_avx512_mask_pminu_q_256:
   5801 ; X86:       # %bb.0:
   5802 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5803 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5804 ; X86-NEXT:    vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
   5805 ; X86-NEXT:    vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
   5806 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5807 ; X86-NEXT:    retl # encoding: [0xc3]
   5808 ;
   5809 ; X64-LABEL: test_int_x86_avx512_mask_pminu_q_256:
   5810 ; X64:       # %bb.0:
   5811 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5812 ; X64-NEXT:    vpminuq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x3b,0xd1]
   5813 ; X64-NEXT:    vpminuq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x3b,0xc1]
   5814 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5815 ; X64-NEXT:    retq # encoding: [0xc3]
   5816   %res = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %mask)
   5817   %res1 = call <4 x i64> @llvm.x86.avx512.mask.pminu.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %mask)
   5818   %res2 = add <4 x i64> %res, %res1
   5819   ret <4 x i64> %res2
   5820 }
   5821 
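        ; Shift tests with an XMM shift count (psrl/psra/psll). Each intrinsic is exercised unmasked, merge-masked, and zero-masked, and the three results are summed to keep all three lowerings live.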
   5822 declare <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   5823 
   5824 define <2 x i64>@test_int_x86_avx512_mask_psrl_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
   5825 ; X86-LABEL: test_int_x86_avx512_mask_psrl_q_128:
   5826 ; X86:       # %bb.0:
   5827 ; X86-NEXT:    vpsrlq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xd9]
   5828 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5829 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5830 ; X86-NEXT:    vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
   5831 ; X86-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
   5832 ; X86-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
   5833 ; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
   5834 ; X86-NEXT:    retl # encoding: [0xc3]
   5835 ;
   5836 ; X64-LABEL: test_int_x86_avx512_mask_psrl_q_128:
   5837 ; X64:       # %bb.0:
   5838 ; X64-NEXT:    vpsrlq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd3,0xd9]
   5839 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5840 ; X64-NEXT:    vpsrlq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xd3,0xd1]
   5841 ; X64-NEXT:    vpsrlq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xd3,0xc1]
   5842 ; X64-NEXT:    vpaddq %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xd4,0xc0]
   5843 ; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
   5844 ; X64-NEXT:    retq # encoding: [0xc3]
   5845   %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
   5846   %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
   5847   %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
   5848   %res3 = add <2 x i64> %res, %res1
   5849   %res4 = add <2 x i64> %res3, %res2
   5850   ret <2 x i64> %res4
   5851 }
   5852 
   5853 declare <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
   5854 
   5855 define <4 x i64>@test_int_x86_avx512_mask_psrl_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
   5856 ; X86-LABEL: test_int_x86_avx512_mask_psrl_q_256:
   5857 ; X86:       # %bb.0:
   5858 ; X86-NEXT:    vpsrlq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xd9]
   5859 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5860 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5861 ; X86-NEXT:    vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
   5862 ; X86-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
   5863 ; X86-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
   5864 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5865 ; X86-NEXT:    retl # encoding: [0xc3]
   5866 ;
   5867 ; X64-LABEL: test_int_x86_avx512_mask_psrl_q_256:
   5868 ; X64:       # %bb.0:
   5869 ; X64-NEXT:    vpsrlq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd3,0xd9]
   5870 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5871 ; X64-NEXT:    vpsrlq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xd3,0xd1]
   5872 ; X64-NEXT:    vpsrlq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xd3,0xc1]
   5873 ; X64-NEXT:    vpaddq %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xd4,0xc0]
   5874 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   5875 ; X64-NEXT:    retq # encoding: [0xc3]
   5876   %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
   5877   %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
   5878   %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
   5879   %res3 = add <4 x i64> %res, %res1
   5880   %res4 = add <4 x i64> %res3, %res2
   5881   ret <4 x i64> %res4
   5882 }
   5883 
   5884 declare <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
   5885 
   5886 define <4 x i32>@test_int_x86_avx512_mask_psrl_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
   5887 ; X86-LABEL: test_int_x86_avx512_mask_psrl_d_128:
   5888 ; X86:       # %bb.0:
   5889 ; X86-NEXT:    vpsrld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xd9]
   5890 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5891 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5892 ; X86-NEXT:    vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
   5893 ; X86-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
   5894 ; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
   5895 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5896 ; X86-NEXT:    retl # encoding: [0xc3]
   5897 ;
   5898 ; X64-LABEL: test_int_x86_avx512_mask_psrl_d_128:
   5899 ; X64:       # %bb.0:
   5900 ; X64-NEXT:    vpsrld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd2,0xd9]
   5901 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5902 ; X64-NEXT:    vpsrld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xd2,0xd1]
   5903 ; X64-NEXT:    vpsrld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xd2,0xc1]
   5904 ; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
   5905 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5906 ; X64-NEXT:    retq # encoding: [0xc3]
   5907   %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
   5908   %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
   5909   %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
   5910   %res3 = add <4 x i32> %res, %res1
   5911   %res4 = add <4 x i32> %res3, %res2
   5912   ret <4 x i32> %res4
   5913 }
   5914 
   5915 declare <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
   5916 
   5917 define <8 x i32>@test_int_x86_avx512_mask_psrl_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   5918 ; X86-LABEL: test_int_x86_avx512_mask_psrl_d_256:
   5919 ; X86:       # %bb.0:
   5920 ; X86-NEXT:    vpsrld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xd9]
   5921 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5922 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5923 ; X86-NEXT:    vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
   5924 ; X86-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
   5925 ; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
   5926 ; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
   5927 ; X86-NEXT:    retl # encoding: [0xc3]
   5928 ;
   5929 ; X64-LABEL: test_int_x86_avx512_mask_psrl_d_256:
   5930 ; X64:       # %bb.0:
   5931 ; X64-NEXT:    vpsrld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd2,0xd9]
   5932 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5933 ; X64-NEXT:    vpsrld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xd2,0xd1]
   5934 ; X64-NEXT:    vpsrld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xd2,0xc1]
   5935 ; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
   5936 ; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
   5937 ; X64-NEXT:    retq # encoding: [0xc3]
   5938   %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
   5939   %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
   5940   %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
   5941   %res3 = add <8 x i32> %res, %res1
   5942   %res4 = add <8 x i32> %res2, %res3
   5943   ret <8 x i32> %res4
   5944 }
   5945 
   5946 declare <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
   5947 
   5948 define <4 x i32>@test_int_x86_avx512_mask_psra_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
   5949 ; X86-LABEL: test_int_x86_avx512_mask_psra_d_128:
   5950 ; X86:       # %bb.0:
   5951 ; X86-NEXT:    vpsrad %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xd9]
   5952 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5953 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5954 ; X86-NEXT:    vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
   5955 ; X86-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
   5956 ; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
   5957 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5958 ; X86-NEXT:    retl # encoding: [0xc3]
   5959 ;
   5960 ; X64-LABEL: test_int_x86_avx512_mask_psra_d_128:
   5961 ; X64:       # %bb.0:
   5962 ; X64-NEXT:    vpsrad %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xe2,0xd9]
   5963 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5964 ; X64-NEXT:    vpsrad %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xe2,0xd1]
   5965 ; X64-NEXT:    vpsrad %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xe2,0xc1]
   5966 ; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
   5967 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   5968 ; X64-NEXT:    retq # encoding: [0xc3]
   5969   %res = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
   5970   %res1 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
   5971   %res2 = call <4 x i32> @llvm.x86.avx512.mask.psra.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
   5972   %res3 = add <4 x i32> %res, %res1
   5973   %res4 = add <4 x i32> %res3, %res2
   5974   ret <4 x i32> %res4
   5975 }
   5976 
   5977 declare <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
   5978 
   5979 define <8 x i32>@test_int_x86_avx512_mask_psra_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   5980 ; X86-LABEL: test_int_x86_avx512_mask_psra_d_256:
   5981 ; X86:       # %bb.0:
   5982 ; X86-NEXT:    vpsrad %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xd9]
   5983 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   5984 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   5985 ; X86-NEXT:    vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
   5986 ; X86-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
   5987 ; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
   5988 ; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
   5989 ; X86-NEXT:    retl # encoding: [0xc3]
   5990 ;
   5991 ; X64-LABEL: test_int_x86_avx512_mask_psra_d_256:
   5992 ; X64:       # %bb.0:
   5993 ; X64-NEXT:    vpsrad %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe2,0xd9]
   5994 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   5995 ; X64-NEXT:    vpsrad %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xe2,0xd1]
   5996 ; X64-NEXT:    vpsrad %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xe2,0xc1]
   5997 ; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
   5998 ; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
   5999 ; X64-NEXT:    retq # encoding: [0xc3]
   6000   %res = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
   6001   %res1 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
   6002   %res2 = call <8 x i32> @llvm.x86.avx512.mask.psra.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
   6003   %res3 = add <8 x i32> %res, %res1
   6004   %res4 = add <8 x i32> %res3, %res2
   6005   ret <8 x i32> %res4
   6006 }
   6007 
   6008 declare <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)
   6009 
   6010 define <4 x i32>@test_int_x86_avx512_mask_psll_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
   6011 ; X86-LABEL: test_int_x86_avx512_mask_psll_d_128:
   6012 ; X86:       # %bb.0:
   6013 ; X86-NEXT:    vpslld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xd9]
   6014 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6015 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   6016 ; X86-NEXT:    vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
   6017 ; X86-NEXT:    vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
   6018 ; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
   6019 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   6020 ; X86-NEXT:    retl # encoding: [0xc3]
   6021 ;
   6022 ; X64-LABEL: test_int_x86_avx512_mask_psll_d_128:
   6023 ; X64:       # %bb.0:
   6024 ; X64-NEXT:    vpslld %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf2,0xd9]
   6025 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   6026 ; X64-NEXT:    vpslld %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x09,0xf2,0xd1]
   6027 ; X64-NEXT:    vpslld %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0xf2,0xc1]
   6028 ; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
   6029 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   6030 ; X64-NEXT:    retq # encoding: [0xc3]
   6031   %res = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
   6032   %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
   6033   %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
   6034   %res3 = add <4 x i32> %res, %res1
   6035   %res4 = add <4 x i32> %res3, %res2
   6036   ret <4 x i32> %res4
   6037 }
   6038 
   6039 declare <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32>, <4 x i32>, <8 x i32>, i8)
   6040 
   6041 define <8 x i32>@test_int_x86_avx512_mask_psll_d_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   6042 ; X86-LABEL: test_int_x86_avx512_mask_psll_d_256:
   6043 ; X86:       # %bb.0:
   6044 ; X86-NEXT:    vpslld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xd9]
   6045 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6046 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   6047 ; X86-NEXT:    vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
   6048 ; X86-NEXT:    vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
   6049 ; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
   6050 ; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
   6051 ; X86-NEXT:    retl # encoding: [0xc3]
   6052 ;
   6053 ; X64-LABEL: test_int_x86_avx512_mask_psll_d_256:
   6054 ; X64:       # %bb.0:
   6055 ; X64-NEXT:    vpslld %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf2,0xd9]
   6056 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   6057 ; X64-NEXT:    vpslld %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0xf2,0xd1]
   6058 ; X64-NEXT:    vpslld %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0xf2,0xc1]
   6059 ; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
   6060 ; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
   6061 ; X64-NEXT:    retq # encoding: [0xc3]
   6062   %res = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 %x3)
   6063   %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
   6064   %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.d.256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x2, i8 -1)
   6065   %res3 = add <8 x i32> %res, %res1
   6066   %res4 = add <8 x i32> %res3, %res2
   6067   ret <8 x i32> %res4
   6068 }
   6069 
   6070 declare <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)
   6071 
   6072 define <4 x i64>@test_int_x86_avx512_mask_psll_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
   6073 ; X86-LABEL: test_int_x86_avx512_mask_psll_q_256:
   6074 ; X86:       # %bb.0:
   6075 ; X86-NEXT:    vpsllq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xd9]
   6076 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6077 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   6078 ; X86-NEXT:    vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
   6079 ; X86-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
   6080 ; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
   6081 ; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   6082 ; X86-NEXT:    retl # encoding: [0xc3]
   6083 ;
   6084 ; X64-LABEL: test_int_x86_avx512_mask_psll_q_256:
   6085 ; X64:       # %bb.0:
   6086 ; X64-NEXT:    vpsllq %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf3,0xd9]
   6087 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   6088 ; X64-NEXT:    vpsllq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf3,0xd1]
   6089 ; X64-NEXT:    vpsllq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf3,0xc1]
   6090 ; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
   6091 ; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
   6092 ; X64-NEXT:    retq # encoding: [0xc3]
   6093   %res = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
   6094   %res1 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
   6095   %res2 = call <4 x i64> @llvm.x86.avx512.mask.psll.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
   6096   %res3 = add <4 x i64> %res, %res1
   6097   %res4 = add <4 x i64> %res3, %res2
   6098   ret <4 x i64> %res4
   6099 }
   6100 
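        ; Immediate shift tests: the same three masking forms, with a constant shift count of 3.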
declare <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrl_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
; X86-NEXT:    vpsrlq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x03]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x73,0xd0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x73,0xd0,0x03]
; X64-NEXT:    vpsrlq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x73,0xd0,0x03]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrl.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res2, %res3
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrl_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
; X86-NEXT:    vpsrlq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x03]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlq $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x73,0xd0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrlq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x73,0xd0,0x03]
; X64-NEXT:    vpsrlq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x73,0xd0,0x03]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrl.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res2, %res3
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrl_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_128:
; X86:       # %bb.0:
; X86-NEXT:    vpsrld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
; X86-NEXT:    vpsrld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x03]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_128:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xd0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xd0,0x03]
; X64-NEXT:    vpsrld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xd0,0x03]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrl.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res2, %res3
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrl_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrl_di_256:
; X86:       # %bb.0:
; X86-NEXT:    vpsrld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
; X86-NEXT:    vpsrld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x03]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrl_di_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsrld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xd0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsrld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xd0,0x03]
; X64-NEXT:    vpsrld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xd0,0x03]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrl.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psll_di_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_di_128:
; X86:       # %bb.0:
; X86-NEXT:    vpslld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
; X86-NEXT:    vpslld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xf0,0x03]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_di_128:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $3, %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x72,0xf0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpslld $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xf0,0x03]
; X64-NEXT:    vpslld $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xf0,0x03]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psll.di.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psll_di_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psll_di_256:
; X86:       # %bb.0:
; X86-NEXT:    vpslld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
; X86-NEXT:    vpslld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xf0,0x03]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psll_di_256:
; X64:       # %bb.0:
; X64-NEXT:    vpslld $3, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x72,0xf0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpslld $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xf0,0x03]
; X64-NEXT:    vpslld $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xf0,0x03]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psll.di.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

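; Variable-count shifts: vpsrlvq/vpsrlvd, vpsravd, and vpsllvq/vpsllvd shift
; each element by the count in the corresponding lane of the second operand.
; As above, the unmasked form compresses from EVEX to the shorter VEX
; encoding, while the {%k1} and {%k1} {z} forms must keep the 0x62 EVEX
; prefix.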
declare <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrlv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv2_di:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
; X86-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv2_di:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x45,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x45,0xd1]
; X64-NEXT:    vpsrlvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x45,0xc1]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrlv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrlv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv4_di:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
; X86-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv4_di:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x45,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x45,0xd1]
; X64-NEXT:    vpsrlvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x45,0xc1]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrlv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrlv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv4_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
; X86-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv4_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x45,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x45,0xd1]
; X64-NEXT:    vpsrlvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x45,0xc1]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrlv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrlv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv8_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
; X86-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv8_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x45,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x45,0xd1]
; X64-NEXT:    vpsrlvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x45,0xc1]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrlv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psrav4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav4_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsravd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
; X86-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav4_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x46,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x46,0xd1]
; X64-NEXT:    vpsravd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x46,0xc1]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psrav4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psrav8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsravd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
; X86-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x46,0xd1]
; X64-NEXT:    vpsravd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x46,0xc1]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

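; All-constant operands: both the source vector and the shift counts are
; materialized from the constant pool, so this test checks the .LCPI loads
; and their fixup kinds rather than register-to-register codegen.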
define <8 x i32>@test_int_x86_avx512_mask_psrav8_si_const() {
; X86-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X86-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsravd {{\.LCPI.*}}, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav8_si_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{.*}}(%rip), %ymm0 # EVEX TO VEX Compression ymm0 = [2,9,4294967284,23,4294967270,37,4294967256,51]
; X64-NEXT:    # encoding: [0xc5,0xfd,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsravd {{.*}}(%rip), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x46,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 5, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psrav8.si(<8 x i32> <i32 2, i32 9, i32 -12, i32 23, i32 -26, i32 37, i32 -40, i32 51>, <8 x i32> <i32 1, i32 18, i32 35, i32 52, i32 69, i32 15, i32 32, i32 49>, <8 x i32> zeroinitializer, i8 -1)
  ret <8 x i32> %res
}

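; The variable shift-left (psllv) tests mirror the psrlv tests above but
; select opcode 0x47 in place of 0x45.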
declare <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psllv2_di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv2_di:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
; X86-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv2_di:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf9,0x47,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x47,0xd1]
; X64-NEXT:    vpsllvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x47,0xc1]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psllv2.di(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psllv4_di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv4_di:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
; X86-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv4_di:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xfd,0x47,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x47,0xd1]
; X64-NEXT:    vpsllvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x47,0xc1]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psllv4.di(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_psllv4_si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv4_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
; X86-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv4_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x47,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x47,0xd1]
; X64-NEXT:    vpsllvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x47,0xc1]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.psllv4.si(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_psllv8_si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv8_si:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
; X86-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv8_si:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x47,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x47,0xd1]
; X64-NEXT:    vpsllvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x47,0xc1]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.psllv8.si(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

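; Zero-extension (pmovzx*) tests. The autogenerated lane-map comments of the
; form "xmm0[0],zero,..." show where each source element lands in the wider
; destination and which bytes are zeroed.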
   6619 declare <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8>, <4 x i32>, i8)
   6620 
   6621 define <4 x i32>@test_int_x86_avx512_mask_pmovzxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
   6622 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
   6623 ; X86:       # %bb.0:
   6624 ; X86-NEXT:    vpmovzxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xd0]
   6625 ; X86-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   6626 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6627 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   6628 ; X86-NEXT:    vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
   6629 ; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   6630 ; X86-NEXT:    vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
   6631 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   6632 ; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
   6633 ; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
   6634 ; X86-NEXT:    retl # encoding: [0xc3]
   6635 ;
   6636 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_128:
   6637 ; X64:       # %bb.0:
   6638 ; X64-NEXT:    vpmovzxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x31,0xd0]
   6639 ; X64-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   6640 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   6641 ; X64-NEXT:    vpmovzxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x31,0xc8]
   6642 ; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   6643 ; X64-NEXT:    vpmovzxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x31,0xc0]
   6644 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
   6645 ; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
   6646 ; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
   6647 ; X64-NEXT:    retq # encoding: [0xc3]
   6648   %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
   6649   %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
   6650   %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
   6651   %res3 = add <4 x i32> %res, %res1
   6652   %res4 = add <4 x i32> %res3, %res2
   6653   ret <4 x i32> %res4
   6654 }
   6655 
   6656 declare <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8>, <8 x i32>, i8)
   6657 
   6658 define <8 x i32>@test_int_x86_avx512_mask_pmovzxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
   6659 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
   6660 ; X86:       # %bb.0:
   6661 ; X86-NEXT:    vpmovzxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xd0]
   6662 ; X86-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   6663 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6664 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   6665 ; X86-NEXT:    vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
   6666 ; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   6667 ; X86-NEXT:    vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
   6668 ; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   6669 ; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
   6670 ; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
   6671 ; X86-NEXT:    retl # encoding: [0xc3]
   6672 ;
   6673 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_d_256:
   6674 ; X64:       # %bb.0:
   6675 ; X64-NEXT:    vpmovzxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x31,0xd0]
   6676 ; X64-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   6677 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   6678 ; X64-NEXT:    vpmovzxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x31,0xc8]
   6679 ; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   6680 ; X64-NEXT:    vpmovzxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x31,0xc0]
   6681 ; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero,xmm0[4],zero,zero,zero,xmm0[5],zero,zero,zero,xmm0[6],zero,zero,zero,xmm0[7],zero,zero,zero
   6682 ; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
   6683 ; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
   6684 ; X64-NEXT:    retq # encoding: [0xc3]
   6685   %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
   6686   %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
   6687   %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
   6688   %res3 = add <8 x i32> %res, %res1
   6689   %res4 = add <8 x i32> %res3, %res2
   6690   ret <8 x i32> %res4
   6691 }
   6692 
   6693 declare <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8>, <2 x i64>, i8)
   6694 
   6695 define <2 x i64>@test_int_x86_avx512_mask_pmovzxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
   6696 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
   6697 ; X86:       # %bb.0:
   6698 ; X86-NEXT:    vpmovzxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xd0]
   6699 ; X86-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   6700 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6701 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   6702 ; X86-NEXT:    vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
   6703 ; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   6704 ; X86-NEXT:    vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
   6705 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   6706 ; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
   6707 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
   6708 ; X86-NEXT:    retl # encoding: [0xc3]
   6709 ;
   6710 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_128:
   6711 ; X64:       # %bb.0:
   6712 ; X64-NEXT:    vpmovzxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x32,0xd0]
   6713 ; X64-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   6714 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   6715 ; X64-NEXT:    vpmovzxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x32,0xc8]
   6716 ; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   6717 ; X64-NEXT:    vpmovzxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x32,0xc0]
   6718 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero
   6719 ; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
   6720 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
   6721 ; X64-NEXT:    retq # encoding: [0xc3]
   6722   %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
   6723   %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
   6724   %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
   6725   %res3 = add <2 x i64> %res, %res1
   6726   %res4 = add <2 x i64> %res3, %res2
   6727   ret <2 x i64> %res4
   6728 }
   6729 
   6730 declare <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8>, <4 x i64>, i8)
   6731 
   6732 define <4 x i64>@test_int_x86_avx512_mask_pmovzxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
   6733 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
   6734 ; X86:       # %bb.0:
   6735 ; X86-NEXT:    vpmovzxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xd0]
   6736 ; X86-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   6737 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6738 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   6739 ; X86-NEXT:    vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
   6740 ; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   6741 ; X86-NEXT:    vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
   6742 ; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   6743 ; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
   6744 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   6745 ; X86-NEXT:    retl # encoding: [0xc3]
   6746 ;
   6747 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxb_q_256:
   6748 ; X64:       # %bb.0:
   6749 ; X64-NEXT:    vpmovzxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x32,0xd0]
   6750 ; X64-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   6751 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   6752 ; X64-NEXT:    vpmovzxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x32,0xc8]
   6753 ; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   6754 ; X64-NEXT:    vpmovzxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x32,0xc0]
   6755 ; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,zero,zero,zero,zero,xmm0[1],zero,zero,zero,zero,zero,zero,zero,xmm0[2],zero,zero,zero,zero,zero,zero,zero,xmm0[3],zero,zero,zero,zero,zero,zero,zero
   6756 ; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
   6757 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   6758 ; X64-NEXT:    retq # encoding: [0xc3]
   6759   %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
   6760   %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
   6761   %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
   6762   %res3 = add <4 x i64> %res, %res1
   6763   %res4 = add <4 x i64> %res3, %res2
   6764   ret <4 x i64> %res4
   6765 }
   6766 
   6767 declare <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32>, <2 x i64>, i8)
   6768 
   6769 define <2 x i64>@test_int_x86_avx512_mask_pmovzxd_q_128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2) {
   6770 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
   6771 ; X86:       # %bb.0:
   6772 ; X86-NEXT:    vpmovzxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xd0]
   6773 ; X86-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero
   6774 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6775 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   6776 ; X86-NEXT:    vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
   6777 ; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
   6778 ; X86-NEXT:    vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
   6779 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
   6780 ; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
   6781 ; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
   6782 ; X86-NEXT:    retl # encoding: [0xc3]
   6783 ;
   6784 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_128:
   6785 ; X64:       # %bb.0:
   6786 ; X64-NEXT:    vpmovzxdq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x35,0xd0]
   6787 ; X64-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero
   6788 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   6789 ; X64-NEXT:    vpmovzxdq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x35,0xc8]
   6790 ; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero
   6791 ; X64-NEXT:    vpmovzxdq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x35,0xc0]
   6792 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero
   6793 ; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
   6794 ; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
   6795 ; X64-NEXT:    retq # encoding: [0xc3]
   6796   %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 %x2)
   6797   %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> zeroinitializer, i8 %x2)
   6798   %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxd.q.128(<4 x i32> %x0, <2 x i64> %x1, i8 -1)
   6799   %res3 = add <2 x i64> %res, %res1
   6800   %res4 = add <2 x i64> %res3, %res2
   6801   ret <2 x i64> %res4
   6802 }
   6803 
   6804 declare <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32>, <4 x i64>, i8)
   6805 
   6806 define <4 x i64>@test_int_x86_avx512_mask_pmovzxd_q_256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2) {
   6807 ; X86-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
   6808 ; X86:       # %bb.0:
   6809 ; X86-NEXT:    vpmovzxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xd0]
   6810 ; X86-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   6811 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   6812 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   6813 ; X86-NEXT:    vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
   6814 ; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   6815 ; X86-NEXT:    vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
   6816 ; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   6817 ; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
   6818 ; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   6819 ; X86-NEXT:    retl # encoding: [0xc3]
   6820 ;
   6821 ; X64-LABEL: test_int_x86_avx512_mask_pmovzxd_q_256:
   6822 ; X64:       # %bb.0:
   6823 ; X64-NEXT:    vpmovzxdq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x35,0xd0]
   6824 ; X64-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   6825 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   6826 ; X64-NEXT:    vpmovzxdq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x35,0xc8]
   6827 ; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   6828 ; X64-NEXT:    vpmovzxdq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x35,0xc0]
   6829 ; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   6830 ; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
   6831 ; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
   6832 ; X64-NEXT:    retq # encoding: [0xc3]
   6833   %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 %x2)
   6834   %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> zeroinitializer, i8 %x2)
   6835   %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxd.q.256(<4 x i32> %x0, <4 x i64> %x1, i8 -1)
   6836   %res3 = add <4 x i64> %res, %res1
   6837   %res4 = add <4 x i64> %res3, %res2
   6838   ret <4 x i64> %res4
   6839 }
   6840 
declare <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovzxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x33,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x33,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vpmovzxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x33,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovzxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovzxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xd0]
; X86-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x33,0xd0]
; X64-NEXT:    # ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x33,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpmovzxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x33,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero,xmm0[4],zero,xmm0[5],zero,xmm0[6],zero,xmm0[7],zero
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovzxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovzxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xd0]
; X86-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
; X86-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X86-NEXT:    vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
; X86-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x34,0xd0]
; X64-NEXT:    # xmm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x34,0xc8]
; X64-NEXT:    # xmm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vpmovzxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x34,0xc0]
; X64-NEXT:    # xmm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovzxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovzxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovzxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xd0]
; X86-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
; X86-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
; X86-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovzxw_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovzxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x34,0xd0]
; X64-NEXT:    # ymm2 = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovzxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x34,0xc8]
; X64-NEXT:    # ymm1 {%k1} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    vpmovzxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x34,0xc0]
; X64-NEXT:    # ymm0 {%k1} {z} = xmm0[0],zero,zero,zero,xmm0[1],zero,zero,zero,xmm0[2],zero,zero,zero,xmm0[3],zero,zero,zero
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovzxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

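; The pmovsx tests mirror the pmovzx ones above but use sign extension
; (VPMOVSXB*/VPMOVSXW*). Unlike the zero-extend cases, no shuffle comments
; are printed, since a sign extension cannot be expressed as a shuffle of
; the input with zeros.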
declare <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovsxb_d_128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
; X86-NEXT:    vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x21,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x21,0xc8]
; X64-NEXT:    vpmovsxbd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x21,0xc0]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxb.d.128(<16 x i8> %x0, <4 x i32> %x1, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovsxb_d_256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
; X86-NEXT:    vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x21,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x21,0xc8]
; X64-NEXT:    vpmovsxbd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x21,0xc0]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxb.d.256(<16 x i8> %x0, <8 x i32> %x1, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovsxb_q_128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x22,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x22,0xc8]
; X64-NEXT:    vpmovsxbq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x22,0xc0]
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> zeroinitializer, i8 %x2)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxb.q.128(<16 x i8> %x0, <2 x i64> %x1, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovsxb_q_256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
; X86-NEXT:    vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxb_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxbq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x22,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxbq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x22,0xc8]
; X64-NEXT:    vpmovsxbq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x22,0xc0]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> zeroinitializer, i8 %x2)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxb.q.256(<16 x i8> %x0, <4 x i64> %x1, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pmovsxw_d_128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
; X86-NEXT:    vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxwd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x23,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x23,0xc8]
; X64-NEXT:    vpmovsxwd %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x23,0xc0]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> zeroinitializer, i8 %x2)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pmovsxw.d.128(<8 x i16> %x0, <4 x i32> %x1, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pmovsxw_d_256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
; X86-NEXT:    vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxwd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x23,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x23,0xc8]
; X64-NEXT:    vpmovsxwd %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x23,0xc0]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> zeroinitializer, i8 %x2)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pmovsxw.d.256(<8 x i16> %x0, <8 x i32> %x1, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pmovsxw_q_128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxwq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x24,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x24,0xc8]
; X64-NEXT:    vpmovsxwq %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x24,0xc0]
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> zeroinitializer, i8 %x2)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pmovsxw.q.128(<8 x i16> %x0, <2 x i64> %x1, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pmovsxw_q_256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpmovsxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
; X86-NEXT:    vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovsxw_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmovsxwq %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x24,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmovsxwq %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x24,0xc8]
; X64-NEXT:    vpmovsxwq %xmm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x24,0xc0]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> zeroinitializer, i8 %x2)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pmovsxw.q.256(<8 x i16> %x0, <4 x i64> %x1, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

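; VPSRAQ (64-bit arithmetic right shift) only exists with AVX-512; there is
; no VEX equivalent, which is why the shift encodings below stay EVEX even
; when unmasked (no "EVEX TO VEX Compression" note on the vpsraq lines).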
declare <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psra_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpsraq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
; X86-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpsraq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf1,0xfd,0x08,0xe2,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xe2,0xd1]
; X64-NEXT:    vpsraq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xe2,0xc1]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64>, <2 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psra_q_256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpsraq %xmm1, %ymm0, %ymm3 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
; X86-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsraq %xmm1, %ymm0, %ymm3 # encoding: [0x62,0xf1,0xfd,0x28,0xe2,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsraq %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe2,0xd1]
; X64-NEXT:    vpsraq %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xe2,0xc1]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.q.256(<4 x i64> %x0, <2 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

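; The same shift tested with an immediate count (VPSRAQ $3); again all three
; masking variants (merge, zero, unmasked) are exercised and summed.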
declare <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psra_qi_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_128:
; X86:       # %bb.0:
; X86-NEXT:    vpsraq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
; X86-NEXT:    vpsraq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x03]
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_128:
; X64:       # %bb.0:
; X64-NEXT:    vpsraq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xe0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsraq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xe0,0x03]
; X64-NEXT:    vpsraq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xe0,0x03]
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psra.qi.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psra_qi_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psra_qi_256:
; X86:       # %bb.0:
; X86-NEXT:    vpsraq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
; X86-NEXT:    vpsraq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x03]
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psra_qi_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsraq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xe0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpsraq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xe0,0x03]
; X64-NEXT:    vpsraq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xe0,0x03]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psra.qi.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

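; VPSRAVQ shifts each 64-bit element by the count held in the corresponding
; element of the second source, allowing a different shift amount per lane.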
declare <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpsravq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
; X86-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpsravq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x46,0xd1]
; X64-NEXT:    vpsravq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x46,0xc1]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

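; With constant operands the shift is not folded to a constant here; the
; backend still selects VPSRAVQ, loading both vectors from the constant
; pool. Note that variable shift counts are not masked: a count of 64 or
; more (the i64 90 below) replicates the sign bit, so -9 >> 90 yields -1.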
define <2 x i64>@test_int_x86_avx512_mask_psrav_q_128_const(i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa {{\.LCPI.*}}, %xmm0 # EVEX TO VEX Compression xmm0 = [2,0,4294967287,4294967295]
; X86-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsravq {{\.LCPI.*}}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav_q_128_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa {{.*}}(%rip), %xmm0 # EVEX TO VEX Compression xmm0 = [2,18446744073709551607]
; X64-NEXT:    # encoding: [0xc5,0xf9,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 4, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsravq {{.*}}(%rip), %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x08,0x46,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.psrav.q.128(<2 x i64> <i64 2, i64 -9>, <2 x i64> <i64 1, i64 90>, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_psrav_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpsravq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
; X86-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpsravq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x46,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpsravq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x46,0xd1]
; X64-NEXT:    vpsravq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x46,0xc1]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.psrav.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

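; VCVTDQ2PD converts signed doublewords to doubles; the 128-bit form uses
; only the low two elements of the source. These tests sum just the
; merge-masked and unmasked results, so no {z} encodings are checked.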
declare <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_dq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vcvtdq2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0xe6,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0xe6,0xc8]
; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtdq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_dq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
; X86-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtdq2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0xe6,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0xe6,0xc8]
; X64-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtdq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

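; VCVTUDQ2PD (unsigned conversion) exists only in AVX-512, so these
; encodings remain EVEX even in the unmasked case, unlike the signed
; VCVTDQ2PD above which compresses to VEX.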
declare <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_udq2pd_128(<4 x i32> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vcvtudq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vcvtudq2pd %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7e,0x08,0x7a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x7a,0xc8]
; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtudq2pd.128(<4 x i32> %x0, <2 x double> %x1, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_udq2pd_256(<4 x i32> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtudq2pd %xmm0, %ymm2 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
; X86-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtudq2pd %xmm0, %ymm2 # encoding: [0x62,0xf1,0x7e,0x28,0x7a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x7a,0xc8]
; X64-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtudq2pd.256(<4 x i32> %x0, <4 x double> %x1, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

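; The valign tests below check VALIGND/VALIGNQ, which concatenate the two
; sources and shift right by an element count. Masked forms select the
; requested instruction, but unmasked forms get canonicalized: a dword
; alignment of 2 becomes VPALIGNR $8 in the 128-bit case, and a dword
; alignment of 6 becomes VALIGNQ $3 in the 256-bit case.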
   7545 declare <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32>, <4 x i32>, i32, <4 x i32>, i8)
   7546 
   7547 define <4 x i32>@test_int_x86_avx512_mask_valign_d_128(<4 x i32> %x0, <4 x i32> %x1,<4 x i32> %x3, i8 %x4) {
   7548 ; X86-LABEL: test_int_x86_avx512_mask_valign_d_128:
   7549 ; X86:       # %bb.0:
   7550 ; X86-NEXT:    vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
   7551 ; X86-NEXT:    # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
   7552 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   7553 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   7554 ; X86-NEXT:    valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
   7555 ; X86-NEXT:    # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
   7556 ; X86-NEXT:    valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
   7557 ; X86-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
   7558 ; X86-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
   7559 ; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   7560 ; X86-NEXT:    retl # encoding: [0xc3]
   7561 ;
   7562 ; X64-LABEL: test_int_x86_avx512_mask_valign_d_128:
   7563 ; X64:       # %bb.0:
   7564 ; X64-NEXT:    vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
   7565 ; X64-NEXT:    # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
   7566 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   7567 ; X64-NEXT:    valignd $2, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x09,0x03,0xd1,0x02]
   7568 ; X64-NEXT:    # xmm2 {%k1} = xmm1[2,3],xmm0[0,1]
   7569 ; X64-NEXT:    valignd $2, %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0x89,0x03,0xc1,0x02]
   7570 ; X64-NEXT:    # xmm0 {%k1} {z} = xmm1[2,3],xmm0[0,1]
   7571 ; X64-NEXT:    vpaddd %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe1,0xfe,0xc0]
   7572 ; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
   7573 ; X64-NEXT:    retq # encoding: [0xc3]
   7574   %res = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 %x4)
   7575   %res1 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> %x3, i8 -1)
   7576   %res2 = call <4 x i32> @llvm.x86.avx512.mask.valign.d.128(<4 x i32> %x0, <4 x i32> %x1, i32 2, <4 x i32> zeroinitializer,i8 %x4)
   7577   %res3 = add <4 x i32> %res, %res1
   7578   %res4 = add <4 x i32> %res3, %res2
   7579   ret <4 x i32> %res4
   7580 }
   7581 
   7582 declare <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32>, <8 x i32>, i32, <8 x i32>, i8)
   7583 
   7584 define <8 x i32>@test_int_x86_avx512_mask_valign_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x3, i8 %x4) {
   7585 ; X86-LABEL: test_int_x86_avx512_mask_valign_d_256:
   7586 ; X86:       # %bb.0:
   7587 ; X86-NEXT:    valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
   7588 ; X86-NEXT:    # ymm3 = ymm1[3],ymm0[0,1,2]
   7589 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   7590 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   7591 ; X86-NEXT:    valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
   7592 ; X86-NEXT:    # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
   7593 ; X86-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   7594 ; X86-NEXT:    retl # encoding: [0xc3]
   7595 ;
   7596 ; X64-LABEL: test_int_x86_avx512_mask_valign_d_256:
   7597 ; X64:       # %bb.0:
   7598 ; X64-NEXT:    valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
   7599 ; X64-NEXT:    # ymm3 = ymm1[3],ymm0[0,1,2]
   7600 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   7601 ; X64-NEXT:    valignd $6, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x03,0xd1,0x06]
   7602 ; X64-NEXT:    # ymm2 {%k1} = ymm1[6,7],ymm0[0,1,2,3,4,5]
   7603 ; X64-NEXT:    vpaddd %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc3]
   7604 ; X64-NEXT:    retq # encoding: [0xc3]
   7605   %res = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 %x4)
   7606   %res1 = call <8 x i32> @llvm.x86.avx512.mask.valign.d.256(<8 x i32> %x0, <8 x i32> %x1, i32 6, <8 x i32> %x3, i8 -1)
   7607   %res2 = add <8 x i32> %res, %res1
   7608   ret <8 x i32> %res2
   7609 }
   7610 
   7611 declare <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64>, <2 x i64>, i32, <2 x i64>, i8)
   7612 
   7613 define <2 x i64>@test_int_x86_avx512_mask_valign_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x3, i8 %x4) {
   7614 ; X86-LABEL: test_int_x86_avx512_mask_valign_q_128:
   7615 ; X86:       # %bb.0:
   7616 ; X86-NEXT:    vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
   7617 ; X86-NEXT:    # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
   7618 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   7619 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   7620 ; X86-NEXT:    valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
   7621 ; X86-NEXT:    # xmm2 {%k1} = xmm1[1],xmm0[0]
   7622 ; X86-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   7623 ; X86-NEXT:    retl # encoding: [0xc3]
   7624 ;
   7625 ; X64-LABEL: test_int_x86_avx512_mask_valign_q_128:
   7626 ; X64:       # %bb.0:
   7627 ; X64-NEXT:    vpalignr $8, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x79,0x0f,0xd9,0x08]
   7628 ; X64-NEXT:    # xmm3 = xmm1[8,9,10,11,12,13,14,15],xmm0[0,1,2,3,4,5,6,7]
   7629 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   7630 ; X64-NEXT:    valignq $1, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0x03,0xd1,0x01]
   7631 ; X64-NEXT:    # xmm2 {%k1} = xmm1[1],xmm0[0]
   7632 ; X64-NEXT:    vpaddq %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc3]
   7633 ; X64-NEXT:    retq # encoding: [0xc3]
   7634   %res = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 %x4)
   7635   %res1 = call <2 x i64> @llvm.x86.avx512.mask.valign.q.128(<2 x i64> %x0, <2 x i64> %x1, i32 1, <2 x i64> %x3, i8 -1)
   7636   %res2 = add <2 x i64> %res, %res1
   7637   ret <2 x i64> %res2
   7638 }
   7639 
declare <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64>, <4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_valign_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_valign_q_256:
; X86:       # %bb.0:
; X86-NEXT:    valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
; X86-NEXT:    # ymm3 = ymm1[3],ymm0[0,1,2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
; X86-NEXT:    # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
; X86-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_valign_q_256:
; X64:       # %bb.0:
; X64-NEXT:    valignq $3, %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf3,0xfd,0x28,0x03,0xd9,0x03]
; X64-NEXT:    # ymm3 = ymm1[3],ymm0[0,1,2]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    valignq $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0x03,0xd1,0x03]
; X64-NEXT:    # ymm2 {%k1} = ymm1[3],ymm0[0,1,2]
; X64-NEXT:    vpaddq %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.valign.q.256(<4 x i64> %x0, <4 x i64> %x1, i32 3, <4 x i64> %x3, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

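; The vpermilvar tests cover all four type/width combinations and check the
; merge-masked, zero-masked, and unmasked forms of each intrinsic; only the
; unmasked instruction can be compressed from EVEX to the shorter VEX encoding.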
declare <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_vpermilvar_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermilpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
; X86-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermilpd %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0d,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x0d,0xd1]
; X64-NEXT:    vpermilpd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x0d,0xc1]
; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT:    vaddpd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.vpermilvar.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res2, %res3
  ret <4 x double> %res4
}

declare <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_vpermilvar_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vpermilpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
; X86-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
; X86-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X86-NEXT:    vaddpd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vpermilpd %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0d,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilpd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x0d,0xd1]
; X64-NEXT:    vpermilpd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x0d,0xc1]
; X64-NEXT:    vaddpd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0x58,0xc0]
; X64-NEXT:    vaddpd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> zeroinitializer, i8 %x3)
  %res2 = call <2 x double> @llvm.x86.avx512.mask.vpermilvar.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
  %res3 = fadd <2 x double> %res, %res1
  %res4 = fadd <2 x double> %res3, %res2
  ret <2 x double> %res4
}

declare <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_vpermilvar_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermilps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
; X86-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
; X86-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X86-NEXT:    vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermilps %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x0c,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x0c,0xd1]
; X64-NEXT:    vpermilps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x0c,0xc1]
; X64-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X64-NEXT:    vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.vpermilvar.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vpermilvar_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vpermilps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
; X86-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermilvar_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vpermilps %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x0c,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermilps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x0c,0xd1]
; X64-NEXT:    vpermilps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x0c,0xc1]
; X64-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe0,0x58,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> zeroinitializer, i8 %x3)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vpermilvar.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

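; vextractf32x4: extracting the upper 128-bit lane. The unmasked form
; compresses to vextractf128, while the masked and zero-masked forms keep the
; AVX512 mnemonic so they can carry %k1.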
declare <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float>, i32, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_vextractf32x4_256(<8 x float> %x0, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
; X86-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
; X86-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X86-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vextractf32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    vextractf128 $1, %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x19,0xc2,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vextractf32x4 $1, %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x19,0xc1,0x01]
; X64-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x19,0xc0,0x01]
; X64-NEXT:    vaddps %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc0]
; X64-NEXT:    vaddps %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe8,0x58,0xc0]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res  = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> %x2, i8 %x3)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 %x3)
  %res2 = call <4 x float> @llvm.x86.avx512.mask.vextractf32x4.256(<8 x float> %x0, i32 1, <4 x float> zeroinitializer, i8 -1)
  %res3 = fadd <4 x float> %res, %res1
  %res4 = fadd <4 x float> %res2, %res3
  ret <4 x float> %res4
}

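; insertf32x4/inserti32x4: the mirror image of the extract test above; the
; unmasked insert compresses to vinsertf128/vinserti128.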
declare <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float>, <4 x float>, i32, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_insertf32x4_256(<8 x float> %x0, <4 x float> %x1, <8 x float> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
; X86-NEXT:    vaddps %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xd3]
; X86-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
; X86-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_insertf32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xd1,0x01]
; X64-NEXT:    vaddps %ymm3, %ymm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xd3]
; X64-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc1,0x01]
; X64-NEXT:    vaddps %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 %x4)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> %x3, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.insertf32x4.256(<8 x float> %x0, <4 x float> %x1, i32 1, <8 x float> zeroinitializer, i8 %x4)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res2, %res3
  ret <8 x float> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32>, <4 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_inserti32x4_256(<8 x i32> %x0, <4 x i32> %x1, <8 x i32> %x3, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
; X86-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
; X86-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_inserti32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd9,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xd1,0x01]
; X64-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc1,0x01]
; X64-NEXT:    vpaddd %ymm0, %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xe5,0xfe,0xc0]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> %x3, i8 -1)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.inserti32x4.256(<8 x i32> %x0, <4 x i32> %x1, i32 1, <8 x i32> zeroinitializer, i8 %x4)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res2, %res3
  ret <8 x i32> %res4
}

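; The max/min tests check the three masking modes of vmaxps/vminps at 256- and
; 128-bit widths. The unmasked variants produce identical code on both targets,
; so they share the common CHECK prefix.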
define <8 x float> @test_mm512_maskz_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_max_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_max_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_max_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmaxps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5f,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_max_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_max_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5f,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.max.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_max_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_max_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_max_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_max_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_max_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vmaxps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5f,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_max_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_max_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmaxps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5f,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.max.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <8 x float> @test_mm512_maskz_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_min_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vminps %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x5d,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_mask_min_ps_256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_min_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vminps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5d,0xd1]
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %src, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_mm512_min_ps_256(<8 x float> %a0, <8 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_min_ps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5d,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float>zeroinitializer, i8 -1)
  ret <8 x float> %res
}
declare <8 x float> @llvm.x86.avx512.mask.min.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <4 x float> @test_mm512_maskz_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; X86-LABEL: test_mm512_maskz_min_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_maskz_min_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vminps %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0x89,0x5d,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_mask_min_ps_128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask) {
; X86-LABEL: test_mm512_mask_min_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mm512_mask_min_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vminps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5d,0xd1]
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %src, i8 %mask)
  ret <4 x float> %res
}

define <4 x float> @test_mm512_min_ps_128(<4 x float> %a0, <4 x float> %a1, i8 %mask) {
; CHECK-LABEL: test_mm512_min_ps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vminps %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5d,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float>zeroinitializer, i8 -1)
  ret <4 x float> %res
}
declare <4 x float> @llvm.x86.avx512.mask.min.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

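; The cmp/ucmp tests sweep all eight comparison predicates. Predicates 3
; (FALSE) and 7 (TRUE) fold to constant masks, so only six compare
; instructions are emitted: lane 3 is left as the zero from vpxor, and lane 7
; is filled with 255 (all eight bits set) when unmasked, or with %mask itself
; when masking is applied.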
define <8 x i8> @test_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT:    vpcmpgtd %ymm0, %ymm1, %k1 # encoding: [0x62,0xf1,0x75,0x28,0x66,0xc8]
; CHECK-NEXT:    vpcmpled %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltd %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xe1,0x05]
; CHECK-NEXT:    vpcmpgtd %ymm1, %ymm0, %k5 # encoding: [0x62,0xf1,0x7d,0x28,0x66,0xe9]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_cmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X86-NEXT:    vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
; X86-NEXT:    vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
; X86-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
; X86-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT:    kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT:    kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT:    kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT:    kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X64-NEXT:    vpcmpgtd %ymm0, %ymm1, %k2 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x66,0xd0]
; X64-NEXT:    vpcmpled %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xd9,0x02]
; X64-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltd %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe9,0x05]
; X64-NEXT:    vpcmpgtd %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x66,0xc9]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone

define <8 x i8> @test_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7d,0x28,0x76,0xc1]
; CHECK-NEXT:    vpcmpltud %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xc9,0x01]
; CHECK-NEXT:    vpcmpleud %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqd %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0x7d,0x28,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltud %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe1,0x05]
; CHECK-NEXT:    vpcmpnleud %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0x7d,0x28,0x1e,0xe9,0x06]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT:    movl $255, %eax # encoding: [0xb8,0xff,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

define <8 x i8> @test_mask_ucmp_d_256(<8 x i32> %a0, <8 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb7,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X86-NEXT:    vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
; X86-NEXT:    vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
; X86-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
; X86-NEXT:    vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT:    vpinsrw $0, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x00]
; X86-NEXT:    kmovw %k2, %ecx # encoding: [0xc5,0xf8,0x93,0xca]
; X86-NEXT:    vpinsrw $1, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x01]
; X86-NEXT:    kmovw %k3, %ecx # encoding: [0xc5,0xf8,0x93,0xcb]
; X86-NEXT:    vpinsrw $2, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x02]
; X86-NEXT:    kmovw %k4, %ecx # encoding: [0xc5,0xf8,0x93,0xcc]
; X86-NEXT:    vpinsrw $4, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x04]
; X86-NEXT:    kmovw %k5, %ecx # encoding: [0xc5,0xf8,0x93,0xcd]
; X86-NEXT:    vpinsrw $5, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x05]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    vpinsrw $6, %ecx, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc1,0x06]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpcmpeqd %ymm1, %ymm0, %k0 {%k1} # encoding: [0x62,0xf1,0x7d,0x29,0x76,0xc1]
; X64-NEXT:    vpcmpltud %ymm1, %ymm0, %k2 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd1,0x01]
; X64-NEXT:    vpcmpleud %ymm1, %ymm0, %k3 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xd9,0x02]
; X64-NEXT:    vpcmpneqd %ymm1, %ymm0, %k4 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltud %ymm1, %ymm0, %k5 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xe9,0x05]
; X64-NEXT:    vpcmpnleud %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x1e,0xc9,0x06]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    vpinsrw $7, %edi, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc7,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32> %a0, <8 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.d.256(<8 x i32>, <8 x i32>, i32, i8) nounwind readnone

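; The q variants repeat the same predicate sweep on <4 x i64>. With only four
; elements, TRUE yields a mask of 15, and the masked tests clear the upper
; bits of %mask with a kshiftlw/kshiftrw pair before inserting it as lane 7.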
   8311 define <8 x i8> @test_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
   8312 ; CHECK-LABEL: test_cmp_q_256:
   8313 ; CHECK:       # %bb.0:
   8314 ; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
   8315 ; CHECK-NEXT:    vpcmpgtq %ymm0, %ymm1, %k1 # encoding: [0x62,0xf2,0xf5,0x28,0x37,0xc8]
   8316 ; CHECK-NEXT:    vpcmpleq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd1,0x02]
   8317 ; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
   8318 ; CHECK-NEXT:    vpcmpnltq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xe1,0x05]
   8319 ; CHECK-NEXT:    vpcmpgtq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf2,0xfd,0x28,0x37,0xe9]
   8320 ; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
   8321 ; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   8322 ; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   8323 ; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
   8324 ; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   8325 ; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
   8326 ; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
   8327 ; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
   8328 ; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
   8329 ; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
   8330 ; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
   8331 ; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
   8332 ; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
   8333 ; CHECK-NEXT:    movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
   8334 ; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
   8335 ; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   8336 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   8337   %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
   8338   %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
   8339   %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
   8340   %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
   8341   %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
   8342   %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
   8343   %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
   8344   %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
   8345   %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
   8346   %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
   8347   %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
   8348   %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
   8349   %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
   8350   %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
   8351   %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
   8352   %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
   8353   ret <8 x i8> %vec7
   8354 }
   8355 
   8356 define <8 x i8> @test_mask_cmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
   8357 ; X86-LABEL: test_mask_cmp_q_256:
   8358 ; X86:       # %bb.0:
   8359 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   8360 ; X86-NEXT:    kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
   8361 ; X86-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
   8362 ; X86-NEXT:    vpcmpgtq %ymm0, %ymm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x2a,0x37,0xc8]
   8363 ; X86-NEXT:    vpcmpleq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xd9,0x02]
   8364 ; X86-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
   8365 ; X86-NEXT:    vpcmpnltq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe9,0x05]
   8366 ; X86-NEXT:    vpcmpgtq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x37,0xf1]
   8367 ; X86-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
   8368 ; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
   8369 ; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
   8370 ; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
   8371 ; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
   8372 ; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
   8373 ; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
   8374 ; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
; X64-NEXT:    vpcmpgtq %ymm0, %ymm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x2a,0x37,0xc8]
; X64-NEXT:    vpcmpleq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xd9,0x02]
; X64-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe9,0x05]
; X64-NEXT:    vpcmpgtq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x37,0xf1]
; X64-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone

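; Unmasked unsigned compare on <4 x i64>: all eight predicates are exercised,
; but only six vpcmp instructions are expected below. Predicate 3 (always
; false) folds to the zero already in the result register, and predicate 7
; (always true) folds to the constant 15 (all four lanes set).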
define <8 x i8> @test_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x29,0xc1]
; CHECK-NEXT:    vpcmpltuq %ymm1, %ymm0, %k1 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xc9,0x01]
; CHECK-NEXT:    vpcmpleuq %ymm1, %ymm0, %k2 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqq %ymm1, %ymm0, %k3 # encoding: [0x62,0xf3,0xfd,0x28,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k4 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe1,0x05]
; CHECK-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k5 # encoding: [0x62,0xf3,0xfd,0x28,0x1e,0xe9,0x06]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT:    movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

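; Masked variant: the always-true predicate reduces to the mask itself, so the
; kshiftlw/kshiftrw pair below clears all but the low four mask bits before
; that value is inserted as element 7.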
define <8 x i8> @test_mask_ucmp_q_256(<4 x i64> %a0, <4 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
; X86-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
; X86-NEXT:    vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
; X86-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
; X86-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
; X86-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
; X86-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT:    vpcmpeqq %ymm1, %ymm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x2a,0x29,0xc1]
; X64-NEXT:    vpcmpltuq %ymm1, %ymm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xc9,0x01]
; X64-NEXT:    vpcmpleuq %ymm1, %ymm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xd9,0x02]
; X64-NEXT:    vpcmpneqq %ymm1, %ymm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltuq %ymm1, %ymm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xe9,0x05]
; X64-NEXT:    vpcmpnleuq %ymm1, %ymm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x2a,0x1e,0xf1,0x06]
; X64-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64> %a0, <4 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.256(<4 x i64>, <4 x i64>, i32, i8) nounwind readnone

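; Signed compare on <4 x i32>, unmasked: always-false folds to 0, always-true
; to 15, and the lt/gt predicates select vpcmpgtd with operands swapped as
; needed.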
define <8 x i8> @test_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_cmp_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1 # encoding: [0x62,0xf1,0x75,0x08,0x66,0xc8]
; CHECK-NEXT:    vpcmpled %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltd %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xe1,0x05]
; CHECK-NEXT:    vpcmpgtd %xmm1, %xmm0, %k5 # encoding: [0x62,0xf1,0x7d,0x08,0x66,0xe9]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT:    movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

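; Masked signed <4 x i32> compare; element 7 again comes from the low four
; bits of the input mask, isolated by the kshift pair.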
define <8 x i8> @test_mask_cmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
; X86-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf1,0x75,0x0a,0x66,0xc8]
; X86-NEXT:    vpcmpled %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xd9,0x02]
; X86-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltd %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe9,0x05]
; X86-NEXT:    vpcmpgtd %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x66,0xf1]
; X86-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
; X64-NEXT:    vpcmpgtd %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf1,0x75,0x0a,0x66,0xc8]
; X64-NEXT:    vpcmpled %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xd9,0x02]
; X64-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltd %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe9,0x05]
; X64-NEXT:    vpcmpgtd %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x66,0xf1]
; X64-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone

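; Unsigned compare on <4 x i32>, unmasked: the unsigned predicates select the
; vpcmp{lt,le,nlt,nle}ud encodings instead of the signed gt/le forms.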
define <8 x i8> @test_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: test_ucmp_d_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7d,0x08,0x76,0xc1]
; CHECK-NEXT:    vpcmpltud %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xc9,0x01]
; CHECK-NEXT:    vpcmpleud %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqd %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0x7d,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltud %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe1,0x05]
; CHECK-NEXT:    vpcmpnleud %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0x7d,0x08,0x1e,0xe9,0x06]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT:    movl $15, %eax # encoding: [0xb8,0x0f,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

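; Masked unsigned <4 x i32> compare.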
define <8 x i8> @test_mask_ucmp_d_128(<4 x i32> %a0, <4 x i32> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_d_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
; X86-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
; X86-NEXT:    vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
; X86-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
; X86-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
; X86-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
; X86-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X86-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_d_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT:    vpcmpeqd %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf1,0x7d,0x0a,0x76,0xc1]
; X64-NEXT:    vpcmpltud %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xc9,0x01]
; X64-NEXT:    vpcmpleud %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xd9,0x02]
; X64-NEXT:    vpcmpneqd %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltud %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xe9,0x05]
; X64-NEXT:    vpcmpnleud %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0x7d,0x0a,0x1e,0xf1,0x06]
; X64-NEXT:    kshiftlw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0c]
; X64-NEXT:    kshiftrw $12, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0c]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32> %a0, <4 x i32> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.d.128(<4 x i32>, <4 x i32>, i32, i8) nounwind readnone

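; Signed compare on <2 x i64>, unmasked: with only two lanes the always-true
; predicate folds to the constant 3 instead of 15.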
define <8 x i8> @test_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_cmp_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1 # encoding: [0x62,0xf2,0xf5,0x08,0x37,0xc8]
; CHECK-NEXT:    vpcmpleq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xe1,0x05]
; CHECK-NEXT:    vpcmpgtq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf2,0xfd,0x08,0x37,0xe9]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT:    movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

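; Masked signed <2 x i64> compare; kshiftlw/kshiftrw by 14 keeps the low two
; mask bits for the always-true element.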
define <8 x i8> @test_mask_cmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_cmp_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
; X86-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x0a,0x37,0xc8]
; X86-NEXT:    vpcmpleq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xd9,0x02]
; X86-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe9,0x05]
; X86-NEXT:    vpcmpgtq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x37,0xf1]
; X86-NEXT:    kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
; X86-NEXT:    kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_cmp_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
; X64-NEXT:    vpcmpgtq %xmm0, %xmm1, %k1 {%k2} # encoding: [0x62,0xf2,0xf5,0x0a,0x37,0xc8]
; X64-NEXT:    vpcmpleq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xd9,0x02]
; X64-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe9,0x05]
; X64-NEXT:    vpcmpgtq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x37,0xf1]
; X64-NEXT:    kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
; X64-NEXT:    kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.cmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone

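; Unsigned compare on <2 x i64>, unmasked.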
define <8 x i8> @test_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: test_ucmp_q_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x29,0xc1]
; CHECK-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xc9,0x01]
; CHECK-NEXT:    vpcmpleuq %xmm1, %xmm0, %k2 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xd1,0x02]
; CHECK-NEXT:    vpcmpneqq %xmm1, %xmm0, %k3 # encoding: [0x62,0xf3,0xfd,0x08,0x1f,0xd9,0x04]
; CHECK-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k4 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe1,0x05]
; CHECK-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k5 # encoding: [0x62,0xf3,0xfd,0x08,0x1e,0xe9,0x06]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; CHECK-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; CHECK-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; CHECK-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; CHECK-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; CHECK-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; CHECK-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; CHECK-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; CHECK-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; CHECK-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; CHECK-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; CHECK-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; CHECK-NEXT:    movl $3, %eax # encoding: [0xb8,0x03,0x00,0x00,0x00]
; CHECK-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 -1)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 -1)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 -1)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 -1)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 -1)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 -1)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 -1)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 -1)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

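; Masked unsigned <2 x i64> compare.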
define <8 x i8> @test_mask_ucmp_q_128(<2 x i64> %a0, <2 x i64> %a1, i8 %mask) {
; X86-LABEL: test_mask_ucmp_q_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k2 # encoding: [0xc5,0xf8,0x92,0xd0]
; X86-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
; X86-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xc9,0x01]
; X86-NEXT:    vpcmpleuq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xd9,0x02]
; X86-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
; X86-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xe9,0x05]
; X86-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xf1,0x06]
; X86-NEXT:    kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
; X86-NEXT:    kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X86-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X86-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X86-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X86-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X86-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X86-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X86-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X86-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X86-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X86-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X86-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X86-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X86-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_ucmp_q_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k2 # encoding: [0xc5,0xf8,0x92,0xd7]
; X64-NEXT:    vpcmpeqq %xmm1, %xmm0, %k0 {%k2} # encoding: [0x62,0xf2,0xfd,0x0a,0x29,0xc1]
; X64-NEXT:    vpcmpltuq %xmm1, %xmm0, %k1 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xc9,0x01]
; X64-NEXT:    vpcmpleuq %xmm1, %xmm0, %k3 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xd9,0x02]
; X64-NEXT:    vpcmpneqq %xmm1, %xmm0, %k4 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1f,0xe1,0x04]
; X64-NEXT:    vpcmpnltuq %xmm1, %xmm0, %k5 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xe9,0x05]
; X64-NEXT:    vpcmpnleuq %xmm1, %xmm0, %k6 {%k2} # encoding: [0x62,0xf3,0xfd,0x0a,0x1e,0xf1,0x06]
; X64-NEXT:    kshiftlw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x32,0xd2,0x0e]
; X64-NEXT:    kshiftrw $14, %k2, %k2 # encoding: [0xc4,0xe3,0xf9,0x30,0xd2,0x0e]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    vpxor %xmm0, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xef,0xc0]
; X64-NEXT:    vpinsrw $0, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x00]
; X64-NEXT:    kmovw %k1, %eax # encoding: [0xc5,0xf8,0x93,0xc1]
; X64-NEXT:    vpinsrw $1, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x01]
; X64-NEXT:    kmovw %k3, %eax # encoding: [0xc5,0xf8,0x93,0xc3]
; X64-NEXT:    vpinsrw $2, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x02]
; X64-NEXT:    kmovw %k4, %eax # encoding: [0xc5,0xf8,0x93,0xc4]
; X64-NEXT:    vpinsrw $4, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x04]
; X64-NEXT:    kmovw %k5, %eax # encoding: [0xc5,0xf8,0x93,0xc5]
; X64-NEXT:    vpinsrw $5, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x05]
; X64-NEXT:    kmovw %k6, %eax # encoding: [0xc5,0xf8,0x93,0xc6]
; X64-NEXT:    vpinsrw $6, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x06]
; X64-NEXT:    kmovw %k2, %eax # encoding: [0xc5,0xf8,0x93,0xc2]
; X64-NEXT:    vpinsrw $7, %eax, %xmm0, %xmm0 # encoding: [0xc5,0xf9,0xc4,0xc0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res0 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 0, i8 %mask)
  %vec0 = insertelement <8 x i8> undef, i8 %res0, i32 0
  %res1 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 1, i8 %mask)
  %vec1 = insertelement <8 x i8> %vec0, i8 %res1, i32 1
  %res2 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 2, i8 %mask)
  %vec2 = insertelement <8 x i8> %vec1, i8 %res2, i32 2
  %res3 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 3, i8 %mask)
  %vec3 = insertelement <8 x i8> %vec2, i8 %res3, i32 3
  %res4 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 4, i8 %mask)
  %vec4 = insertelement <8 x i8> %vec3, i8 %res4, i32 4
  %res5 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 5, i8 %mask)
  %vec5 = insertelement <8 x i8> %vec4, i8 %res5, i32 5
  %res6 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 6, i8 %mask)
  %vec6 = insertelement <8 x i8> %vec5, i8 %res6, i32 6
  %res7 = call i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64> %a0, <2 x i64> %a1, i32 7, i8 %mask)
  %vec7 = insertelement <8 x i8> %vec6, i8 %res7, i32 7
  ret <8 x i8> %vec7
}

declare i8 @llvm.x86.avx512.mask.ucmp.q.128(<2 x i64>, <2 x i64>, i32, i8) nounwind readnone

declare <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float>, <8 x float>, i8)

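; 128-bit to 256-bit subvector broadcast: the unmasked form lowers to
; vinsertf128, while the merge- and zero-masked forms keep the EVEX
; vinsertf32x4 encoding.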
define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256(<4 x float> %x0, <8 x float> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
; X86-NEXT:    vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X86-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
; X86-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x18,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x18,0xc8,0x01]
; X64-NEXT:    vaddps %ymm1, %ymm2, %ymm1 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc9]
; X64-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x18,0xc0,0x01]
; X64-NEXT:    vaddps %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res1 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 -1)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
  %res3 = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> zeroinitializer, i8 %mask)
  %res4 = fadd <8 x float> %res1, %res2
  %res5 = fadd <8 x float> %res3, %res4
  ret <8 x float> %res5
}

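; With a memory operand the masked broadcast folds into a single
; vbroadcastf32x4 load.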
define <8 x float>@test_int_x86_avx512_mask_broadcastf32x4_256_load(<4 x float>* %x0ptr, <8 x float> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vbroadcastf32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x00]
; X86-NEXT:    # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcastf32x4_256_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcastf32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1a,0x07]
; X64-NEXT:    # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x float>, <4 x float>* %x0ptr
  %res = call <8 x float> @llvm.x86.avx512.mask.broadcastf32x4.256(<4 x float> %x0, <8 x float> %x2, i8 %mask)
  ret <8 x float> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32>, <8 x i32>, i8)

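; Integer version of the 32x4 subvector broadcast, using
; vinserti128/vinserti32x4.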
define <8 x i32>@test_int_x86_avx512_mask_broadcasti32x4_256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
; X86:       # %bb.0:
; X86-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X86-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
; X86-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256:
; X64:       # %bb.0:
; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
; X64-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0x7d,0x38,0xd0,0x01]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf3,0x7d,0x29,0x38,0xc8,0x01]
; X64-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xa9,0x38,0xc0,0x01]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 -1)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
  %res3 = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> zeroinitializer, i8 %mask)
  %res4 = add <8 x i32> %res1, %res2
  %res5 = add <8 x i32> %res3, %res4
  ret <8 x i32> %res5
}

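; Masked integer broadcast from memory folds into vbroadcasti32x4.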
define <8 x i32> @test_int_x86_avx512_mask_broadcasti32x4_256_load(<4 x i32>* %x0ptr, <8 x i32> %x2, i8 %mask) {
; X86-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vbroadcasti32x4 (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x00]
; X86-NEXT:    # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_broadcasti32x4_256_load:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vbroadcasti32x4 (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x5a,0x07]
; X64-NEXT:    # ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
; X64-NEXT:    retq # encoding: [0xc3]
  %x0 = load <4 x i32>, <4 x i32>* %x0ptr
  %res = call <8 x i32> @llvm.x86.avx512.mask.broadcasti32x4.256(<4 x i32> %x0, <8 x i32> %x2, i8 %mask)
  ret <8 x i32> %res
}

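; The masked vpabs tests below follow one pattern: compute the op unmasked
; (mask = -1), compute it merge-masked into %x1, and add the two results.
; As a rough equivalence sketch (illustrative, not part of the test; shown
; for the <8 x i32> case where the i8 mask maps one bit per lane):
;   %t    = ...unmasked vpabsd of %x0...
;   %bits = bitcast i8 %x2 to <8 x i1>
;   %res  = select <8 x i1> %bits, <8 x i32> %t, <8 x i32> %x1
; Narrower types use only the low mask bits.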
declare <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64>, <2 x i64>, i8)

define <2 x i64> @test_int_x86_avx512_mask_pabs_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpabsq %xmm0, %xmm2 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
; X86-NEXT:    vpaddq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpabsq %xmm0, %xmm2 # encoding: [0x62,0xf2,0xfd,0x08,0x1f,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x1f,0xc8]
; X64-NEXT:    vpaddq %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pabs.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64>, <4 x i64>, i8)

define <4 x i64> @test_int_x86_avx512_mask_pabs_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpabsq %ymm0, %ymm2 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8]
; X86-NEXT:    vpaddq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpabsq %ymm0, %ymm2 # encoding: [0x62,0xf2,0xfd,0x28,0x1f,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x1f,0xc8]
; X64-NEXT:    vpaddq %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pabs.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32>, <4 x i32>, i8)

define <4 x i32> @test_int_x86_avx512_mask_pabs_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpabsd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8]
; X86-NEXT:    vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpabsd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x1e,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x1e,0xc8]
; X64-NEXT:    vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pabs.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32>, <8 x i32>, i8)

define <8 x i32> @test_int_x86_avx512_mask_pabs_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pabs_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpabsd %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
; X86-NEXT:    vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pabs_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpabsd %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x1e,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpabsd %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x1e,0xc8]
; X64-NEXT:    vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pabs.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

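; vptestm/vptestnm AND the two source vectors lane-wise and produce one mask
; bit per lane: ptestm sets a bit when the lane AND is nonzero, ptestnm when
; it is zero. The trailing i8 argument is ANDed into the resulting mask, so
; passing -1 yields the unmasked result.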
declare i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_int_x86_avx512_ptestm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vptestmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7d,0x08,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vptestmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x27,0xc9]
; X64-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_int_x86_avx512_ptestm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vptestmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7d,0x28,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

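; Note that in the d_256 test above the compiler avoids a second masked
; vptestmd: it extracts the unmasked result to a GPR with kmovw and applies
; the i8 mask with a scalar andb instead.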
declare i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_int_x86_avx512_ptestm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vptestmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfd,0x08,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vptestmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x27,0xc9]
; X64-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_int_x86_avx512_ptestm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestm_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestm_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vptestmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfd,0x28,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vptestmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x27,0xc9]
; X64-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32>, <4 x i32>, i8)

define i8 @test_int_x86_avx512_ptestnm_d_128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vptestnmd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0x7e,0x08,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vptestnmd %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0x7e,0x09,0x27,0xc9]
; X64-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.d.128(<4 x i32> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32>, <8 x i32>, i8)

define i8 @test_int_x86_avx512_ptestnm_d_256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
; X86-NEXT:    kmovw %k0, %ecx # encoding: [0xc5,0xf8,0x93,0xc8]
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al # encoding: [0x8a,0x44,0x24,0x04]
; X86-NEXT:    andb %cl, %al # encoding: [0x20,0xc8]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vptestnmd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0x7e,0x28,0x27,0xc1]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    andb %al, %dil # encoding: [0x40,0x20,0xc7]
; X64-NEXT:    addb %dil, %al # encoding: [0x40,0x00,0xf8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.d.256(<8 x i32> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64>, <2 x i64>, i8)

define i8 @test_int_x86_avx512_ptestnm_q_128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vptestnmq %xmm1, %xmm0, %k0 # encoding: [0x62,0xf2,0xfe,0x08,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vptestnmq %xmm1, %xmm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x09,0x27,0xc9]
; X64-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.q.128(<2 x i64> %x0, <2 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

declare i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64>, <4 x i64>, i8)

define i8 @test_int_x86_avx512_ptestnm_q_256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_ptestnm_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
; X86-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X86-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X86-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X86-NEXT:    # kill: def $al killed $al killed $eax
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_ptestnm_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vptestnmq %ymm1, %ymm0, %k0 # encoding: [0x62,0xf2,0xfe,0x28,0x27,0xc1]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vptestnmq %ymm1, %ymm0, %k1 {%k1} # encoding: [0x62,0xf2,0xfe,0x29,0x27,0xc9]
; X64-NEXT:    kmovw %k1, %ecx # encoding: [0xc5,0xf8,0x93,0xc9]
; X64-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; X64-NEXT:    addb %cl, %al # encoding: [0x00,0xc8]
; X64-NEXT:    # kill: def $al killed $al killed $eax
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 %x2)
  %res1 = call i8 @llvm.x86.avx512.ptestnm.q.256(<4 x i64> %x0, <4 x i64> %x1, i8 -1)
  %res2 = add i8 %res, %res1
  ret i8 %res2
}

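; The vcmp tests below pass comparison immediate 2 (LE), so each lowers to a
; single vcmpleps/vcmplepd into a mask register plus a kmovw to extract the
; i8 result; only as many low bits as there are lanes are meaningful.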
define i8 @test_cmpps_256(<8 x float> %a, <8 x float> %b) {
; CHECK-LABEL: test_cmpps_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpleps %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0x7c,0x28,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float> %a, <8 x float> %b, i32 2, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.ps.256(<8 x float>, <8 x float>, i32, i8)

define i8 @test_cmpps_128(<4 x float> %a, <4 x float> %b) {
; CHECK-LABEL: test_cmpps_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmpleps %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0x7c,0x08,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float> %a, <4 x float> %b, i32 2, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.ps.128(<4 x float>, <4 x float>, i32, i8)

define i8 @test_cmppd_256(<4 x double> %a, <4 x double> %b) {
; CHECK-LABEL: test_cmppd_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmplepd %ymm1, %ymm0, %k0 # encoding: [0x62,0xf1,0xfd,0x28,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double> %a, <4 x double> %b, i32 2, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.256(<4 x double>, <4 x double>, i32, i8)

define i8 @test_cmppd_128(<2 x double> %a, <2 x double> %b) {
; CHECK-LABEL: test_cmppd_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vcmplepd %xmm1, %xmm0, %k0 # encoding: [0x62,0xf1,0xfd,0x08,0xc2,0xc1,0x02]
; CHECK-NEXT:    kmovw %k0, %eax # encoding: [0xc5,0xf8,0x93,0xc0]
; CHECK-NEXT:    # kill: def $al killed $al killed $eax
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double> %a, <2 x double> %b, i32 2, i8 -1)
  ret i8 %res
}
declare i8 @llvm.x86.avx512.mask.cmp.pd.128(<2 x double>, <2 x double>, i32, i8)

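; The multiply tests below are named by operand form: rr = reg/reg, rm =
; reg/mem, rmb = reg/broadcast-from-memory; a trailing k adds merge masking
; into %passThru and kz adds zero masking.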
define <2 x i64> @test_mask_mul_epi32_rr_128(<4 x i32> %a, <4 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr_128:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rrk_128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrk_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xd1]
; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rrkz_128(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rm_128(<4 x i32> %a, <4 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rm_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rm_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmk_128(<4 x i32> %a, <4 x i32>* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x08]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmkz_128(<4 x i32> %a, <4 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <4 x i32>, <4 x i32>* %ptr_b
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmb_128(<4 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rmb_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
; X86-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmb_128:
; X64:       # %bb.0:
; X64-NEXT:    vpmuldq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf2,0xfd,0x18,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

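; In the rmb variants, 64-bit mode folds the splatted scalar straight into
; the multiply via the embedded-broadcast operand (%rdi){1to2}/{1to4}, while
; 32-bit mode has no 64-bit scalar load and materializes the splat first with
; vpbroadcastq (vmovq plus vpbroadcastq for the 256-bit forms).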
define <2 x i64> @test_mask_mul_epi32_rmbk_128(<4 x i32> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x10]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x28,0xca]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x19,0x28,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epi32_rmbkz_128(<4 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x99,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmul.dq.128(<4 x i32>, <4 x i32>, <2 x i64>, i8)

define <4 x i64> @test_mask_mul_epi32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epi32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rrk_256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuldq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuldq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmb_256(<8 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epi32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT:    # xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmuldq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf2,0xfd,0x38,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmbk_256(<8 x i32> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x28,0xca]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x39,0x28,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epi32_rmbkz_256(<8 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epi32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT:    # xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuldq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x28,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epi32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuldq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xb9,0x28,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmul.dq.256(<8 x i32>, <8 x i32>, <4 x i64>, i8)

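; The epu32 tests below mirror the epi32 matrix above with vpmuludq, the
; unsigned 32x32->64 multiply; the IR-level masking pattern is identical.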
   9901 define < 2 x i64> @test_mask_mul_epu32_rr_128(< 4 x i32> %a, < 4 x i32> %b) {
   9902 ; CHECK-LABEL: test_mask_mul_epu32_rr_128:
   9903 ; CHECK:       # %bb.0:
   9904 ; CHECK-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
   9905 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   9906   %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
   9907   ret < 2 x i64> %res
   9908 }
   9909 
   9910 define < 2 x i64> @test_mask_mul_epu32_rrk_128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask) {
   9911 ; X86-LABEL: test_mask_mul_epu32_rrk_128:
   9912 ; X86:       # %bb.0:
   9913 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   9914 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   9915 ; X86-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
   9916 ; X86-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   9917 ; X86-NEXT:    retl # encoding: [0xc3]
   9918 ;
   9919 ; X64-LABEL: test_mask_mul_epu32_rrk_128:
   9920 ; X64:       # %bb.0:
   9921 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   9922 ; X64-NEXT:    vpmuludq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xd1]
   9923 ; X64-NEXT:    vmovdqa %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc2]
   9924 ; X64-NEXT:    retq # encoding: [0xc3]
   9925   %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
   9926   ret < 2 x i64> %res
   9927 }
   9928 
   9929 define < 2 x i64> @test_mask_mul_epu32_rrkz_128(< 4 x i32> %a, < 4 x i32> %b, i8 %mask) {
   9930 ; X86-LABEL: test_mask_mul_epu32_rrkz_128:
   9931 ; X86:       # %bb.0:
   9932 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   9933 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   9934 ; X86-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
   9935 ; X86-NEXT:    retl # encoding: [0xc3]
   9936 ;
   9937 ; X64-LABEL: test_mask_mul_epu32_rrkz_128:
   9938 ; X64:       # %bb.0:
   9939 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   9940 ; X64-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
   9941 ; X64-NEXT:    retq # encoding: [0xc3]
   9942   %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
   9943   ret < 2 x i64> %res
   9944 }
   9945 
   9946 define < 2 x i64> @test_mask_mul_epu32_rm_128(< 4 x i32> %a, < 4 x i32>* %ptr_b) {
   9947 ; X86-LABEL: test_mask_mul_epu32_rm_128:
   9948 ; X86:       # %bb.0:
   9949 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   9950 ; X86-NEXT:    vpmuludq (%eax), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x00]
   9951 ; X86-NEXT:    retl # encoding: [0xc3]
   9952 ;
   9953 ; X64-LABEL: test_mask_mul_epu32_rm_128:
   9954 ; X64:       # %bb.0:
   9955 ; X64-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0x07]
   9956 ; X64-NEXT:    retq # encoding: [0xc3]
   9957   %b = load < 4 x i32>, < 4 x i32>* %ptr_b
   9958   %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 -1)
   9959   ret < 2 x i64> %res
   9960 }
   9961 
   9962 define < 2 x i64> @test_mask_mul_epu32_rmk_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, < 2 x i64> %passThru, i8 %mask) {
   9963 ; X86-LABEL: test_mask_mul_epu32_rmk_128:
   9964 ; X86:       # %bb.0:
   9965 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   9966 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   9967 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   9968 ; X86-NEXT:    vpmuludq (%eax), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x08]
   9969 ; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   9970 ; X86-NEXT:    retl # encoding: [0xc3]
   9971 ;
   9972 ; X64-LABEL: test_mask_mul_epu32_rmk_128:
   9973 ; X64:       # %bb.0:
   9974 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   9975 ; X64-NEXT:    vpmuludq (%rdi), %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0x0f]
   9976 ; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
   9977 ; X64-NEXT:    retq # encoding: [0xc3]
   9978   %b = load < 4 x i32>, < 4 x i32>* %ptr_b
   9979   %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> %passThru, i8 %mask)
   9980   ret < 2 x i64> %res
   9981 }
   9982 
   9983 define < 2 x i64> @test_mask_mul_epu32_rmkz_128(< 4 x i32> %a, < 4 x i32>* %ptr_b, i8 %mask) {
   9984 ; X86-LABEL: test_mask_mul_epu32_rmkz_128:
   9985 ; X86:       # %bb.0:
   9986 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   9987 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   9988 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   9989 ; X86-NEXT:    vpmuludq (%eax), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x00]
   9990 ; X86-NEXT:    retl # encoding: [0xc3]
   9991 ;
   9992 ; X64-LABEL: test_mask_mul_epu32_rmkz_128:
   9993 ; X64:       # %bb.0:
   9994 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   9995 ; X64-NEXT:    vpmuludq (%rdi), %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0x07]
   9996 ; X64-NEXT:    retq # encoding: [0xc3]
   9997   %b = load < 4 x i32>, < 4 x i32>* %ptr_b
   9998   %res = call < 2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(< 4 x i32> %a, < 4 x i32> %b, < 2 x i64> zeroinitializer, i8 %mask)
   9999   ret < 2 x i64> %res
   10000 }
   10001 
   10002 define < 2 x i64> @test_mask_mul_epu32_rmb_128(< 4 x i32> %a, i64* %ptr_b) {
   10003 ; X86-LABEL: test_mask_mul_epu32_rmb_128:
   10004 ; X86:       # %bb.0:
   10005 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   10006 ; X86-NEXT:    vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
   10007 ; X86-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xf4,0xc1]
   10008 ; X86-NEXT:    retl # encoding: [0xc3]
   10009 ;
   10010 ; X64-LABEL: test_mask_mul_epu32_rmb_128:
   10011 ; X64:       # %bb.0:
   10012 ; X64-NEXT:    vpmuludq (%rdi){1to2}, %xmm0, %xmm0 # encoding: [0x62,0xf1,0xfd,0x18,0xf4,0x07]
   10013 ; X64-NEXT:    retq # encoding: [0xc3]
   10014   %q = load i64, i64* %ptr_b
   10015   %vecinit.i = insertelement < 2 x i64> undef, i64 %q, i32 0
   10016   %b64 = shufflevector < 2 x i64> %vecinit.i, < 2 x i64> undef, <2 x i32> zeroinitializer
   10017   %b = bitcast < 2 x i64> %b64 to < 4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 -1)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmbk_128(<4 x i32> %a, i64* %ptr_b, <2 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbk_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x10]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x09,0xf4,0xca]
; X86-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbk_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to2}, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x19,0xf4,0x0f]
; X64-NEXT:    vmovdqa %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> %passThru, i8 %mask)
  ret <2 x i64> %res
}

define <2 x i64> @test_mask_mul_epu32_rmbkz_128(<4 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbkz_128:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpbroadcastq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0x59,0x08]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0xf4,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbkz_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to2}, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x99,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <2 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <2 x i64> %vecinit.i, <2 x i64> undef, <2 x i32> zeroinitializer
  %b = bitcast <2 x i64> %b64 to <4 x i32>
  %res = call <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32> %a, <4 x i32> %b, <2 x i64> zeroinitializer, i8 %mask)
  ret <2 x i64> %res
}

declare <2 x i64> @llvm.x86.avx512.mask.pmulu.dq.128(<4 x i32>, <4 x i32>, <2 x i64>, i8)

define <4 x i64> @test_mask_mul_epu32_rr_256(<8 x i32> %a, <8 x i32> %b) {
; CHECK-LABEL: test_mask_mul_epu32_rr_256:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rrk_256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrk_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
; X86-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xd1]
; X64-NEXT:    vmovdqa %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rrkz_256(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rrkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rrkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rm_256(<8 x i32> %a, <8 x i32>* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rm_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmuludq (%eax), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rm_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmk_256(<8 x i32> %a, <8 x i32>* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x08]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmkz_256(<8 x i32> %a, <8 x i32>* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpmuludq (%eax), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi), %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %b = load <8 x i32>, <8 x i32>* %ptr_b
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmb_256(<8 x i32> %a, i64* %ptr_b) {
; X86-LABEL: test_mask_mul_epu32_rmb_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT:    # xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xf4,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmb_256:
; X64:       # %bb.0:
; X64-NEXT:    vpmuludq (%rdi){1to4}, %ymm0, %ymm0 # encoding: [0x62,0xf1,0xfd,0x38,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 -1)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmbk_256(<8 x i32> %a, i64* %ptr_b, <4 x i64> %passThru, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbk_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x10]
; X86-NEXT:    # xmm2 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm2, %ymm2 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xd2]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xf4,0xca]
; X86-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbk_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to4}, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x39,0xf4,0x0f]
; X64-NEXT:    vmovdqa %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> %passThru, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_mask_mul_epu32_rmbkz_256(<8 x i32> %a, i64* %ptr_b, i8 %mask) {
; X86-LABEL: test_mask_mul_epu32_rmbkz_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vmovq (%eax), %xmm1 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x7e,0x08]
; X86-NEXT:    # xmm1 = mem[0],zero
; X86-NEXT:    vpbroadcastq %xmm1, %ymm1 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0x59,0xc9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpmuludq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0xf4,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_mul_epu32_rmbkz_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpmuludq (%rdi){1to4}, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xb9,0xf4,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load i64, i64* %ptr_b
  %vecinit.i = insertelement <4 x i64> undef, i64 %q, i32 0
  %b64 = shufflevector <4 x i64> %vecinit.i, <4 x i64> undef, <4 x i32> zeroinitializer
  %b = bitcast <4 x i64> %b64 to <8 x i32>
  %res = call <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32> %a, <8 x i32> %b, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.pmulu.dq.256(<8 x i32>, <8 x i32>, <4 x i64>, i8)
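
; Hand-written reference sketch (not autogenerated; kept as comments so the
; checks above are unaffected): after IR auto-upgrade, the pmulu.dq intrinsics
; reduce to an unsigned 32x32->64-bit multiply of the even-indexed lanes. The
; helper name @pmuludq128_ref is hypothetical.
;
; define <2 x i64> @pmuludq128_ref(<2 x i64> %a, <2 x i64> %b) {
;   %lo_a = and <2 x i64> %a, <i64 4294967295, i64 4294967295> ; keep low 32 bits
;   %lo_b = and <2 x i64> %b, <i64 4294967295, i64 4294967295>
;   %prod = mul <2 x i64> %lo_a, %lo_b                         ; zero-extended product
;   ret <2 x i64> %prod
; }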

declare <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_dq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5b,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5b,0xc8]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtdq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_cvt_dq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtdq2ps %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8]
; X86-NEXT:    vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_dq2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5b,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtdq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5b,0xc8]
; X64-NEXT:    vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtdq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8]
; X86-NEXT:    vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2dq_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xff,0xe6,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xff,0x29,0xe6,0xc8]
; X64-NEXT:    vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvtpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_pd2ps_256(<4 x double> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtpd2ps %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_pd2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtpd2ps %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x5a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtpd2ps %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0x5a,0xc8]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtpd2ps.256(<4 x double> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_cvt_ps2pd_256(<4 x float> %x0, <4 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtps2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8]
; X86-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2pd %xmm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x5a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2pd %xmm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x29,0x5a,0xc8]
; X64-NEXT:    vaddpd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 %x2)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.cvtps2pd.256(<4 x float> %x0, <4 x double> %x1, i8 -1)
  %res2 = fadd <4 x double> %res, %res1
  ret <4 x double> %res2
}

declare <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask_cvt_ps2pd_128(<4 x float> %x0, <2 x double> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
; X86:       # %bb.0:
; X86-NEXT:    vcvtps2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8]
; X86-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_ps2pd_128:
; X64:       # %bb.0:
; X64-NEXT:    vcvtps2pd %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x5a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtps2pd %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7c,0x09,0x5a,0xc8]
; X64-NEXT:    vaddpd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 %x2)
  %res1 = call <2 x double> @llvm.x86.avx512.mask.cvtps2pd.128(<4 x float> %x0, <2 x double> %x1, i8 -1)
  %res2 = fadd <2 x double> %res, %res1
  ret <2 x double> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvtt_pd2dq_256(<4 x double> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
; X86-NEXT:    vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_pd2dq_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvttpd2dq %ymm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xe6,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttpd2dq %ymm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xfd,0x29,0xe6,0xc8]
; X64-NEXT:    vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttpd2dq.256(<4 x double> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_128(<4 x float> %x0, <4 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
; X86:       # %bb.0:
; X86-NEXT:    vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
; X86-NEXT:    vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_128:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2dq %xmm0, %xmm2 # EVEX TO VEX Compression encoding: [0xc5,0xfa,0x5b,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2dq %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x09,0x5b,0xc8]
; X64-NEXT:    vpaddd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 %x2)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.cvttps2dq.128(<4 x float> %x0, <4 x i32> %x1, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_cvtt_ps2dq_256(<8 x float> %x0, <8 x i32> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
; X86-NEXT:    vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvtt_ps2dq_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvttps2dq %ymm0, %ymm2 # EVEX TO VEX Compression encoding: [0xc5,0xfe,0x5b,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvttps2dq %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7e,0x29,0x5b,0xc8]
; X64-NEXT:    vpaddd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 %x2)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.cvttps2dq.256(<8 x float> %x0, <8 x i32> %x1, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float>, <8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_permvar_sf_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermps %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
; X86-NEXT:    vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
; X86-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X86-NEXT:    vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_sf_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermps %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x16,0xd8]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermps %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x16,0xd0]
; X64-NEXT:    vpermps %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x16,0xc0]
; X64-NEXT:    vaddps %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xec,0x58,0xc0]
; X64-NEXT:    vaddps %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> zeroinitializer, i8 %x3)
  %res2 = call <8 x float> @llvm.x86.avx512.mask.permvar.sf.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
  %res3 = fadd <8 x float> %res, %res1
  %res4 = fadd <8 x float> %res3, %res2
  ret <8 x float> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_permvar_si_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_si_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermd %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
; X86-NEXT:    vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_si_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermd %ymm0, %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0x36,0xd8]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0x75,0x29,0x36,0xd0]
; X64-NEXT:    vpermd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0x36,0xc0]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.permvar.si.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double>, <4 x i64>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask_permvar_df_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_df_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
; X86-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
; X86-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X86-NEXT:    vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_df_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x16,0xd8]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x16,0xd0]
; X64-NEXT:    vpermpd %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x16,0xc0]
; X64-NEXT:    vaddpd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0x58,0xc0]
; X64-NEXT:    vaddpd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x58,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
  %res1 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> zeroinitializer, i8 %x3)
  %res2 = call <4 x double> @llvm.x86.avx512.mask.permvar.df.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
  %res3 = fadd <4 x double> %res, %res1
  %res4 = fadd <4 x double> %res3, %res2
  ret <4 x double> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_permvar_di_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_di_256:
; X86:       # %bb.0:
; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
; X86-NEXT:    vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_di_256:
; X64:       # %bb.0:
; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x36,0xd8]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0x36,0xd0]
; X64-NEXT:    vpermq %ymm0, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0x36,0xc0]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.permvar.di.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)

define <4 x i32>@test_int_x86_avx512_mask_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0x75,0x09,0x25,0xc2,0x21]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i32, i8)

define <4 x i32>@test_int_x86_avx512_maskz_pternlog_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0x75,0x08,0x25,0xda,0x21]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0x89,0x25,0xc2,0x21]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 %x4)
  %res1 = call <4 x i32> @llvm.x86.avx512.maskz.pternlog.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i32 33, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)

define <8 x i32>@test_int_x86_avx512_mask_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0x75,0x29,0x25,0xc2,0x21]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i32, i8)

define <8 x i32>@test_int_x86_avx512_maskz_pternlog_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0x75,0x28,0x25,0xda,0x21]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogd $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0x75,0xa9,0x25,0xc2,0x21]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 %x4)
  %res1 = call <8 x i32> @llvm.x86.avx512.maskz.pternlog.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i32 33, i8 -1)
  %res2 = add <8 x i32> %res, %res1
  ret <8 x i32> %res2
}

declare <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)

define <2 x i64>@test_int_x86_avx512_mask_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x09,0x25,0xc2,0x21]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i32, i8)

define <2 x i64>@test_int_x86_avx512_maskz_pternlog_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf3,0xf5,0x08,0x25,0xda,0x21]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0x89,0x25,0xc2,0x21]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <2 x i64> @llvm.x86.avx512.maskz.pternlog.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i32 33, i8 -1)
  %res2 = add <2 x i64> %res, %res1
  ret <2 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)

define <4 x i64>@test_int_x86_avx512_mask_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pternlog_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf3,0xf5,0x29,0x25,0xc2,0x21]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}

declare <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i32, i8)

define <4 x i64>@test_int_x86_avx512_maskz_pternlog_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x4) {
; X86-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_pternlog_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf3,0xf5,0x28,0x25,0xda,0x21]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpternlogq $33, %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf3,0xf5,0xa9,0x25,0xc2,0x21]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 %x4)
  %res1 = call <4 x i64> @llvm.x86.avx512.maskz.pternlog.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i32 33, i8 -1)
  %res2 = add <4 x i64> %res, %res1
  ret <4 x i64> %res2
}
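
; Hand-written reference note (not autogenerated): vpternlog's 8-bit immediate
; is a three-input truth table indexed by the bit triple (src1, src2, src3),
; with src1 as the most significant index bit. Imm 33 = 0x21 = 0b00100001 sets
; table entries 5 and 0, so each result bit is:
;   res = (src1 & ~src2 & src3) | (~src1 & ~src2 & ~src3)
;       = ~src2 & ~(src1 ^ src3)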

declare <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask_cvt_udq2ps_128(<4 x i32> %x0, <4 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
; X86:       # %bb.0:
; X86-NEXT:    vcvtudq2ps %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
; X86-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_128:
; X64:       # %bb.0:
; X64-NEXT:    vcvtudq2ps %xmm0, %xmm2 # encoding: [0x62,0xf1,0x7f,0x08,0x7a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2ps %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x09,0x7a,0xc8]
; X64-NEXT:    vaddps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 %x2)
  %res1 = call <4 x float> @llvm.x86.avx512.mask.cvtudq2ps.128(<4 x i32> %x0, <4 x float> %x1, i8 -1)
  %res2 = fadd <4 x float> %res, %res1
  ret <4 x float> %res2
}

declare <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask_cvt_udq2ps_256(<8 x i32> %x0, <8 x float> %x1, i8 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
; X86:       # %bb.0:
; X86-NEXT:    vcvtudq2ps %ymm0, %ymm2 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xd0]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
; X86-NEXT:    vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_cvt_udq2ps_256:
; X64:       # %bb.0:
; X64-NEXT:    vcvtudq2ps %ymm0, %ymm2 # encoding: [0x62,0xf1,0x7f,0x28,0x7a,0xd0]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vcvtudq2ps %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x7f,0x29,0x7a,0xc8]
; X64-NEXT:    vaddps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 %x2)
  %res1 = call <8 x float> @llvm.x86.avx512.mask.cvtudq2ps.256(<8 x i32> %x0, <8 x float> %x1, i8 -1)
  %res2 = fadd <8 x float> %res, %res1
  ret <8 x float> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_vpermi2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X86-NEXT:    vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
; X86-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
; X64-NEXT:    vpermt2d %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7e,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermi2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x76,0xca]
; X64-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermi2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
; X86-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x7e,0xca]
; X64-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res2 = add <4 x i32> %res, %res1
  ret <4 x i32> %res2
}

declare <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
; X86-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_128:
   10938 ; X64:       # %bb.0:
   10939 ; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
   10940 ; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x7e,0xda]
   10941 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   10942 ; X64-NEXT:    vpermt2d %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x7e,0xca]
   10943 ; X64-NEXT:    vpaddd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc3]
   10944 ; X64-NEXT:    retq # encoding: [0xc3]
   10945   %res = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
   10946   %res1 = call <4 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
   10947   %res2 = add <4 x i32> %res, %res1
   10948   ret <4 x i32> %res2
   10949 }
   10950 
   10951 declare <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
   10952 
   10953 define <8 x i32>@test_int_x86_avx512_mask_vpermi2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   10954 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
   10955 ; X86:       # %bb.0:
   10956 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
   10957 ; X86-NEXT:    vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
   10958 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   10959 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   10960 ; X86-NEXT:    vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
   10961 ; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
   10962 ; X86-NEXT:    retl # encoding: [0xc3]
   10963 ;
   10964 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_d_256:
   10965 ; X64:       # %bb.0:
   10966 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
   10967 ; X64-NEXT:    vpermt2d %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7e,0xda]
   10968 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   10969 ; X64-NEXT:    vpermi2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x76,0xca]
   10970 ; X64-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
   10971 ; X64-NEXT:    retq # encoding: [0xc3]
   10972   %res = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
   10973   %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermi2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
   10974   %res2 = add <8 x i32> %res, %res1
   10975   ret <8 x i32> %res2
   10976 }
   10977 
   10978 declare <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
   10979 
   10980 define <8 x i32>@test_int_x86_avx512_mask_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   10981 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
   10982 ; X86:       # %bb.0:
   10983 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   10984 ; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
   10985 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   10986 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   10987 ; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
   10988 ; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
   10989 ; X86-NEXT:    retl # encoding: [0xc3]
   10990 ;
   10991 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_d_256:
   10992 ; X64:       # %bb.0:
   10993 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   10994 ; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
   10995 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   10996 ; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x7e,0xca]
   10997 ; X64-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
   10998 ; X64-NEXT:    retq # encoding: [0xc3]
   10999   %res = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
   11000   %res1 = call <8 x i32> @llvm.x86.avx512.mask.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
   11001   %res2 = add <8 x i32> %res, %res1
   11002   ret <8 x i32> %res2
   11003 }
   11004 
   11005 declare <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)
   11006 
   11007 define <8 x i32>@test_int_x86_avx512_maskz_vpermt2var_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
   11008 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
   11009 ; X86:       # %bb.0:
   11010 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   11011 ; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
   11012 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11013 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11014 ; X86-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
   11015 ; X86-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
   11016 ; X86-NEXT:    retl # encoding: [0xc3]
   11017 ;
   11018 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_d_256:
   11019 ; X64:       # %bb.0:
   11020 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   11021 ; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x7e,0xda]
   11022 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11023 ; X64-NEXT:    vpermt2d %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x7e,0xca]
   11024 ; X64-NEXT:    vpaddd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc3]
   11025 ; X64-NEXT:    retq # encoding: [0xc3]
   11026   %res = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
   11027   %res1 = call <8 x i32> @llvm.x86.avx512.maskz.vpermt2var.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
   11028   %res2 = add <8 x i32> %res, %res1
   11029   ret <8 x i32> %res2
   11030 }
   11031 
   11032 declare <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double>, <2 x i64>, <2 x double>, i8)
   11033 
   11034 define <2 x double>@test_int_x86_avx512_mask_vpermi2var_pd_128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3) {
   11035 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
   11036 ; X86:       # %bb.0:
   11037 ; X86-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
   11038 ; X86-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
   11039 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11040 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11041 ; X86-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
   11042 ; X86-NEXT:    vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
   11043 ; X86-NEXT:    retl # encoding: [0xc3]
   11044 ;
   11045 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_128:
   11046 ; X64:       # %bb.0:
   11047 ; X64-NEXT:    vmovapd %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xd8]
   11048 ; X64-NEXT:    vpermt2pd %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7f,0xda]
   11049 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11050 ; X64-NEXT:    vpermi2pd %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x77,0xca]
   11051 ; X64-NEXT:    vaddpd %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0x58,0xc3]
   11052 ; X64-NEXT:    retq # encoding: [0xc3]
   11053   %res = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 %x3)
   11054   %res1 = call <2 x double> @llvm.x86.avx512.mask.vpermi2var.pd.128(<2 x double> %x0, <2 x i64> %x1, <2 x double> %x2, i8 -1)
   11055   %res2 = fadd <2 x double> %res, %res1
   11056   ret <2 x double> %res2
   11057 }
   11058 
   11059 declare <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double>, <4 x i64>, <4 x double>, i8)
   11060 
   11061 define <4 x double>@test_int_x86_avx512_mask_vpermi2var_pd_256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3) {
   11062 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
   11063 ; X86:       # %bb.0:
   11064 ; X86-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
   11065 ; X86-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
   11066 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11067 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11068 ; X86-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
   11069 ; X86-NEXT:    vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
   11070 ; X86-NEXT:    retl # encoding: [0xc3]
   11071 ;
   11072 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_pd_256:
   11073 ; X64:       # %bb.0:
   11074 ; X64-NEXT:    vmovapd %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xd8]
   11075 ; X64-NEXT:    vpermt2pd %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7f,0xda]
   11076 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11077 ; X64-NEXT:    vpermi2pd %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x77,0xca]
   11078 ; X64-NEXT:    vaddpd %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0x58,0xc3]
   11079 ; X64-NEXT:    retq # encoding: [0xc3]
   11080   %res = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 %x3)
   11081   %res1 = call <4 x double> @llvm.x86.avx512.mask.vpermi2var.pd.256(<4 x double> %x0, <4 x i64> %x1, <4 x double> %x2, i8 -1)
   11082   %res2 = fadd <4 x double> %res, %res1
   11083   ret <4 x double> %res2
   11084 }
   11085 
   11086 declare <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float>, <4 x i32>, <4 x float>, i8)
   11087 
   11088 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3) {
   11089 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
   11090 ; X86:       # %bb.0:
   11091 ; X86-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
   11092 ; X86-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
   11093 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11094 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11095 ; X86-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
   11096 ; X86-NEXT:    vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
   11097 ; X86-NEXT:    retl # encoding: [0xc3]
   11098 ;
   11099 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128:
   11100 ; X64:       # %bb.0:
   11101 ; X64-NEXT:    vmovaps %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xd8]
   11102 ; X64-NEXT:    vpermt2ps %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0x75,0x08,0x7f,0xda]
   11103 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11104 ; X64-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
   11105 ; X64-NEXT:    vaddps %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf0,0x58,0xc3]
   11106 ; X64-NEXT:    retq # encoding: [0xc3]
   11107   %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 %x3)
   11108   %res1 = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1, <4 x float> %x2, i8 -1)
   11109   %res2 = fadd <4 x float> %res, %res1
   11110   ret <4 x float> %res2
   11111 }
   11112 
   11113 define <4 x float>@test_int_x86_avx512_mask_vpermi2var_ps_128_cast(<4 x float> %x0, <2 x i64> %x1, <4 x float> %x2, i8 %x3) {
   11114 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
   11115 ; X86:       # %bb.0:
   11116 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11117 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11118 ; X86-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
   11119 ; X86-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   11120 ; X86-NEXT:    retl # encoding: [0xc3]
   11121 ;
   11122 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_128_cast:
   11123 ; X64:       # %bb.0:
   11124 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11125 ; X64-NEXT:    vpermi2ps %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x77,0xca]
   11126 ; X64-NEXT:    vmovaps %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc1]
   11127 ; X64-NEXT:    retq # encoding: [0xc3]
   11128   %x1cast = bitcast <2 x i64> %x1 to <4 x i32>
   11129   %res = call <4 x float> @llvm.x86.avx512.mask.vpermi2var.ps.128(<4 x float> %x0, <4 x i32> %x1cast, <4 x float> %x2, i8 %x3)
   11130   ret <4 x float> %res
   11131 }
   11132 
   11133 declare <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float>, <8 x i32>, <8 x float>, i8)
   11134 
   11135 define <8 x float>@test_int_x86_avx512_mask_vpermi2var_ps_256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3) {
   11136 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
   11137 ; X86:       # %bb.0:
   11138 ; X86-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
   11139 ; X86-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
   11140 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11141 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11142 ; X86-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
   11143 ; X86-NEXT:    vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
   11144 ; X86-NEXT:    retl # encoding: [0xc3]
   11145 ;
   11146 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_ps_256:
   11147 ; X64:       # %bb.0:
   11148 ; X64-NEXT:    vmovaps %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xd8]
   11149 ; X64-NEXT:    vpermt2ps %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0x75,0x28,0x7f,0xda]
   11150 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11151 ; X64-NEXT:    vpermi2ps %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x77,0xca]
   11152 ; X64-NEXT:    vaddps %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf4,0x58,0xc3]
   11153 ; X64-NEXT:    retq # encoding: [0xc3]
   11154   %res = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 %x3)
   11155   %res1 = call <8 x float> @llvm.x86.avx512.mask.vpermi2var.ps.256(<8 x float> %x0, <8 x i32> %x1, <8 x float> %x2, i8 -1)
   11156   %res2 = fadd <8 x float> %res, %res1
   11157   ret <8 x float> %res2
   11158 }
   11159 
   11160 declare <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   11161 
   11162 define <2 x i64>@test_int_x86_avx512_mask_vpermi2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
   11163 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
   11164 ; X86:       # %bb.0:
   11165 ; X86-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
   11166 ; X86-NEXT:    vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
   11167 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11168 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11169 ; X86-NEXT:    vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
   11170 ; X86-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
   11171 ; X86-NEXT:    retl # encoding: [0xc3]
   11172 ;
   11173 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_128:
   11174 ; X64:       # %bb.0:
   11175 ; X64-NEXT:    vmovdqa %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd8]
   11176 ; X64-NEXT:    vpermt2q %xmm2, %xmm1, %xmm3 # encoding: [0x62,0xf2,0xf5,0x08,0x7e,0xda]
   11177 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11178 ; X64-NEXT:    vpermi2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x76,0xca]
   11179 ; X64-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
   11180 ; X64-NEXT:    retq # encoding: [0xc3]
   11181   %res = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
   11182   %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermi2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
   11183   %res2 = add <2 x i64> %res, %res1
   11184   ret <2 x i64> %res2
   11185 }
   11186 
   11187 declare <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   11188 
   11189 define <2 x i64>@test_int_x86_avx512_mask_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
   11190 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
   11191 ; X86:       # %bb.0:
   11192 ; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
   11193 ; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
   11194 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11195 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11196 ; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
   11197 ; X86-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
   11198 ; X86-NEXT:    retl # encoding: [0xc3]
   11199 ;
   11200 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_128:
   11201 ; X64:       # %bb.0:
   11202 ; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
   11203 ; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
   11204 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11205 ; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x7e,0xca]
   11206 ; X64-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
   11207 ; X64-NEXT:    retq # encoding: [0xc3]
   11208   %res = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
   11209   %res1 = call <2 x i64> @llvm.x86.avx512.mask.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
   11210   %res2 = add <2 x i64> %res, %res1
   11211   ret <2 x i64> %res2
   11212 }
   11213 
   11214 declare <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)
   11215 
   11216 define <2 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
   11217 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
   11218 ; X86:       # %bb.0:
   11219 ; X86-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
   11220 ; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
   11221 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11222 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11223 ; X86-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
   11224 ; X86-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
   11225 ; X86-NEXT:    retl # encoding: [0xc3]
   11226 ;
   11227 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_128:
   11228 ; X64:       # %bb.0:
   11229 ; X64-NEXT:    vmovdqa %xmm1, %xmm3 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xd9]
   11230 ; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x7e,0xda]
   11231 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11232 ; X64-NEXT:    vpermt2q %xmm2, %xmm0, %xmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x7e,0xca]
   11233 ; X64-NEXT:    vpaddq %xmm3, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc3]
   11234 ; X64-NEXT:    retq # encoding: [0xc3]
   11235   %res = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
   11236   %res1 = call <2 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
   11237   %res2 = add <2 x i64> %res, %res1
   11238   ret <2 x i64> %res2
   11239 }
   11240 
   11241 declare <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
   11242 
   11243 define <4 x i64>@test_int_x86_avx512_mask_vpermi2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
   11244 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
   11245 ; X86:       # %bb.0:
   11246 ; X86-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
   11247 ; X86-NEXT:    vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
   11248 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11249 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11250 ; X86-NEXT:    vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
   11251 ; X86-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
   11252 ; X86-NEXT:    retl # encoding: [0xc3]
   11253 ;
   11254 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_q_256:
   11255 ; X64:       # %bb.0:
   11256 ; X64-NEXT:    vmovdqa %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd8]
   11257 ; X64-NEXT:    vpermt2q %ymm2, %ymm1, %ymm3 # encoding: [0x62,0xf2,0xf5,0x28,0x7e,0xda]
   11258 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11259 ; X64-NEXT:    vpermi2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x76,0xca]
   11260 ; X64-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
   11261 ; X64-NEXT:    retq # encoding: [0xc3]
   11262   %res = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
   11263   %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermi2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
   11264   %res2 = add <4 x i64> %res, %res1
   11265   ret <4 x i64> %res2
   11266 }
   11267 
   11268 declare <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
   11269 
   11270 define <4 x i64>@test_int_x86_avx512_mask_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
   11271 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
   11272 ; X86:       # %bb.0:
   11273 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   11274 ; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
   11275 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11276 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11277 ; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
   11278 ; X86-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
   11279 ; X86-NEXT:    retl # encoding: [0xc3]
   11280 ;
   11281 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_q_256:
   11282 ; X64:       # %bb.0:
   11283 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   11284 ; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
   11285 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11286 ; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x7e,0xca]
   11287 ; X64-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
   11288 ; X64-NEXT:    retq # encoding: [0xc3]
   11289   %res = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
   11290   %res1 = call <4 x i64> @llvm.x86.avx512.mask.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
   11291   %res2 = add <4 x i64> %res, %res1
   11292   ret <4 x i64> %res2
   11293 }
   11294 
   11295 declare <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)
   11296 
   11297 define <4 x i64>@test_int_x86_avx512_maskz_vpermt2var_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
   11298 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
   11299 ; X86:       # %bb.0:
   11300 ; X86-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   11301 ; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
   11302 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   11303 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   11304 ; X86-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
   11305 ; X86-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
   11306 ; X86-NEXT:    retl # encoding: [0xc3]
   11307 ;
   11308 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_q_256:
   11309 ; X64:       # %bb.0:
   11310 ; X64-NEXT:    vmovdqa %ymm1, %ymm3 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xd9]
   11311 ; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x7e,0xda]
   11312 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   11313 ; X64-NEXT:    vpermt2q %ymm2, %ymm0, %ymm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x7e,0xca]
   11314 ; X64-NEXT:    vpaddq %ymm3, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc3]
   11315 ; X64-NEXT:    retq # encoding: [0xc3]
   11316   %res = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
   11317   %res1 = call <4 x i64> @llvm.x86.avx512.maskz.vpermt2var.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
   11318   %res2 = add <4 x i64> %res, %res1
   11319   ret <4 x i64> %res2
   11320 }
   11321 
   11322 define void @test_mask_compress_store_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
   11323 ; X86-LABEL: test_mask_compress_store_pd_128:
   11324 ; X86:       # %bb.0:
   11325 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11326 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11327 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11328 ; X86-NEXT:    vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
   11329 ; X86-NEXT:    retl # encoding: [0xc3]
   11330 ;
   11331 ; X64-LABEL: test_mask_compress_store_pd_128:
   11332 ; X64:       # %bb.0:
   11333 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11334 ; X64-NEXT:    vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
   11335 ; X64-NEXT:    retq # encoding: [0xc3]
   11336   call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
   11337   ret void
   11338 }
   11339 
   11340 declare void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
   11341 
   11342 define void @test_compress_store_pd_128(i8* %addr, <2 x double> %data) {
   11343 ; X86-LABEL: test_compress_store_pd_128:
   11344 ; X86:       # %bb.0:
   11345 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11346 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11347 ; X86-NEXT:    vcompresspd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x00]
   11348 ; X86-NEXT:    retl # encoding: [0xc3]
   11349 ;
   11350 ; X64-LABEL: test_compress_store_pd_128:
   11351 ; X64:       # %bb.0:
   11352 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11353 ; X64-NEXT:    vcompresspd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8a,0x07]
   11354 ; X64-NEXT:    retq # encoding: [0xc3]
   11355   call void @llvm.x86.avx512.mask.compress.store.pd.128(i8* %addr, <2 x double> %data, i8 -1)
   11356   ret void
   11357 }
   11358 
   11359 define void @test_mask_compress_store_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
   11360 ; X86-LABEL: test_mask_compress_store_ps_128:
   11361 ; X86:       # %bb.0:
   11362 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11363 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11364 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11365 ; X86-NEXT:    vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
   11366 ; X86-NEXT:    retl # encoding: [0xc3]
   11367 ;
   11368 ; X64-LABEL: test_mask_compress_store_ps_128:
   11369 ; X64:       # %bb.0:
   11370 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11371 ; X64-NEXT:    vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
   11372 ; X64-NEXT:    retq # encoding: [0xc3]
   11373   call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
   11374   ret void
   11375 }
   11376 
   11377 declare void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
   11378 
   11379 define void @test_compress_store_ps_128(i8* %addr, <4 x float> %data) {
   11380 ; X86-LABEL: test_compress_store_ps_128:
   11381 ; X86:       # %bb.0:
   11382 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11383 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11384 ; X86-NEXT:    vcompressps %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x00]
   11385 ; X86-NEXT:    retl # encoding: [0xc3]
   11386 ;
   11387 ; X64-LABEL: test_compress_store_ps_128:
   11388 ; X64:       # %bb.0:
   11389 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11390 ; X64-NEXT:    vcompressps %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8a,0x07]
   11391 ; X64-NEXT:    retq # encoding: [0xc3]
   11392   call void @llvm.x86.avx512.mask.compress.store.ps.128(i8* %addr, <4 x float> %data, i8 -1)
   11393   ret void
   11394 }
   11395 
   11396 define void @test_mask_compress_store_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
   11397 ; X86-LABEL: test_mask_compress_store_q_128:
   11398 ; X86:       # %bb.0:
   11399 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11400 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11401 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11402 ; X86-NEXT:    vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
   11403 ; X86-NEXT:    retl # encoding: [0xc3]
   11404 ;
   11405 ; X64-LABEL: test_mask_compress_store_q_128:
   11406 ; X64:       # %bb.0:
   11407 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11408 ; X64-NEXT:    vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
   11409 ; X64-NEXT:    retq # encoding: [0xc3]
   11410   call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
   11411   ret void
   11412 }
   11413 
   11414 declare void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
   11415 
   11416 define void @test_compress_store_q_128(i8* %addr, <2 x i64> %data) {
   11417 ; X86-LABEL: test_compress_store_q_128:
   11418 ; X86:       # %bb.0:
   11419 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11420 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11421 ; X86-NEXT:    vpcompressq %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x00]
   11422 ; X86-NEXT:    retl # encoding: [0xc3]
   11423 ;
   11424 ; X64-LABEL: test_compress_store_q_128:
   11425 ; X64:       # %bb.0:
   11426 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11427 ; X64-NEXT:    vpcompressq %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x8b,0x07]
   11428 ; X64-NEXT:    retq # encoding: [0xc3]
   11429   call void @llvm.x86.avx512.mask.compress.store.q.128(i8* %addr, <2 x i64> %data, i8 -1)
   11430   ret void
   11431 }
   11432 
   11433 define void @test_mask_compress_store_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
   11434 ; X86-LABEL: test_mask_compress_store_d_128:
   11435 ; X86:       # %bb.0:
   11436 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11437 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11438 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11439 ; X86-NEXT:    vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
   11440 ; X86-NEXT:    retl # encoding: [0xc3]
   11441 ;
   11442 ; X64-LABEL: test_mask_compress_store_d_128:
   11443 ; X64:       # %bb.0:
   11444 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11445 ; X64-NEXT:    vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
   11446 ; X64-NEXT:    retq # encoding: [0xc3]
   11447   call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
   11448   ret void
   11449 }
   11450 
   11451 declare void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
   11452 
   11453 define void @test_compress_store_d_128(i8* %addr, <4 x i32> %data) {
   11454 ; X86-LABEL: test_compress_store_d_128:
   11455 ; X86:       # %bb.0:
   11456 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11457 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11458 ; X86-NEXT:    vpcompressd %xmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x00]
   11459 ; X86-NEXT:    retl # encoding: [0xc3]
   11460 ;
   11461 ; X64-LABEL: test_compress_store_d_128:
   11462 ; X64:       # %bb.0:
   11463 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11464 ; X64-NEXT:    vpcompressd %xmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x8b,0x07]
   11465 ; X64-NEXT:    retq # encoding: [0xc3]
   11466   call void @llvm.x86.avx512.mask.compress.store.d.128(i8* %addr, <4 x i32> %data, i8 -1)
   11467   ret void
   11468 }
   11469 
   11470 define <2 x double> @test_mask_expand_load_pd_128(i8* %addr, <2 x double> %data, i8 %mask) {
   11471 ; X86-LABEL: test_mask_expand_load_pd_128:
   11472 ; X86:       # %bb.0:
   11473 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11474 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11475 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11476 ; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
   11477 ; X86-NEXT:    retl # encoding: [0xc3]
   11478 ;
   11479 ; X64-LABEL: test_mask_expand_load_pd_128:
   11480 ; X64:       # %bb.0:
   11481 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11482 ; X64-NEXT:    vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
   11483 ; X64-NEXT:    retq # encoding: [0xc3]
   11484   %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
   11485   ret <2 x double> %res
   11486 }
   11487 
   11488 define <2 x double> @test_maskz_expand_load_pd_128(i8* %addr, i8 %mask) {
   11489 ; X86-LABEL: test_maskz_expand_load_pd_128:
   11490 ; X86:       # %bb.0:
   11491 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11492 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11493 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11494 ; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x00]
   11495 ; X86-NEXT:    retl # encoding: [0xc3]
   11496 ;
   11497 ; X64-LABEL: test_maskz_expand_load_pd_128:
   11498 ; X64:       # %bb.0:
   11499 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11500 ; X64-NEXT:    vexpandpd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x88,0x07]
   11501 ; X64-NEXT:    retq # encoding: [0xc3]
   11502   %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> zeroinitializer, i8 %mask)
   11503   ret <2 x double> %res
   11504 }
   11505 
   11506 declare <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 %mask)
   11507 
   11508 define <2 x double> @test_expand_load_pd_128(i8* %addr, <2 x double> %data) {
   11509 ; X86-LABEL: test_expand_load_pd_128:
   11510 ; X86:       # %bb.0:
   11511 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11512 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11513 ; X86-NEXT:    vexpandpd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x00]
   11514 ; X86-NEXT:    retl # encoding: [0xc3]
   11515 ;
   11516 ; X64-LABEL: test_expand_load_pd_128:
   11517 ; X64:       # %bb.0:
   11518 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11519 ; X64-NEXT:    vexpandpd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x88,0x07]
   11520 ; X64-NEXT:    retq # encoding: [0xc3]
   11521   %res = call <2 x double> @llvm.x86.avx512.mask.expand.load.pd.128(i8* %addr, <2 x double> %data, i8 -1)
   11522   ret <2 x double> %res
   11523 }
   11524 
   11525 define <4 x float> @test_mask_expand_load_ps_128(i8* %addr, <4 x float> %data, i8 %mask) {
   11526 ; X86-LABEL: test_mask_expand_load_ps_128:
   11527 ; X86:       # %bb.0:
   11528 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11529 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11530 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11531 ; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
   11532 ; X86-NEXT:    retl # encoding: [0xc3]
   11533 ;
   11534 ; X64-LABEL: test_mask_expand_load_ps_128:
   11535 ; X64:       # %bb.0:
   11536 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11537 ; X64-NEXT:    vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
   11538 ; X64-NEXT:    retq # encoding: [0xc3]
   11539   %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
   11540   ret <4 x float> %res
   11541 }
   11542 
   11543 define <4 x float> @test_maskz_expand_load_ps_128(i8* %addr, i8 %mask) {
   11544 ; X86-LABEL: test_maskz_expand_load_ps_128:
   11545 ; X86:       # %bb.0:
   11546 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11547 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11548 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11549 ; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x00]
   11550 ; X86-NEXT:    retl # encoding: [0xc3]
   11551 ;
   11552 ; X64-LABEL: test_maskz_expand_load_ps_128:
   11553 ; X64:       # %bb.0:
   11554 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11555 ; X64-NEXT:    vexpandps (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x88,0x07]
   11556 ; X64-NEXT:    retq # encoding: [0xc3]
   11557   %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> zeroinitializer, i8 %mask)
   11558   ret <4 x float> %res
   11559 }
   11560 
   11561 declare <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 %mask)
   11562 
   11563 define <4 x float> @test_expand_load_ps_128(i8* %addr, <4 x float> %data) {
   11564 ; X86-LABEL: test_expand_load_ps_128:
   11565 ; X86:       # %bb.0:
   11566 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11567 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11568 ; X86-NEXT:    vexpandps (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x00]
   11569 ; X86-NEXT:    retl # encoding: [0xc3]
   11570 ;
   11571 ; X64-LABEL: test_expand_load_ps_128:
   11572 ; X64:       # %bb.0:
   11573 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11574 ; X64-NEXT:    vexpandps (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x88,0x07]
   11575 ; X64-NEXT:    retq # encoding: [0xc3]
   11576   %res = call <4 x float> @llvm.x86.avx512.mask.expand.load.ps.128(i8* %addr, <4 x float> %data, i8 -1)
   11577   ret <4 x float> %res
   11578 }
   11579 
   11580 define <2 x i64> @test_mask_expand_load_q_128(i8* %addr, <2 x i64> %data, i8 %mask) {
   11581 ; X86-LABEL: test_mask_expand_load_q_128:
   11582 ; X86:       # %bb.0:
   11583 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11584 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11585 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11586 ; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
   11587 ; X86-NEXT:    retl # encoding: [0xc3]
   11588 ;
   11589 ; X64-LABEL: test_mask_expand_load_q_128:
   11590 ; X64:       # %bb.0:
   11591 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11592 ; X64-NEXT:    vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
   11593 ; X64-NEXT:    retq # encoding: [0xc3]
   11594   %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
   11595   ret <2 x i64> %res
   11596 }
   11597 
   11598 define <2 x i64> @test_maskz_expand_load_q_128(i8* %addr, i8 %mask) {
   11599 ; X86-LABEL: test_maskz_expand_load_q_128:
   11600 ; X86:       # %bb.0:
   11601 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11602 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11603 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11604 ; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x00]
   11605 ; X86-NEXT:    retl # encoding: [0xc3]
   11606 ;
   11607 ; X64-LABEL: test_maskz_expand_load_q_128:
   11608 ; X64:       # %bb.0:
   11609 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11610 ; X64-NEXT:    vpexpandq (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x89,0x07]
   11611 ; X64-NEXT:    retq # encoding: [0xc3]
   11612   %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> zeroinitializer, i8 %mask)
   11613   ret <2 x i64> %res
   11614 }
   11615 
   11616 declare <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 %mask)
   11617 
   11618 define <2 x i64> @test_expand_load_q_128(i8* %addr, <2 x i64> %data) {
   11619 ; X86-LABEL: test_expand_load_q_128:
   11620 ; X86:       # %bb.0:
   11621 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11622 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11623 ; X86-NEXT:    vpexpandq (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x00]
   11624 ; X86-NEXT:    retl # encoding: [0xc3]
   11625 ;
   11626 ; X64-LABEL: test_expand_load_q_128:
   11627 ; X64:       # %bb.0:
   11628 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11629 ; X64-NEXT:    vpexpandq (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x89,0x07]
   11630 ; X64-NEXT:    retq # encoding: [0xc3]
   11631   %res = call <2 x i64> @llvm.x86.avx512.mask.expand.load.q.128(i8* %addr, <2 x i64> %data, i8 -1)
   11632   ret <2 x i64> %res
   11633 }
   11634 
   11635 define <4 x i32> @test_mask_expand_load_d_128(i8* %addr, <4 x i32> %data, i8 %mask) {
   11636 ; X86-LABEL: test_mask_expand_load_d_128:
   11637 ; X86:       # %bb.0:
   11638 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11639 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11640 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11641 ; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
   11642 ; X86-NEXT:    retl # encoding: [0xc3]
   11643 ;
   11644 ; X64-LABEL: test_mask_expand_load_d_128:
   11645 ; X64:       # %bb.0:
   11646 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11647 ; X64-NEXT:    vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
   11648 ; X64-NEXT:    retq # encoding: [0xc3]
   11649   %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
   11650   ret <4 x i32> %res
   11651 }
   11652 
   11653 define <4 x i32> @test_maskz_expand_load_d_128(i8* %addr, i8 %mask) {
   11654 ; X86-LABEL: test_maskz_expand_load_d_128:
   11655 ; X86:       # %bb.0:
   11656 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11657 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11658 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11659 ; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x00]
   11660 ; X86-NEXT:    retl # encoding: [0xc3]
   11661 ;
   11662 ; X64-LABEL: test_maskz_expand_load_d_128:
   11663 ; X64:       # %bb.0:
   11664 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11665 ; X64-NEXT:    vpexpandd (%rdi), %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x89,0x07]
   11666 ; X64-NEXT:    retq # encoding: [0xc3]
   11667   %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> zeroinitializer, i8 %mask)
   11668   ret <4 x i32> %res
   11669 }
   11670 
   11671 declare <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 %mask)
   11672 
   11673 define <4 x i32> @test_expand_load_d_128(i8* %addr, <4 x i32> %data) {
   11674 ; X86-LABEL: test_expand_load_d_128:
   11675 ; X86:       # %bb.0:
   11676 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11677 ; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11678 ; X86-NEXT:    vpexpandd (%eax), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x00]
   11679 ; X86-NEXT:    retl # encoding: [0xc3]
   11680 ;
   11681 ; X64-LABEL: test_expand_load_d_128:
   11682 ; X64:       # %bb.0:
   11683 ; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
   11684 ; X64-NEXT:    vpexpandd (%rdi), %xmm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x89,0x07]
   11685 ; X64-NEXT:    retq # encoding: [0xc3]
   11686   %res = call <4 x i32> @llvm.x86.avx512.mask.expand.load.d.128(i8* %addr, <4 x i32> %data, i8 -1)
   11687   ret <4 x i32> %res
   11688 }
   11689 
   11690 define void @test_mask_compress_store_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
   11691 ; X86-LABEL: test_mask_compress_store_pd_256:
   11692 ; X86:       # %bb.0:
   11693 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
   11694 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
   11695 ; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
   11696 ; X86-NEXT:    vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
   11697 ; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   11698 ; X86-NEXT:    retl # encoding: [0xc3]
   11699 ;
   11700 ; X64-LABEL: test_mask_compress_store_pd_256:
   11701 ; X64:       # %bb.0:
   11702 ; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
   11703 ; X64-NEXT:    vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
   11704 ; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
   11705 ; X64-NEXT:    retq # encoding: [0xc3]
   11706   call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
   11707   ret void
   11708 }
   11709 
   11710 declare void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
   11711 
define void @test_compress_store_pd_256(i8* %addr, <4 x double> %data) {
; X86-LABEL: test_compress_store_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vcompresspd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vcompresspd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8a,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.pd.256(i8* %addr, <4 x double> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 %mask)

define void @test_compress_store_ps_256(i8* %addr, <8 x float> %data) {
; X86-LABEL: test_compress_store_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vcompressps %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vcompressps %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8a,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.ps.256(i8* %addr, <8 x float> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 %mask)

define void @test_compress_store_q_256(i8* %addr, <4 x i64> %data) {
; X86-LABEL: test_compress_store_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressq %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressq %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x8b,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.q.256(i8* %addr, <4 x i64> %data, i8 -1)
  ret void
}

define void @test_mask_compress_store_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_compress_store_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_compress_store_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
  ret void
}

declare void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 %mask)

define void @test_compress_store_d_256(i8* %addr, <8 x i32> %data) {
; X86-LABEL: test_compress_store_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpcompressd %ymm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_compress_store_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpcompressd %ymm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x8b,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
  call void @llvm.x86.avx512.mask.compress.store.d.256(i8* %addr, <8 x i32> %data, i8 -1)
  ret void
}

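; Expand-load tests: the inverse of the compress stores above. VEXPANDPD,
; VEXPANDPS, VPEXPANDQ and VPEXPANDD read a contiguous run of elements from
; memory and place them in the destination lanes selected by %k1, either
; merging into the passthru operand ({%k1}) or zeroing the unselected lanes
; ({%k1} {z}, driven by a zeroinitializer passthru in the IR). With an
; all-ones mask the operation degenerates to a plain contiguous load.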
define <4 x double> @test_mask_expand_load_pd_256(i8* %addr, <4 x double> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)
  ret <4 x double> %res
}

define <4 x double> @test_maskz_expand_load_pd_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandpd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 %mask)

define <4 x double> @test_expand_load_pd_256(i8* %addr, <4 x double> %data) {
; X86-LABEL: test_expand_load_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vexpandpd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vexpandpd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.expand.load.pd.256(i8* %addr, <4 x double> %data, i8 -1)
  ret <4 x double> %res
}

define <8 x float> @test_mask_expand_load_ps_256(i8* %addr, <8 x float> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)
  ret <8 x float> %res
}

define <8 x float> @test_maskz_expand_load_ps_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vexpandps (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 %mask)

define <8 x float> @test_expand_load_ps_256(i8* %addr, <8 x float> %data) {
; X86-LABEL: test_expand_load_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vexpandps (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vexpandps (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x88,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.expand.load.ps.256(i8* %addr, <8 x float> %data, i8 -1)
  ret <8 x float> %res
}

define <4 x i64> @test_mask_expand_load_q_256(i8* %addr, <4 x i64> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)
  ret <4 x i64> %res
}

define <4 x i64> @test_maskz_expand_load_q_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandq (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> zeroinitializer, i8 %mask)
  ret <4 x i64> %res
}

declare <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 %mask)

define <4 x i64> @test_expand_load_q_256(i8* %addr, <4 x i64> %data) {
; X86-LABEL: test_expand_load_q_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandq (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_q_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandq (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.expand.load.q.256(i8* %addr, <4 x i64> %data, i8 -1)
  ret <4 x i64> %res
}

define <8 x i32> @test_mask_expand_load_d_256(i8* %addr, <8 x i32> %data, i8 %mask) {
; X86-LABEL: test_mask_expand_load_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_expand_load_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)
  ret <8 x i32> %res
}

define <8 x i32> @test_maskz_expand_load_d_256(i8* %addr, i8 %mask) {
; X86-LABEL: test_maskz_expand_load_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_maskz_expand_load_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vpexpandd (%rdi), %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> zeroinitializer, i8 %mask)
  ret <8 x i32> %res
}

declare <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 %mask)

define <8 x i32> @test_expand_load_d_256(i8* %addr, <8 x i32> %data) {
; X86-LABEL: test_expand_load_d_256:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X86-NEXT:    vpexpandd (%eax), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x00]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_expand_load_d_256:
; X64:       # %bb.0:
; X64-NEXT:    kxnorw %k0, %k0, %k1 # encoding: [0xc5,0xfc,0x46,0xc8]
; X64-NEXT:    vpexpandd (%rdi), %ymm0 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x89,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.expand.load.d.256(i8* %addr, <8 x i32> %data, i8 -1)
  ret <8 x i32> %res
}

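; Masked square-root tests: only the zeroing form is exercised here, so the
; passthru is zeroinitializer and the expected encoding carries the EVEX.z
; bit together with the write-mask.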
define <4 x double> @test_sqrt_pd_256(<4 x double> %a0, i8 %mask) {
; X86-LABEL: test_sqrt_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_sqrt_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsqrtpd %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x51,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double> %a0,  <4 x double> zeroinitializer, i8 %mask)
  ret <4 x double> %res
}
declare <4 x double> @llvm.x86.avx512.mask.sqrt.pd.256(<4 x double>, <4 x double>, i8) nounwind readnone

define <8 x float> @test_sqrt_ps_256(<8 x float> %a0, i8 %mask) {
; X86-LABEL: test_sqrt_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_sqrt_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vsqrtps %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7c,0xa9,0x51,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float> %a0, <8 x float> zeroinitializer, i8 %mask)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask.sqrt.ps.256(<8 x float>, <8 x float>, i8) nounwind readnone

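; Rotate tests (variable count below, immediate count further down). Each
; test uses the same three-call pattern: one call merging into %x2 under the
; mask, one with a zeroing passthru, and one with an all-ones mask, with the
; three results summed so a single function checks all three EVEX masking
; encodings. For the dword forms the count is taken modulo 32, i.e. with
; s = n & 31 a left rotate computes (x << s) | (x >> ((32 - s) & 31)).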
declare <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prorv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
; X86-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vprorvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x14,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x14,0xd1]
; X64-NEXT:    vprorvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x14,0xc1]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prorv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prorv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
; X86-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vprorvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x14,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x14,0xd1]
; X64-NEXT:    vprorvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x14,0xc1]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prorv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prorv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
; X86-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vprorvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x14,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x14,0xd1]
; X64-NEXT:    vprorvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x14,0xc1]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prorv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prorv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
; X86-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prorv_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vprorvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x14,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprorvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x14,0xd1]
; X64-NEXT:    vprorvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x14,0xc1]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prorv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

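; Immediate rotates: the tests pass the constant 3 (the %x1 argument is
; otherwise unused), and the count shows up as the trailing immediate byte
; (0x03) of each expected encoding.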
declare <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prol_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
; X86-NEXT:    vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vprold $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc8,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprold $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc8,0x03]
; X64-NEXT:    vprold $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc8,0x03]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prol.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prol_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
; X86-NEXT:    vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vprold $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc8,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprold $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc8,0x03]
; X64-NEXT:    vprold $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc8,0x03]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prol.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prol_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
; X86-NEXT:    vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vprolq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc8,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprolq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc8,0x03]
; X64-NEXT:    vprolq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc8,0x03]
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prol.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prol_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prol_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
; X86-NEXT:    vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prol_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vprolq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc8,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprolq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc8,0x03]
; X64-NEXT:    vprolq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc8,0x03]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prol.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32>, <4 x i32>, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_prolv_d_128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
; X86-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
; X86-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X86-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vprolvd %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0x7d,0x08,0x15,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0x15,0xd1]
; X64-NEXT:    vprolvd %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0x15,0xc1]
; X64-NEXT:    vpaddd %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc3]
; X64-NEXT:    vpaddd %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.prolv.d.128(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32>, <8 x i32>, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_prolv_d_256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
; X86-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
; X86-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X86-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vprolvd %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0x7d,0x28,0x15,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0x15,0xd1]
; X64-NEXT:    vprolvd %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0x15,0xc1]
; X64-NEXT:    vpaddd %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc3]
; X64-NEXT:    vpaddd %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.prolv.d.256(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64>, <2 x i64>, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_prolv_q_128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
; X86-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
; X86-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X86-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vprolvq %xmm1, %xmm0, %xmm3 # encoding: [0x62,0xf2,0xfd,0x08,0x15,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0x15,0xd1]
; X64-NEXT:    vprolvq %xmm1, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0x89,0x15,0xc1]
; X64-NEXT:    vpaddq %xmm3, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc3]
; X64-NEXT:    vpaddq %xmm0, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.prolv.q.128(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64>, <4 x i64>, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_prolv_q_256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
; X86-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
; X86-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X86-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_prolv_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vprolvq %ymm1, %ymm0, %ymm3 # encoding: [0x62,0xf2,0xfd,0x28,0x15,0xd9]
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vprolvq %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0x15,0xd1]
; X64-NEXT:    vprolvq %ymm1, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xa9,0x15,0xc1]
; X64-NEXT:    vpaddq %ymm3, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc3]
; X64-NEXT:    vpaddq %ymm0, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.prolv.q.256(<4 x i64> %x0, <4 x i64> %x1, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

declare <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32>, i32, <4 x i32>, i8)

define <4 x i32>@test_int_x86_avx512_mask_pror_d_128(<4 x i32> %x0, i32 %x1, <4 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
; X86-NEXT:    vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
; X86-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X86-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vprord $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0x6d,0x08,0x72,0xc0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprord $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x09,0x72,0xc0,0x03]
; X64-NEXT:    vprord $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0x89,0x72,0xc0,0x03]
; X64-NEXT:    vpaddd %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xfe,0xc2]
; X64-NEXT:    vpaddd %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 %x3)
  %res1 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> zeroinitializer, i8 %x3)
  %res2 = call <4 x i32> @llvm.x86.avx512.mask.pror.d.128(<4 x i32> %x0, i32 3, <4 x i32> %x2, i8 -1)
  %res3 = add <4 x i32> %res, %res1
  %res4 = add <4 x i32> %res3, %res2
  ret <4 x i32> %res4
}

declare <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32>, i32, <8 x i32>, i8)

define <8 x i32>@test_int_x86_avx512_mask_pror_d_256(<8 x i32> %x0, i32 %x1, <8 x i32> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
; X86-NEXT:    vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
; X86-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X86-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vprord $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0x6d,0x28,0x72,0xc0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprord $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0x75,0x29,0x72,0xc0,0x03]
; X64-NEXT:    vprord $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xa9,0x72,0xc0,0x03]
; X64-NEXT:    vpaddd %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xfe,0xc2]
; X64-NEXT:    vpaddd %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xfe,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 %x3)
  %res1 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> zeroinitializer, i8 %x3)
  %res2 = call <8 x i32> @llvm.x86.avx512.mask.pror.d.256(<8 x i32> %x0, i32 3, <8 x i32> %x2, i8 -1)
  %res3 = add <8 x i32> %res, %res1
  %res4 = add <8 x i32> %res3, %res2
  ret <8 x i32> %res4
}

declare <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64>, i32, <2 x i64>, i8)

define <2 x i64>@test_int_x86_avx512_mask_pror_q_128(<2 x i64> %x0, i32 %x1, <2 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
; X86-NEXT:    vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
; X86-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X86-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vprorq $3, %xmm0, %xmm2 # encoding: [0x62,0xf1,0xed,0x08,0x72,0xc0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprorq $3, %xmm0, %xmm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x09,0x72,0xc0,0x03]
; X64-NEXT:    vprorq $3, %xmm0, %xmm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0x89,0x72,0xc0,0x03]
; X64-NEXT:    vpaddq %xmm2, %xmm0, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0xd4,0xc2]
; X64-NEXT:    vpaddq %xmm0, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf1,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 %x3)
  %res1 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> zeroinitializer, i8 %x3)
  %res2 = call <2 x i64> @llvm.x86.avx512.mask.pror.q.128(<2 x i64> %x0, i32 3, <2 x i64> %x2, i8 -1)
  %res3 = add <2 x i64> %res, %res1
  %res4 = add <2 x i64> %res3, %res2
  ret <2 x i64> %res4
}

declare <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64>, i32, <4 x i64>, i8)

define <4 x i64>@test_int_x86_avx512_mask_pror_q_256(<4 x i64> %x0, i32 %x1, <4 x i64> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pror_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x08]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
; X86-NEXT:    vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
; X86-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X86-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pror_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vprorq $3, %ymm0, %ymm2 # encoding: [0x62,0xf1,0xed,0x28,0x72,0xc0,0x03]
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vprorq $3, %ymm0, %ymm1 {%k1} # encoding: [0x62,0xf1,0xf5,0x29,0x72,0xc0,0x03]
; X64-NEXT:    vprorq $3, %ymm0, %ymm0 {%k1} {z} # encoding: [0x62,0xf1,0xfd,0xa9,0x72,0xc0,0x03]
; X64-NEXT:    vpaddq %ymm2, %ymm0, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0xd4,0xc2]
; X64-NEXT:    vpaddq %ymm0, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xf5,0xd4,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 %x3)
  %res1 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> zeroinitializer, i8 %x3)
  %res2 = call <4 x i64> @llvm.x86.avx512.mask.pror.q.256(<4 x i64> %x0, i32 3, <4 x i64> %x2, i8 -1)
  %res3 = add <4 x i64> %res, %res1
  %res4 = add <4 x i64> %res3, %res2
  ret <4 x i64> %res4
}

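; FMA tests. With an all-ones mask the EVEX instruction can be relaxed to
; the shorter VEX encoding (vfmadd213ps). Under a real write-mask the
; passthru value (%a0) must sit in the destination register, so the compiler
; selects the vfmadd132ps form instead, computing dst = (dst * src1) + src2
; while merging under %k1.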
declare <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_vfmadd256_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa8,0xc2]
; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_mask_vfmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd256_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
; X86-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd256_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x98,0xc1]
; X64-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_vfmadd128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
; X86-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x98,0xc1]
; X64-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double> @test_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c) {
; CHECK-LABEL: test_fmadd256_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0xc2]
; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 -1)
  ret <4 x double> %res
}

define <4 x double> @test_mask_fmadd256_pd(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask) {
; X86-LABEL: test_mask_fmadd256_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
; X86-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmadd256_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x98,0xc1]
; X64-NEXT:    # ymm0 = (ymm0 * ymm1) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %c, i8 %mask)
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double> @test_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
; CHECK-LABEL: test_fmadd128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 -1)
  ret <2 x double> %res
}

define <2 x double> @test_mask_fmadd128_pd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
; X86-LABEL: test_mask_fmadd128_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
; X86-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmadd128_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x98,0xc1]
; X64-NEXT:    # xmm0 = (xmm0 * xmm1) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask)
  ret <2 x double> %res
}

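; mask3 variants return their third operand (the addend), so codegen picks the
; 231 form, merge-masks into that register, and copies it to the return
; register afterwards.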
declare <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
; X86-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb8,0xd1]
; X64-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

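; maskz variants zero the inactive lanes instead of merging, so codegen can
; use the 213 form with {%k1} {z} and needs no trailing register copy.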
declare <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_maskz_vfmadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa8,0xc2]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
; X86-NEXT:    # ymm2 = (ymm0 * ymm1) + ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb8,0xd1]
; X64-NEXT:    # ymm2 = (ymm0 * ymm1) + ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_maskz_vfmadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa8,0xc2]
; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.maskz.vfmadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
; X86-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb8,0xd1]
; X64-NEXT:    # xmm2 = (xmm0 * xmm1) + xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_maskz_vfmadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa8,0xc2]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
; X86-NEXT:    # ymm2 = (ymm0 * ymm1) + ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmadd_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb8,0xd1]
; X64-NEXT:    # ymm2 = (ymm0 * ymm1) + ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_maskz_vfmadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmadd_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmadd213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa8,0xc2]
; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.maskz.vfmadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

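; FMSUB: same 231-form mask3 pattern as above, with the addend subtracted
; instead of added.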
declare <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
; X86-NEXT:    # xmm2 = (xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xba,0xd1]
; X64-NEXT:    # xmm2 = (xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}


declare <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
; X86-NEXT:    # ymm2 = (ymm0 * ymm1) - ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xba,0xd1]
; X64-NEXT:    # ymm2 = (ymm0 * ymm1) - ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
; X86-NEXT:    # xmm2 = (xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xba,0xd1]
; X64-NEXT:    # xmm2 = (xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
; X86-NEXT:    # ymm2 = (ymm0 * ymm1) - ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xba,0xd1]
; X64-NEXT:    # ymm2 = (ymm0 * ymm1) - ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

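; FNMADD: the product is negated before the addend is added
; (result = -(a * b) + c).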
declare <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_vfnmadd256_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xac,0xc2]
; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_mask_vfnmadd256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd256_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
; X86-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd256_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9c,0xc1]
; X64-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmadd.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_vfnmadd128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xac,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfnmadd128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd128_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
; X86-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd128_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9c,0xc1]
; X64-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

define <4 x double> @test_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_vfnmadd256_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xac,0xc2]
; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) + ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfnmadd256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd256_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
; X86-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd256_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9c,0xc1]
; X64-NEXT:    # ymm0 = -(ymm0 * ymm1) + ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_vfnmadd128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmadd213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xac,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) + xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfnmadd128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmadd128_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
; X86-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmadd128_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmadd132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9c,0xc1]
; X64-NEXT:    # xmm0 = -(xmm0 * xmm1) + xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

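; FNMSUB: both the product and the addend are negated
; (result = -(a * b) - c).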
declare <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2) {
; CHECK-LABEL: test_vfnmsub256_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xae,0xc2]
; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) - ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 -1) nounwind
  ret <8 x float> %res
}

define <8 x float> @test_mask_vfnmsub256_ps(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub256_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
; X86-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub256_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x9e,0xc1]
; X64-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfnmsub.ps.256(<8 x float> %a0, <8 x float> %a1, <8 x float> %a2, i8 %mask) nounwind
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2) {
; CHECK-LABEL: test_vfnmsub128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xae,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfnmsub128_ps(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub128_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
; X86-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub128_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x9e,0xc1]
; X64-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfnmsub.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

define <4 x double> @test_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_vfnmsub256_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xae,0xc2]
; CHECK-NEXT:    # ymm0 = -(ymm1 * ymm0) - ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfnmsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub256_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
; X86-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub256_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x9e,0xc1]
; X64-NEXT:    # ymm0 = -(ymm0 * ymm1) - ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfnmsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_vfnmsub128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfnmsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xae,0xc2]
; CHECK-NEXT:    # xmm0 = -(xmm1 * xmm0) - xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfnmsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfnmsub128_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
; X86-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfnmsub128_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x9e,0xc1]
; X64-NEXT:    # xmm0 = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfnmsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

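; mask3 FNMSUB variants, again lowered to the 231 form plus a register copy.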
declare <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
; X86-NEXT:    # xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xbe,0xd1]
; X64-NEXT:    # xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfnmsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
; X86-NEXT:    # ymm2 = -(ymm0 * ymm1) - ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xbe,0xd1]
; X64-NEXT:    # ymm2 = -(ymm0 * ymm1) - ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfnmsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
; X86-NEXT:    # xmm2 = -(xmm0 * xmm1) - xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xbe,0xd1]
; X64-NEXT:    # xmm2 = -(xmm0 * xmm1) - xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfnmsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
; X86-NEXT:    # ymm2 = -(ymm0 * ymm1) - ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfnmsub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfnmsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xbe,0xd1]
; X64-NEXT:    # ymm2 = -(ymm0 * ymm1) - ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfnmsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

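; FMADDSUB alternates subtract and add across lanes (the "+/-" in the
; generated comments); masking behaves the same as for plain FMA.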
declare <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8) nounwind readnone

define <8 x float> @test_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
; CHECK-LABEL: test_fmaddsub256_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x75,0xa6,0xc2]
; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 -1)
  ret <8 x float> %res
}

define <8 x float> @test_mask_fmaddsub256_ps(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask) {
; X86-LABEL: test_mask_fmaddsub256_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
; X86-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmaddsub256_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132ps %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x29,0x96,0xc1]
; X64-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c, i8 %mask)
  ret <8 x float> %res
}

declare <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8) nounwind readnone

define <4 x float> @test_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
; CHECK-LABEL: test_fmaddsub128_ps:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa6,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 -1)
  ret <4 x float> %res
}

define <4 x float> @test_mask_fmaddsub128_ps(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
; X86-LABEL: test_mask_fmaddsub128_ps:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
; X86-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_fmaddsub128_ps:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132ps %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0x6d,0x09,0x96,0xc1]
; X64-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmaddsub.ps.128(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask)
  ret <4 x float> %res
}

declare <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8) nounwind readnone

define <4 x double> @test_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2) {
; CHECK-LABEL: test_vfmaddsub256_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa6,0xc2]
; CHECK-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfmaddsub256_pd(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmaddsub256_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
; X86-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmaddsub256_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132pd %ymm1, %ymm2, %ymm0 {%k1} # encoding: [0x62,0xf2,0xed,0x29,0x96,0xc1]
; X64-NEXT:    # ymm0 = (ymm0 * ymm1) +/- ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

declare <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8) nounwind readnone

define <2 x double> @test_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2) {
; CHECK-LABEL: test_vfmaddsub128_pd:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa6,0xc2]
; CHECK-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfmaddsub128_pd(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) {
; X86-LABEL: test_mask_vfmaddsub128_pd:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
; X86-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmaddsub128_pd:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub132pd %xmm1, %xmm2, %xmm0 {%k1} # encoding: [0x62,0xf2,0xed,0x09,0x96,0xc1]
; X64-NEXT:    # xmm0 = (xmm0 * xmm1) +/- xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmaddsub.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

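; mask3 and maskz FMADDSUB variants follow the same 231-with-copy and
; 213-with-{z} patterns as the FMA tests above.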
   13465 declare <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)
   13466 
   13467 define <2 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
   13468 ; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
   13469 ; X86:       # %bb.0:
   13470 ; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
   13471 ; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
   13472 ; X86-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
   13473 ; X86-NEXT:    # xmm2 = (xmm0 * xmm1) +/- xmm2
   13474 ; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
   13475 ; X86-NEXT:    retl # encoding: [0xc3]
   13476 ;
   13477 ; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_128:
   13478 ; X64:       # %bb.0:
   13479 ; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
   13480 ; X64-NEXT:    vfmaddsub231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb6,0xd1]
   13481 ; X64-NEXT:    # xmm2 = (xmm0 * xmm1) +/- xmm2
   13482 ; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
   13483 ; X64-NEXT:    retq # encoding: [0xc3]
   13484   %res = call <2 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
   13485   ret <2 x double> %res
   13486 }
   13487 
declare <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213pd %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0x89,0xa6,0xc2]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
; X86-NEXT:    # ymm2 = (ymm0 * ymm1) +/- ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb6,0xd1]
; X64-NEXT:    # ymm2 = (ymm0 * ymm1) +/- ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_maskz_vfmaddsub_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
; X86-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213pd %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xa9,0xa6,0xc2]
; X64-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.maskz.vfmaddsub.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

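; The same mask3/maskz pairings are repeated below for single precision at
; 128-bit and 256-bit vector widths.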
declare <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
; X86-NEXT:    # xmm2 = (xmm0 * xmm1) +/- xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb6,0xd1]
; X64-NEXT:    # xmm2 = (xmm0 * xmm1) +/- xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213ps %xmm2, %xmm1, %xmm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0x89,0xa6,0xc2]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) +/- xmm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
; X86-NEXT:    # ymm2 = (ymm0 * ymm1) +/- ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmaddsub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb6,0xd1]
; X64-NEXT:    # ymm2 = (ymm0 * ymm1) +/- ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

declare <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_maskz_vfmaddsub_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
; X86-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_maskz_vfmaddsub_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmaddsub213ps %ymm2, %ymm1, %ymm0 {%k1} {z} # encoding: [0x62,0xf2,0x75,0xa9,0xa6,0xc2]
; X64-NEXT:    # ymm0 = (ymm1 * ymm0) +/- ymm2
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.maskz.vfmaddsub.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

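; The vfmsubadd tests mirror the vfmaddsub ones with the opposite per-lane
; pattern; note '-/+' in the disassembly comments below versus '+/-' above.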
declare <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double>, <2 x double>, <2 x double>, i8)

define <2 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
; X86-NEXT:    # xmm2 = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231pd %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x09,0xb7,0xd1]
; X64-NEXT:    # xmm2 = (xmm0 * xmm1) -/+ xmm2
; X64-NEXT:    vmovapd %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <2 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.128(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, i8 %x3)
  ret <2 x double> %res
}

declare <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>, i8)

define <4 x double>@test_int_x86_avx512_mask3_vfmsubadd_pd_256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
; X86-NEXT:    # ymm2 = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_pd_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231pd %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x29,0xb7,0xd1]
; X64-NEXT:    # ymm2 = (ymm0 * ymm1) -/+ ymm2
; X64-NEXT:    vmovapd %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x double> @llvm.x86.avx512.mask3.vfmsubadd.pd.256(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, i8 %x3)
  ret <4 x double> %res
}

declare <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float>, <4 x float>, <4 x float>, i8)

define <4 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
; X86-NEXT:    # xmm2 = (xmm0 * xmm1) -/+ xmm2
; X86-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_128:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231ps %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xb7,0xd1]
; X64-NEXT:    # xmm2 = (xmm0 * xmm1) -/+ xmm2
; X64-NEXT:    vmovaps %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf8,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <4 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.128(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, i8 %x3)
  ret <4 x float> %res
}

declare <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>, i8)

define <8 x float>@test_int_x86_avx512_mask3_vfmsubadd_ps_256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3) {
; X86-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
; X86:       # %bb.0:
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %eax # encoding: [0x0f,0xb6,0x44,0x24,0x04]
; X86-NEXT:    kmovw %eax, %k1 # encoding: [0xc5,0xf8,0x92,0xc8]
; X86-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
; X86-NEXT:    # ymm2 = (ymm0 * ymm1) -/+ ymm2
; X86-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask3_vfmsubadd_ps_256:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %edi, %k1 # encoding: [0xc5,0xf8,0x92,0xcf]
; X64-NEXT:    vfmsubadd231ps %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xb7,0xd1]
; X64-NEXT:    # ymm2 = (ymm0 * ymm1) -/+ ymm2
; X64-NEXT:    vmovaps %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfc,0x28,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <8 x float> @llvm.x86.avx512.mask3.vfmsubadd.ps.256(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, i8 %x3)
  ret <8 x float> %res
}

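; The remaining tests exercise the memory-operand forms of the masked vfmadd
; intrinsics. In the test names, 'k' passes the %mask argument, 'z' passes an
; all-ones mask (i8 -1), a trailing 'a' gives the load an explicit alignment,
; and 'b' splats a scalar load into the third operand.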
define <4 x float> @test_mask_vfmadd128_ps_rmk(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_ps_rmk:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmk:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <4 x float>, <4 x float>* %ptr_a2
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmka(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_ps_rmka:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmka:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x09,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 8
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 %mask) nounwind
  ret <4 x float> %res
}

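; With a constant all-ones mask the masked intrinsic lowers to the unmasked
; instruction, which can then be compressed from EVEX to the shorter VEX
; encoding, as the checks below show.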
define <4 x float> @test_mask_vfmadd128_ps_rmkz(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_ps_rmkz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmkz:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <4 x float>, <4 x float>* %ptr_a2
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmkza(<4 x float> %a0, <4 x float> %a1, <4 x float>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_ps_rmkza:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfmadd213ps (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmkza:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213ps (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x71,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <4 x float>, <4 x float>* %ptr_a2, align 4
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %a2, i8 -1) nounwind
  ret <4 x float> %res
}

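; In the rmb tests the insertelement splat of a scalar load folds into an
; EVEX embedded broadcast, (%eax){1to4} / (%rdi){1to4}.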
define <4 x float> @test_mask_vfmadd128_ps_rmb(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_ps_rmb:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmb:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_a2
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmba(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_ps_rmba:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmba:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0x75,0x19,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_a2, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 %mask) nounwind
  ret <4 x float> %res
}

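; Even with an all-ones mask the broadcast forms stay EVEX-encoded: embedded
; broadcast has no VEX equivalent, so no EVEX-to-VEX compression is possible.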
define <4 x float> @test_mask_vfmadd128_ps_rmbz(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_ps_rmbz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmbz:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_a2
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
  ret <4 x float> %res
}

define <4 x float> @test_mask_vfmadd128_ps_rmbza(<4 x float> %a0, <4 x float> %a1, float* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_ps_rmbza:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfmadd213ps (%eax){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_ps_rmbza:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213ps (%rdi){1to4}, %xmm1, %xmm0 # encoding: [0x62,0xf2,0x75,0x18,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %q = load float, float* %ptr_a2, align 4
  %vecinit.i = insertelement <4 x float> undef, float %q, i32 0
  %vecinit2.i = insertelement <4 x float> %vecinit.i, float %q, i32 1
  %vecinit4.i = insertelement <4 x float> %vecinit2.i, float %q, i32 2
  %vecinit6.i = insertelement <4 x float> %vecinit4.i, float %q, i32 3
  %res = call <4 x float> @llvm.x86.avx512.mask.vfmadd.ps.128(<4 x float> %a0, <4 x float> %a1, <4 x float> %vecinit6.i, i8 -1) nounwind
  ret <4 x float> %res
}

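; The rmk/rmkz patterns above are repeated below for <2 x double> and
; <4 x double>.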
define <2 x double> @test_mask_vfmadd128_pd_rmk(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd128_pd_rmk:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd213pd (%eax), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_pd_rmk:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x09,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <2 x double>, <2 x double>* %ptr_a2
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 %mask) nounwind
  ret <2 x double> %res
}

define <2 x double> @test_mask_vfmadd128_pd_rmkz(<2 x double> %a0, <2 x double> %a1, <2 x double>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd128_pd_rmkz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfmadd213pd (%eax), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x00]
; X86-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd128_pd_rmkz:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213pd (%rdi), %xmm1, %xmm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf1,0xa8,0x07]
; X64-NEXT:    # xmm0 = (xmm1 * xmm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <2 x double>, <2 x double>* %ptr_a2
  %res = call <2 x double> @llvm.x86.avx512.mask.vfmadd.pd.128(<2 x double> %a0, <2 x double> %a1, <2 x double> %a2, i8 -1) nounwind
  ret <2 x double> %res
}

define <4 x double> @test_mask_vfmadd256_pd_rmk(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2, i8 %mask) {
; X86-LABEL: test_mask_vfmadd256_pd_rmk:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    movzbl {{[0-9]+}}(%esp), %ecx # encoding: [0x0f,0xb6,0x4c,0x24,0x08]
; X86-NEXT:    kmovw %ecx, %k1 # encoding: [0xc5,0xf8,0x92,0xc9]
; X86-NEXT:    vfmadd213pd (%eax), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x00]
; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd256_pd_rmk:
; X64:       # %bb.0:
; X64-NEXT:    kmovw %esi, %k1 # encoding: [0xc5,0xf8,0x92,0xce]
; X64-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 {%k1} # encoding: [0x62,0xf2,0xf5,0x29,0xa8,0x07]
; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <4 x double>, <4 x double>* %ptr_a2
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 %mask) nounwind
  ret <4 x double> %res
}

define <4 x double> @test_mask_vfmadd256_pd_rmkz(<4 x double> %a0, <4 x double> %a1, <4 x double>* %ptr_a2) {
; X86-LABEL: test_mask_vfmadd256_pd_rmkz:
; X86:       # %bb.0:
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vfmadd213pd (%eax), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x00]
; X86-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_mask_vfmadd256_pd_rmkz:
; X64:       # %bb.0:
; X64-NEXT:    vfmadd213pd (%rdi), %ymm1, %ymm0 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0xf5,0xa8,0x07]
; X64-NEXT:    # ymm0 = (ymm1 * ymm0) + mem
; X64-NEXT:    retq # encoding: [0xc3]
  %a2 = load <4 x double>, <4 x double>* %ptr_a2
  %res = call <4 x double> @llvm.x86.avx512.mask.vfmadd.pd.256(<4 x double> %a0, <4 x double> %a1, <4 x double> %a2, i8 -1) nounwind
  ret <4 x double> %res
}