; (code-search mirror breadcrumb, neutralized into an IR comment so the header does not read as content) Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X86
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512bw --show-mc-encoding | FileCheck %s --check-prefixes=CHECK,X64
      4 
      5 define <32 x i16> @test_mask_packs_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
      6 ; CHECK-LABEL: test_mask_packs_epi32_rr_512:
      7 ; CHECK:       # %bb.0:
      8 ; CHECK-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0xc1]
      9 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
        ; Unmasked reg-reg form: signed-saturating dword->word pack; same codegen on X86 and X64.
     10   %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
     11   ret <32 x i16> %1
     12 }
     13 
     14 define <32 x i16> @test_mask_packs_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
     15 ; X86-LABEL: test_mask_packs_epi32_rrk_512:
     16 ; X86:       # %bb.0:
     17 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
     18 ; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
     19 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
     20 ; X86-NEXT:    retl # encoding: [0xc3]
     21 ;
     22 ; X64-LABEL: test_mask_packs_epi32_rrk_512:
     23 ; X64:       # %bb.0:
     24 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
     25 ; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0xd1]
     26 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
     27 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Merge-masking: the select below must lower to vpackssdw {%k1} writing into the
        ; %passThru register (zmm2), then a copy back to the return register.
     28   %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
     29   %2 = bitcast i32 %mask to <32 x i1>
     30   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
     31   ret <32 x i16> %3
     32 }
     33 
     34 define <32 x i16> @test_mask_packs_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
     35 ; X86-LABEL: test_mask_packs_epi32_rrkz_512:
     36 ; X86:       # %bb.0:
     37 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
     38 ; X86-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
     39 ; X86-NEXT:    retl # encoding: [0xc3]
     40 ;
     41 ; X64-LABEL: test_mask_packs_epi32_rrkz_512:
     42 ; X64:       # %bb.0:
     43 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
     44 ; X64-NEXT:    vpackssdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0xc1]
     45 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Zero-masking: select against zeroinitializer must fold to the {z} form (no blend copy).
     46   %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
     47   %2 = bitcast i32 %mask to <32 x i1>
     48   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
     49   ret <32 x i16> %3
     50 }
     51 
     52 define <32 x i16> @test_mask_packs_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
     53 ; X86-LABEL: test_mask_packs_epi32_rm_512:
     54 ; X86:       # %bb.0:
     55 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
     56 ; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x00]
     57 ; X86-NEXT:    retl # encoding: [0xc3]
     58 ;
     59 ; X64-LABEL: test_mask_packs_epi32_rm_512:
     60 ; X64:       # %bb.0:
     61 ; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x6b,0x07]
     62 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory form: the full-width load of %b must be folded into vpackssdw's memory operand.
     63   %b = load <16 x i32>, <16 x i32>* %ptr_b
     64   %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
     65   ret <32 x i16> %1
     66 }
     67 
     68 define <32 x i16> @test_mask_packs_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
     69 ; X86-LABEL: test_mask_packs_epi32_rmk_512:
     70 ; X86:       # %bb.0:
     71 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
     72 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
     73 ; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x08]
     74 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
     75 ; X86-NEXT:    retl # encoding: [0xc3]
     76 ;
     77 ; X64-LABEL: test_mask_packs_epi32_rmk_512:
     78 ; X64:       # %bb.0:
     79 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
     80 ; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x6b,0x0f]
     81 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
     82 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory + merge-masking combined: folded load and {%k1} blend into %passThru.
     83   %b = load <16 x i32>, <16 x i32>* %ptr_b
     84   %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
     85   %2 = bitcast i32 %mask to <32 x i1>
     86   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
     87   ret <32 x i16> %3
     88 }
     89 
     90 define <32 x i16> @test_mask_packs_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
     91 ; X86-LABEL: test_mask_packs_epi32_rmkz_512:
     92 ; X86:       # %bb.0:
     93 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
     94 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
     95 ; X86-NEXT:    vpackssdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x00]
     96 ; X86-NEXT:    retl # encoding: [0xc3]
     97 ;
     98 ; X64-LABEL: test_mask_packs_epi32_rmkz_512:
     99 ; X64:       # %bb.0:
    100 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    101 ; X64-NEXT:    vpackssdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x6b,0x07]
    102 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory + zero-masking: folded load with the {z} destination form.
    103   %b = load <16 x i32>, <16 x i32>* %ptr_b
    104   %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
    105   %2 = bitcast i32 %mask to <32 x i1>
    106   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
    107   ret <32 x i16> %3
    108 }
    109 
    110 define <32 x i16> @test_mask_packs_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
    111 ; X86-LABEL: test_mask_packs_epi32_rmb_512:
    112 ; X86:       # %bb.0:
    113 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    114 ; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x00]
    115 ; X86-NEXT:    retl # encoding: [0xc3]
    116 ;
    117 ; X64-LABEL: test_mask_packs_epi32_rmb_512:
    118 ; X64:       # %bb.0:
    119 ; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x58,0x6b,0x07]
    120 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Broadcast form: scalar i32 load splatted via insertelement+shufflevector must
        ; become an embedded {1to16} broadcast memory operand.
    121   %q = load i32, i32* %ptr_b
    122   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
    123   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
    124   %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
    125   ret <32 x i16> %1
    126 }
    127 
    128 define <32 x i16> @test_mask_packs_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
    129 ; X86-LABEL: test_mask_packs_epi32_rmbk_512:
    130 ; X86:       # %bb.0:
    131 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    132 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    133 ; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x08]
    134 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    135 ; X86-NEXT:    retl # encoding: [0xc3]
    136 ;
    137 ; X64-LABEL: test_mask_packs_epi32_rmbk_512:
    138 ; X64:       # %bb.0:
    139 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    140 ; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x59,0x6b,0x0f]
    141 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    142 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Broadcast + merge-masking: {1to16} operand combined with {%k1} blend into %passThru.
    143   %q = load i32, i32* %ptr_b
    144   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
    145   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
    146   %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
    147   %2 = bitcast i32 %mask to <32 x i1>
    148   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
    149   ret <32 x i16> %3
    150 }
    151 
    152 define <32 x i16> @test_mask_packs_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
    153 ; X86-LABEL: test_mask_packs_epi32_rmbkz_512:
    154 ; X86:       # %bb.0:
    155 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    156 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    157 ; X86-NEXT:    vpackssdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x00]
    158 ; X86-NEXT:    retl # encoding: [0xc3]
    159 ;
    160 ; X64-LABEL: test_mask_packs_epi32_rmbkz_512:
    161 ; X64:       # %bb.0:
    162 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    163 ; X64-NEXT:    vpackssdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xd9,0x6b,0x07]
    164 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Broadcast + zero-masking: {1to16} operand combined with the {z} destination form.
    165   %q = load i32, i32* %ptr_b
    166   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
    167   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
    168   %1 = call <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32> %a, <16 x i32> %b)
    169   %2 = bitcast i32 %mask to <32 x i1>
    170   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
    171   ret <32 x i16> %3
    172 }
    173 
    174 declare <32 x i16> @llvm.x86.avx512.packssdw.512(<16 x i32>, <16 x i32>)
    175 
    176 define <64 x i8> @test_mask_packs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
    177 ; CHECK-LABEL: test_mask_packs_epi16_rr_512:
    178 ; CHECK:       # %bb.0:
    179 ; CHECK-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0xc1]
    180 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
        ; Unmasked reg-reg form: signed-saturating word->byte pack (64-bit mask variant below).
    181   %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
    182   ret <64 x i8> %1
    183 }
    184 
    185 define <64 x i8> @test_mask_packs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
    186 ; X86-LABEL: test_mask_packs_epi16_rrk_512:
    187 ; X86:       # %bb.0:
    188 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    189 ; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
    190 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    191 ; X86-NEXT:    retl # encoding: [0xc3]
    192 ;
    193 ; X64-LABEL: test_mask_packs_epi16_rrk_512:
    194 ; X64:       # %bb.0:
    195 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    196 ; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0xd1]
    197 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    198 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Merge-masking with a 64-bit mask (i64 -> <64 x i1>): X86 loads it via kmovq from stack,
        ; X64 moves it from %rdi.
    199   %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
    200   %2 = bitcast i64 %mask to <64 x i1>
    201   %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
    202   ret <64 x i8> %3
    203 }
    204 
    205 define <64 x i8> @test_mask_packs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
    206 ; X86-LABEL: test_mask_packs_epi16_rrkz_512:
    207 ; X86:       # %bb.0:
    208 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    209 ; X86-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
    210 ; X86-NEXT:    retl # encoding: [0xc3]
    211 ;
    212 ; X64-LABEL: test_mask_packs_epi16_rrkz_512:
    213 ; X64:       # %bb.0:
    214 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    215 ; X64-NEXT:    vpacksswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0xc1]
    216 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Zero-masking variant of the word->byte pack; expects the {z} encoding.
    217   %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
    218   %2 = bitcast i64 %mask to <64 x i1>
    219   %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
    220   ret <64 x i8> %3
    221 }
    222 
    223 define <64 x i8> @test_mask_packs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
    224 ; X86-LABEL: test_mask_packs_epi16_rm_512:
    225 ; X86:       # %bb.0:
    226 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    227 ; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x00]
    228 ; X86-NEXT:    retl # encoding: [0xc3]
    229 ;
    230 ; X64-LABEL: test_mask_packs_epi16_rm_512:
    231 ; X64:       # %bb.0:
    232 ; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x63,0x07]
    233 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory form: the <32 x i16> load must fold into vpacksswb's memory operand.
    234   %b = load <32 x i16>, <32 x i16>* %ptr_b
    235   %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
    236   ret <64 x i8> %1
    237 }
    238 
    239 define <64 x i8> @test_mask_packs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
    240 ; X86-LABEL: test_mask_packs_epi16_rmk_512:
    241 ; X86:       # %bb.0:
    242 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    243 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
    244 ; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x08]
    245 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    246 ; X86-NEXT:    retl # encoding: [0xc3]
    247 ;
    248 ; X64-LABEL: test_mask_packs_epi16_rmk_512:
    249 ; X64:       # %bb.0:
    250 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
    251 ; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x63,0x0f]
    252 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    253 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory + merge-masking: folded load plus {%k1} blend into %passThru.
    254   %b = load <32 x i16>, <32 x i16>* %ptr_b
    255   %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
    256   %2 = bitcast i64 %mask to <64 x i1>
    257   %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
    258   ret <64 x i8> %3
    259 }
    260 
    261 define <64 x i8> @test_mask_packs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
    262 ; X86-LABEL: test_mask_packs_epi16_rmkz_512:
    263 ; X86:       # %bb.0:
    264 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    265 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
    266 ; X86-NEXT:    vpacksswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x00]
    267 ; X86-NEXT:    retl # encoding: [0xc3]
    268 ;
    269 ; X64-LABEL: test_mask_packs_epi16_rmkz_512:
    270 ; X64:       # %bb.0:
    271 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
    272 ; X64-NEXT:    vpacksswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x63,0x07]
    273 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory + zero-masking: folded load with the {z} destination form.
    274   %b = load <32 x i16>, <32 x i16>* %ptr_b
    275   %1 = call <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16> %a, <32 x i16> %b)
    276   %2 = bitcast i64 %mask to <64 x i1>
    277   %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
    278   ret <64 x i8> %3
    279 }
    280 
    281 declare <64 x i8> @llvm.x86.avx512.packsswb.512(<32 x i16>, <32 x i16>)
    282 
    283 
    284 define <32 x i16> @test_mask_packus_epi32_rr_512(<16 x i32> %a, <16 x i32> %b) {
    285 ; CHECK-LABEL: test_mask_packus_epi32_rr_512:
    286 ; CHECK:       # %bb.0:
    287 ; CHECK-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0xc1]
    288 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
        ; Unmasked reg-reg form: unsigned-saturating dword->word pack (0F38 map, opcode 0x2b).
    289   %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
    290   ret <32 x i16> %1
    291 }
    292 
    293 define <32 x i16> @test_mask_packus_epi32_rrk_512(<16 x i32> %a, <16 x i32> %b, <32 x i16> %passThru, i32 %mask) {
    294 ; X86-LABEL: test_mask_packus_epi32_rrk_512:
    295 ; X86:       # %bb.0:
    296 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    297 ; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
    298 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    299 ; X86-NEXT:    retl # encoding: [0xc3]
    300 ;
    301 ; X64-LABEL: test_mask_packus_epi32_rrk_512:
    302 ; X64:       # %bb.0:
    303 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    304 ; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0xd1]
    305 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    306 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Merge-masking: select must fold into vpackusdw {%k1} blending into %passThru.
    307   %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
    308   %2 = bitcast i32 %mask to <32 x i1>
    309   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
    310   ret <32 x i16> %3
    311 }
    312 
    313 define <32 x i16> @test_mask_packus_epi32_rrkz_512(<16 x i32> %a, <16 x i32> %b, i32 %mask) {
    314 ; X86-LABEL: test_mask_packus_epi32_rrkz_512:
    315 ; X86:       # %bb.0:
    316 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    317 ; X86-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
    318 ; X86-NEXT:    retl # encoding: [0xc3]
    319 ;
    320 ; X64-LABEL: test_mask_packus_epi32_rrkz_512:
    321 ; X64:       # %bb.0:
    322 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    323 ; X64-NEXT:    vpackusdw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0xc1]
    324 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Zero-masking: select against zeroinitializer must use the {z} destination form.
    325   %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
    326   %2 = bitcast i32 %mask to <32 x i1>
    327   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
    328   ret <32 x i16> %3
    329 }
    330 
    331 define <32 x i16> @test_mask_packus_epi32_rm_512(<16 x i32> %a, <16 x i32>* %ptr_b) {
    332 ; X86-LABEL: test_mask_packus_epi32_rm_512:
    333 ; X86:       # %bb.0:
    334 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    335 ; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x00]
    336 ; X86-NEXT:    retl # encoding: [0xc3]
    337 ;
    338 ; X64-LABEL: test_mask_packus_epi32_rm_512:
    339 ; X64:       # %bb.0:
    340 ; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x2b,0x07]
    341 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory form: the full-width load of %b must fold into vpackusdw's memory operand.
    342   %b = load <16 x i32>, <16 x i32>* %ptr_b
    343   %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
    344   ret <32 x i16> %1
    345 }
    346 
    347 define <32 x i16> @test_mask_packus_epi32_rmk_512(<16 x i32> %a, <16 x i32>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
    348 ; X86-LABEL: test_mask_packus_epi32_rmk_512:
    349 ; X86:       # %bb.0:
    350 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    351 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    352 ; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x08]
    353 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    354 ; X86-NEXT:    retl # encoding: [0xc3]
    355 ;
    356 ; X64-LABEL: test_mask_packus_epi32_rmk_512:
    357 ; X64:       # %bb.0:
    358 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    359 ; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x2b,0x0f]
    360 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    361 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory + merge-masking: folded load and {%k1} blend into %passThru.
    362   %b = load <16 x i32>, <16 x i32>* %ptr_b
    363   %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
    364   %2 = bitcast i32 %mask to <32 x i1>
    365   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
    366   ret <32 x i16> %3
    367 }
    368 
    369 define <32 x i16> @test_mask_packus_epi32_rmkz_512(<16 x i32> %a, <16 x i32>* %ptr_b, i32 %mask) {
    370 ; X86-LABEL: test_mask_packus_epi32_rmkz_512:
    371 ; X86:       # %bb.0:
    372 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    373 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    374 ; X86-NEXT:    vpackusdw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x00]
    375 ; X86-NEXT:    retl # encoding: [0xc3]
    376 ;
    377 ; X64-LABEL: test_mask_packus_epi32_rmkz_512:
    378 ; X64:       # %bb.0:
    379 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    380 ; X64-NEXT:    vpackusdw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x2b,0x07]
    381 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory + zero-masking: folded load with the {z} destination form.
    382   %b = load <16 x i32>, <16 x i32>* %ptr_b
    383   %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
    384   %2 = bitcast i32 %mask to <32 x i1>
    385   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
    386   ret <32 x i16> %3
    387 }
    388 
    389 define <32 x i16> @test_mask_packus_epi32_rmb_512(<16 x i32> %a, i32* %ptr_b) {
    390 ; X86-LABEL: test_mask_packus_epi32_rmb_512:
    391 ; X86:       # %bb.0:
    392 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    393 ; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x00]
    394 ; X86-NEXT:    retl # encoding: [0xc3]
    395 ;
    396 ; X64-LABEL: test_mask_packus_epi32_rmb_512:
    397 ; X64:       # %bb.0:
    398 ; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x58,0x2b,0x07]
    399 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Broadcast form: scalar i32 splat pattern must lower to the embedded {1to16} operand.
    400   %q = load i32, i32* %ptr_b
    401   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
    402   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
    403   %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
    404   ret <32 x i16> %1
    405 }
    406 
    407 define <32 x i16> @test_mask_packus_epi32_rmbk_512(<16 x i32> %a, i32* %ptr_b, <32 x i16> %passThru, i32 %mask) {
    408 ; X86-LABEL: test_mask_packus_epi32_rmbk_512:
    409 ; X86:       # %bb.0:
    410 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    411 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    412 ; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x08]
    413 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    414 ; X86-NEXT:    retl # encoding: [0xc3]
    415 ;
    416 ; X64-LABEL: test_mask_packus_epi32_rmbk_512:
    417 ; X64:       # %bb.0:
    418 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    419 ; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0x7d,0x59,0x2b,0x0f]
    420 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    421 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Broadcast + merge-masking: {1to16} operand combined with {%k1} blend into %passThru.
    422   %q = load i32, i32* %ptr_b
    423   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
    424   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
    425   %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
    426   %2 = bitcast i32 %mask to <32 x i1>
    427   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passThru
    428   ret <32 x i16> %3
    429 }
    430 
    431 define <32 x i16> @test_mask_packus_epi32_rmbkz_512(<16 x i32> %a, i32* %ptr_b, i32 %mask) {
    432 ; X86-LABEL: test_mask_packus_epi32_rmbkz_512:
    433 ; X86:       # %bb.0:
    434 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    435 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    436 ; X86-NEXT:    vpackusdw (%eax){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x00]
    437 ; X86-NEXT:    retl # encoding: [0xc3]
    438 ;
    439 ; X64-LABEL: test_mask_packus_epi32_rmbkz_512:
    440 ; X64:       # %bb.0:
    441 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    442 ; X64-NEXT:    vpackusdw (%rdi){1to16}, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xd9,0x2b,0x07]
    443 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Broadcast + zero-masking: {1to16} operand combined with the {z} destination form.
    444   %q = load i32, i32* %ptr_b
    445   %vecinit.i = insertelement <16 x i32> undef, i32 %q, i32 0
    446   %b = shufflevector <16 x i32> %vecinit.i, <16 x i32> undef, <16 x i32> zeroinitializer
    447   %1 = call <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32> %a, <16 x i32> %b)
    448   %2 = bitcast i32 %mask to <32 x i1>
    449   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
    450   ret <32 x i16> %3
    451 }
    452 
    453 declare <32 x i16> @llvm.x86.avx512.packusdw.512(<16 x i32>, <16 x i32>)
    454 
    455 define <64 x i8> @test_mask_packus_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
    456 ; CHECK-LABEL: test_mask_packus_epi16_rr_512:
    457 ; CHECK:       # %bb.0:
    458 ; CHECK-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0xc1]
    459 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
        ; Unmasked reg-reg form: unsigned-saturating word->byte pack.
    460   %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
    461   ret <64 x i8> %1
    462 }
    463 
    464 define <64 x i8> @test_mask_packus_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <64 x i8> %passThru, i64 %mask) {
    465 ; X86-LABEL: test_mask_packus_epi16_rrk_512:
    466 ; X86:       # %bb.0:
    467 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    468 ; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
    469 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    470 ; X86-NEXT:    retl # encoding: [0xc3]
    471 ;
    472 ; X64-LABEL: test_mask_packus_epi16_rrk_512:
    473 ; X64:       # %bb.0:
    474 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    475 ; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0xd1]
    476 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    477 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Merge-masking with a 64-bit mask: blend into %passThru (zmm2), then copy to zmm0.
    478   %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
    479   %2 = bitcast i64 %mask to <64 x i1>
    480   %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
    481   ret <64 x i8> %3
    482 }
    483 
    484 define <64 x i8> @test_mask_packus_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i64 %mask) {
    485 ; X86-LABEL: test_mask_packus_epi16_rrkz_512:
    486 ; X86:       # %bb.0:
    487 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
    488 ; X86-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
    489 ; X86-NEXT:    retl # encoding: [0xc3]
    490 ;
    491 ; X64-LABEL: test_mask_packus_epi16_rrkz_512:
    492 ; X64:       # %bb.0:
    493 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
    494 ; X64-NEXT:    vpackuswb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0xc1]
    495 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Zero-masking variant; expects the {z} encoding with no blend copy.
    496   %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
    497   %2 = bitcast i64 %mask to <64 x i1>
    498   %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
    499   ret <64 x i8> %3
    500 }
    501 
    502 define <64 x i8> @test_mask_packus_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
    503 ; X86-LABEL: test_mask_packus_epi16_rm_512:
    504 ; X86:       # %bb.0:
    505 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    506 ; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x00]
    507 ; X86-NEXT:    retl # encoding: [0xc3]
    508 ;
    509 ; X64-LABEL: test_mask_packus_epi16_rm_512:
    510 ; X64:       # %bb.0:
    511 ; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x67,0x07]
    512 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory form: the <32 x i16> load must fold into vpackuswb's memory operand.
    513   %b = load <32 x i16>, <32 x i16>* %ptr_b
    514   %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
    515   ret <64 x i8> %1
    516 }
    517 
    518 define <64 x i8> @test_mask_packus_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <64 x i8> %passThru, i64 %mask) {
    519 ; X86-LABEL: test_mask_packus_epi16_rmk_512:
    520 ; X86:       # %bb.0:
    521 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    522 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
    523 ; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x08]
    524 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    525 ; X86-NEXT:    retl # encoding: [0xc3]
    526 ;
    527 ; X64-LABEL: test_mask_packus_epi16_rmk_512:
    528 ; X64:       # %bb.0:
    529 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
    530 ; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0x67,0x0f]
    531 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    532 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory + merge-masking: folded load plus {%k1} blend into %passThru.
    533   %b = load <32 x i16>, <32 x i16>* %ptr_b
    534   %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
    535   %2 = bitcast i64 %mask to <64 x i1>
    536   %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passThru
    537   ret <64 x i8> %3
    538 }
    539 
    540 define <64 x i8> @test_mask_packus_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i64 %mask) {
    541 ; X86-LABEL: test_mask_packus_epi16_rmkz_512:
    542 ; X86:       # %bb.0:
    543 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    544 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x08]
    545 ; X86-NEXT:    vpackuswb (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x00]
    546 ; X86-NEXT:    retl # encoding: [0xc3]
    547 ;
    548 ; X64-LABEL: test_mask_packus_epi16_rmkz_512:
    549 ; X64:       # %bb.0:
    550 ; X64-NEXT:    kmovq %rsi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xce]
    551 ; X64-NEXT:    vpackuswb (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x67,0x07]
    552 ; X64-NEXT:    retq # encoding: [0xc3]
        ; Memory + zero-masking: folded load with the {z} destination form.
    553   %b = load <32 x i16>, <32 x i16>* %ptr_b
    554   %1 = call <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16> %a, <32 x i16> %b)
    555   %2 = bitcast i64 %mask to <64 x i1>
    556   %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> zeroinitializer
    557   ret <64 x i8> %3
    558 }
    559 
    560 declare <64 x i8> @llvm.x86.avx512.packuswb.512(<32 x i16>, <32 x i16>)
    561 
    562 define <32 x i16> @test_mask_adds_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
    563 ; CHECK-LABEL: test_mask_adds_epi16_rr_512:
    564 ; CHECK:       # %bb.0:
    565 ; CHECK-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0xc1]
    566 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    567   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
    568   ret <32 x i16> %res
    569 }
    570 
    571 define <32 x i16> @test_mask_adds_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
    572 ; X86-LABEL: test_mask_adds_epi16_rrk_512:
    573 ; X86:       # %bb.0:
    574 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    575 ; X86-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
    576 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    577 ; X86-NEXT:    retl # encoding: [0xc3]
    578 ;
    579 ; X64-LABEL: test_mask_adds_epi16_rrk_512:
    580 ; X64:       # %bb.0:
    581 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    582 ; X64-NEXT:    vpaddsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0xd1]
    583 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    584 ; X64-NEXT:    retq # encoding: [0xc3]
    585   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
    586   ret <32 x i16> %res
    587 }
    588 
    589 define <32 x i16> @test_mask_adds_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
    590 ; X86-LABEL: test_mask_adds_epi16_rrkz_512:
    591 ; X86:       # %bb.0:
    592 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    593 ; X86-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
    594 ; X86-NEXT:    retl # encoding: [0xc3]
    595 ;
    596 ; X64-LABEL: test_mask_adds_epi16_rrkz_512:
    597 ; X64:       # %bb.0:
    598 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    599 ; X64-NEXT:    vpaddsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0xc1]
    600 ; X64-NEXT:    retq # encoding: [0xc3]
    601   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
    602   ret <32 x i16> %res
    603 }
    604 
    605 define <32 x i16> @test_mask_adds_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
    606 ; X86-LABEL: test_mask_adds_epi16_rm_512:
    607 ; X86:       # %bb.0:
    608 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    609 ; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x00]
    610 ; X86-NEXT:    retl # encoding: [0xc3]
    611 ;
    612 ; X64-LABEL: test_mask_adds_epi16_rm_512:
    613 ; X64:       # %bb.0:
    614 ; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xed,0x07]
    615 ; X64-NEXT:    retq # encoding: [0xc3]
    616   %b = load <32 x i16>, <32 x i16>* %ptr_b
    617   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
    618   ret <32 x i16> %res
    619 }
    620 
    621 define <32 x i16> @test_mask_adds_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
    622 ; X86-LABEL: test_mask_adds_epi16_rmk_512:
    623 ; X86:       # %bb.0:
    624 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    625 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    626 ; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x08]
    627 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    628 ; X86-NEXT:    retl # encoding: [0xc3]
    629 ;
    630 ; X64-LABEL: test_mask_adds_epi16_rmk_512:
    631 ; X64:       # %bb.0:
    632 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    633 ; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xed,0x0f]
    634 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    635 ; X64-NEXT:    retq # encoding: [0xc3]
    636   %b = load <32 x i16>, <32 x i16>* %ptr_b
    637   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
    638   ret <32 x i16> %res
    639 }
    640 
    641 define <32 x i16> @test_mask_adds_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
    642 ; X86-LABEL: test_mask_adds_epi16_rmkz_512:
    643 ; X86:       # %bb.0:
    644 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    645 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    646 ; X86-NEXT:    vpaddsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x00]
    647 ; X86-NEXT:    retl # encoding: [0xc3]
    648 ;
    649 ; X64-LABEL: test_mask_adds_epi16_rmkz_512:
    650 ; X64:       # %bb.0:
    651 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    652 ; X64-NEXT:    vpaddsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xed,0x07]
    653 ; X64-NEXT:    retq # encoding: [0xc3]
    654   %b = load <32 x i16>, <32 x i16>* %ptr_b
    655   %res = call <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
    656   ret <32 x i16> %res
    657 }
    658 
    659 declare <32 x i16> @llvm.x86.avx512.mask.padds.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    660 
    661 define <32 x i16> @test_mask_subs_epi16_rr_512(<32 x i16> %a, <32 x i16> %b) {
    662 ; CHECK-LABEL: test_mask_subs_epi16_rr_512:
    663 ; CHECK:       # %bb.0:
    664 ; CHECK-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0xc1]
    665 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    666   %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
    667   ret <32 x i16> %res
    668 }
    669 
    670 define <32 x i16> @test_mask_subs_epi16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
    671 ; X86-LABEL: test_mask_subs_epi16_rrk_512:
    672 ; X86:       # %bb.0:
    673 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    674 ; X86-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
    675 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    676 ; X86-NEXT:    retl # encoding: [0xc3]
    677 ;
    678 ; X64-LABEL: test_mask_subs_epi16_rrk_512:
    679 ; X64:       # %bb.0:
    680 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    681 ; X64-NEXT:    vpsubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0xd1]
    682 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    683 ; X64-NEXT:    retq # encoding: [0xc3]
    684   %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
    685   ret <32 x i16> %res
    686 }
    687 
    688 define <32 x i16> @test_mask_subs_epi16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
    689 ; X86-LABEL: test_mask_subs_epi16_rrkz_512:
    690 ; X86:       # %bb.0:
    691 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    692 ; X86-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
    693 ; X86-NEXT:    retl # encoding: [0xc3]
    694 ;
    695 ; X64-LABEL: test_mask_subs_epi16_rrkz_512:
    696 ; X64:       # %bb.0:
    697 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    698 ; X64-NEXT:    vpsubsw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0xc1]
    699 ; X64-NEXT:    retq # encoding: [0xc3]
    700   %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
    701   ret <32 x i16> %res
    702 }
    703 
    704 define <32 x i16> @test_mask_subs_epi16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
    705 ; X86-LABEL: test_mask_subs_epi16_rm_512:
    706 ; X86:       # %bb.0:
    707 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    708 ; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x00]
    709 ; X86-NEXT:    retl # encoding: [0xc3]
    710 ;
    711 ; X64-LABEL: test_mask_subs_epi16_rm_512:
    712 ; X64:       # %bb.0:
    713 ; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe9,0x07]
    714 ; X64-NEXT:    retq # encoding: [0xc3]
    715   %b = load <32 x i16>, <32 x i16>* %ptr_b
    716   %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
    717   ret <32 x i16> %res
    718 }
    719 
    720 define <32 x i16> @test_mask_subs_epi16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
    721 ; X86-LABEL: test_mask_subs_epi16_rmk_512:
    722 ; X86:       # %bb.0:
    723 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    724 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    725 ; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x08]
    726 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    727 ; X86-NEXT:    retl # encoding: [0xc3]
    728 ;
    729 ; X64-LABEL: test_mask_subs_epi16_rmk_512:
    730 ; X64:       # %bb.0:
    731 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    732 ; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe9,0x0f]
    733 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    734 ; X64-NEXT:    retq # encoding: [0xc3]
    735   %b = load <32 x i16>, <32 x i16>* %ptr_b
    736   %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
    737   ret <32 x i16> %res
    738 }
    739 
    740 define <32 x i16> @test_mask_subs_epi16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
    741 ; X86-LABEL: test_mask_subs_epi16_rmkz_512:
    742 ; X86:       # %bb.0:
    743 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    744 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    745 ; X86-NEXT:    vpsubsw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x00]
    746 ; X86-NEXT:    retl # encoding: [0xc3]
    747 ;
    748 ; X64-LABEL: test_mask_subs_epi16_rmkz_512:
    749 ; X64:       # %bb.0:
    750 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    751 ; X64-NEXT:    vpsubsw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe9,0x07]
    752 ; X64-NEXT:    retq # encoding: [0xc3]
    753   %b = load <32 x i16>, <32 x i16>* %ptr_b
    754   %res = call <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
    755   ret <32 x i16> %res
    756 }
    757 
    758 declare <32 x i16> @llvm.x86.avx512.mask.psubs.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    759 
    760 define <32 x i16> @test_mask_adds_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
    761 ; CHECK-LABEL: test_mask_adds_epu16_rr_512:
    762 ; CHECK:       # %bb.0:
    763 ; CHECK-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0xc1]
    764 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    765   %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
    766   ret <32 x i16> %res
    767 }
    768 
    769 define <32 x i16> @test_mask_adds_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
    770 ; X86-LABEL: test_mask_adds_epu16_rrk_512:
    771 ; X86:       # %bb.0:
    772 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    773 ; X86-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
    774 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    775 ; X86-NEXT:    retl # encoding: [0xc3]
    776 ;
    777 ; X64-LABEL: test_mask_adds_epu16_rrk_512:
    778 ; X64:       # %bb.0:
    779 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    780 ; X64-NEXT:    vpaddusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0xd1]
    781 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    782 ; X64-NEXT:    retq # encoding: [0xc3]
    783   %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
    784   ret <32 x i16> %res
    785 }
    786 
    787 define <32 x i16> @test_mask_adds_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
    788 ; X86-LABEL: test_mask_adds_epu16_rrkz_512:
    789 ; X86:       # %bb.0:
    790 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    791 ; X86-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
    792 ; X86-NEXT:    retl # encoding: [0xc3]
    793 ;
    794 ; X64-LABEL: test_mask_adds_epu16_rrkz_512:
    795 ; X64:       # %bb.0:
    796 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    797 ; X64-NEXT:    vpaddusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0xc1]
    798 ; X64-NEXT:    retq # encoding: [0xc3]
    799   %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
    800   ret <32 x i16> %res
    801 }
    802 
    803 define <32 x i16> @test_mask_adds_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
    804 ; X86-LABEL: test_mask_adds_epu16_rm_512:
    805 ; X86:       # %bb.0:
    806 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    807 ; X86-NEXT:    vpaddusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x00]
    808 ; X86-NEXT:    retl # encoding: [0xc3]
    809 ;
    810 ; X64-LABEL: test_mask_adds_epu16_rm_512:
    811 ; X64:       # %bb.0:
    812 ; X64-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xdd,0x07]
    813 ; X64-NEXT:    retq # encoding: [0xc3]
    814   %b = load <32 x i16>, <32 x i16>* %ptr_b
    815   %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
    816   ret <32 x i16> %res
    817 }
    818 
    819 define <32 x i16> @test_mask_adds_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
    820 ; X86-LABEL: test_mask_adds_epu16_rmk_512:
    821 ; X86:       # %bb.0:
    822 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    823 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    824 ; X86-NEXT:    vpaddusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x08]
    825 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    826 ; X86-NEXT:    retl # encoding: [0xc3]
    827 ;
    828 ; X64-LABEL: test_mask_adds_epu16_rmk_512:
    829 ; X64:       # %bb.0:
    830 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    831 ; X64-NEXT:    vpaddusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xdd,0x0f]
    832 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    833 ; X64-NEXT:    retq # encoding: [0xc3]
    834   %b = load <32 x i16>, <32 x i16>* %ptr_b
    835   %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
    836   ret <32 x i16> %res
    837 }
    838 
    839 define <32 x i16> @test_mask_adds_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
    840 ; X86-LABEL: test_mask_adds_epu16_rmkz_512:
    841 ; X86:       # %bb.0:
    842 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    843 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    844 ; X86-NEXT:    vpaddusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x00]
    845 ; X86-NEXT:    retl # encoding: [0xc3]
    846 ;
    847 ; X64-LABEL: test_mask_adds_epu16_rmkz_512:
    848 ; X64:       # %bb.0:
    849 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    850 ; X64-NEXT:    vpaddusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xdd,0x07]
    851 ; X64-NEXT:    retq # encoding: [0xc3]
    852   %b = load <32 x i16>, <32 x i16>* %ptr_b
    853   %res = call <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
    854   ret <32 x i16> %res
    855 }
    856 
    857 declare <32 x i16> @llvm.x86.avx512.mask.paddus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    858 
    859 define <32 x i16> @test_mask_subs_epu16_rr_512(<32 x i16> %a, <32 x i16> %b) {
    860 ; CHECK-LABEL: test_mask_subs_epu16_rr_512:
    861 ; CHECK:       # %bb.0:
    862 ; CHECK-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0xc1]
    863 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
    864   %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
    865   ret <32 x i16> %res
    866 }
    867 
    868 define <32 x i16> @test_mask_subs_epu16_rrk_512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask) {
    869 ; X86-LABEL: test_mask_subs_epu16_rrk_512:
    870 ; X86:       # %bb.0:
    871 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    872 ; X86-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
    873 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    874 ; X86-NEXT:    retl # encoding: [0xc3]
    875 ;
    876 ; X64-LABEL: test_mask_subs_epu16_rrk_512:
    877 ; X64:       # %bb.0:
    878 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    879 ; X64-NEXT:    vpsubusw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0xd1]
    880 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
    881 ; X64-NEXT:    retq # encoding: [0xc3]
    882   %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
    883   ret <32 x i16> %res
    884 }
    885 
    886 define <32 x i16> @test_mask_subs_epu16_rrkz_512(<32 x i16> %a, <32 x i16> %b, i32 %mask) {
    887 ; X86-LABEL: test_mask_subs_epu16_rrkz_512:
    888 ; X86:       # %bb.0:
    889 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    890 ; X86-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
    891 ; X86-NEXT:    retl # encoding: [0xc3]
    892 ;
    893 ; X64-LABEL: test_mask_subs_epu16_rrkz_512:
    894 ; X64:       # %bb.0:
    895 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    896 ; X64-NEXT:    vpsubusw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0xc1]
    897 ; X64-NEXT:    retq # encoding: [0xc3]
    898   %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
    899   ret <32 x i16> %res
    900 }
    901 
    902 define <32 x i16> @test_mask_subs_epu16_rm_512(<32 x i16> %a, <32 x i16>* %ptr_b) {
    903 ; X86-LABEL: test_mask_subs_epu16_rm_512:
    904 ; X86:       # %bb.0:
    905 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    906 ; X86-NEXT:    vpsubusw (%eax), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x00]
    907 ; X86-NEXT:    retl # encoding: [0xc3]
    908 ;
    909 ; X64-LABEL: test_mask_subs_epu16_rm_512:
    910 ; X64:       # %bb.0:
    911 ; X64-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd9,0x07]
    912 ; X64-NEXT:    retq # encoding: [0xc3]
    913   %b = load <32 x i16>, <32 x i16>* %ptr_b
    914   %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 -1)
    915   ret <32 x i16> %res
    916 }
    917 
    918 define <32 x i16> @test_mask_subs_epu16_rmk_512(<32 x i16> %a, <32 x i16>* %ptr_b, <32 x i16> %passThru, i32 %mask) {
    919 ; X86-LABEL: test_mask_subs_epu16_rmk_512:
    920 ; X86:       # %bb.0:
    921 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    922 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    923 ; X86-NEXT:    vpsubusw (%eax), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x08]
    924 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    925 ; X86-NEXT:    retl # encoding: [0xc3]
    926 ;
    927 ; X64-LABEL: test_mask_subs_epu16_rmk_512:
    928 ; X64:       # %bb.0:
    929 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    930 ; X64-NEXT:    vpsubusw (%rdi), %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd9,0x0f]
    931 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
    932 ; X64-NEXT:    retq # encoding: [0xc3]
    933   %b = load <32 x i16>, <32 x i16>* %ptr_b
    934   %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> %passThru, i32 %mask)
    935   ret <32 x i16> %res
    936 }
    937 
    938 define <32 x i16> @test_mask_subs_epu16_rmkz_512(<32 x i16> %a, <32 x i16>* %ptr_b, i32 %mask) {
    939 ; X86-LABEL: test_mask_subs_epu16_rmkz_512:
    940 ; X86:       # %bb.0:
    941 ; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
    942 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
    943 ; X86-NEXT:    vpsubusw (%eax), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x00]
    944 ; X86-NEXT:    retl # encoding: [0xc3]
    945 ;
    946 ; X64-LABEL: test_mask_subs_epu16_rmkz_512:
    947 ; X64:       # %bb.0:
    948 ; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
    949 ; X64-NEXT:    vpsubusw (%rdi), %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd9,0x07]
    950 ; X64-NEXT:    retq # encoding: [0xc3]
    951   %b = load <32 x i16>, <32 x i16>* %ptr_b
    952   %res = call <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16> %a, <32 x i16> %b, <32 x i16> zeroinitializer, i32 %mask)
    953   ret <32 x i16> %res
    954 }
    955 
    956 declare <32 x i16> @llvm.x86.avx512.mask.psubus.w.512(<32 x i16>, <32 x i16>, <32 x i16>, i32)
    957 
    958 define <32 x i16>@test_int_x86_avx512_mask_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    959 ; X86-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
    960 ; X86:       # %bb.0:
    961 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
    962 ; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
    963 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    964 ; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
    965 ; X86-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
    966 ; X86-NEXT:    retl # encoding: [0xc3]
    967 ;
    968 ; X64-LABEL: test_int_x86_avx512_mask_vpermt2var_hi_512:
    969 ; X64:       # %bb.0:
    970 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
    971 ; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
    972 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    973 ; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x7d,0xca]
    974 ; X64-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
    975 ; X64-NEXT:    retq # encoding: [0xc3]
    976   %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
    977   %2 = bitcast i32 %x3 to <32 x i1>
    978   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
    979   %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
    980   %res2 = add <32 x i16> %3, %4
    981   ret <32 x i16> %res2
    982 }
    983 
    984 define <32 x i16>@test_int_x86_avx512_maskz_vpermt2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
    985 ; X86-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
    986 ; X86:       # %bb.0:
    987 ; X86-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
    988 ; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
    989 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
    990 ; X86-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
    991 ; X86-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
    992 ; X86-NEXT:    retl # encoding: [0xc3]
    993 ;
    994 ; X64-LABEL: test_int_x86_avx512_maskz_vpermt2var_hi_512:
    995 ; X64:       # %bb.0:
    996 ; X64-NEXT:    vmovdqa64 %zmm1, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd9]
    997 ; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x7d,0xda]
    998 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
    999 ; X64-NEXT:    vpermt2w %zmm2, %zmm0, %zmm1 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x7d,0xca]
   1000 ; X64-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
   1001 ; X64-NEXT:    retq # encoding: [0xc3]
   1002   %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
   1003   %2 = bitcast i32 %x3 to <32 x i1>
   1004   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> zeroinitializer
   1005   %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x1, <32 x i16> %x0, <32 x i16> %x2)
   1006   %res2 = add <32 x i16> %3, %4
   1007   ret <32 x i16> %res2
   1008 }
   1009 
   1010 declare <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16>, <32 x i16>, <32 x i16>)
   1011 
   1012 define <32 x i16>@test_int_x86_avx512_mask_vpermi2var_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
   1013 ; X86-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
   1014 ; X86:       # %bb.0:
   1015 ; X86-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   1016 ; X86-NEXT:    vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
   1017 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
   1018 ; X86-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
   1019 ; X86-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
   1020 ; X86-NEXT:    retl # encoding: [0xc3]
   1021 ;
   1022 ; X64-LABEL: test_int_x86_avx512_mask_vpermi2var_hi_512:
   1023 ; X64:       # %bb.0:
   1024 ; X64-NEXT:    vmovdqa64 %zmm0, %zmm3 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xd8]
   1025 ; X64-NEXT:    vpermt2w %zmm2, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x7d,0xda]
   1026 ; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
   1027 ; X64-NEXT:    vpermi2w %zmm2, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x75,0xca]
   1028 ; X64-NEXT:    vpaddw %zmm3, %zmm1, %zmm0 # encoding: [0x62,0xf1,0x75,0x48,0xfd,0xc3]
   1029 ; X64-NEXT:    retq # encoding: [0xc3]
   1030   %1 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
   1031   %2 = bitcast i32 %x3 to <32 x i1>
   1032   %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x1
   1033   %4 = call <32 x i16> @llvm.x86.avx512.vpermi2var.hi.512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2)
   1034   %res2 = add <32 x i16> %3, %4
   1035   ret <32 x i16> %res2
   1036 }
   1037 
   1038 declare <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8>, <64 x i8>)
   1039 
   1040 define <64 x i8>@test_int_x86_avx512_pshuf_b_512(<64 x i8> %x0, <64 x i8> %x1) {
   1041 ; CHECK-LABEL: test_int_x86_avx512_pshuf_b_512:
   1042 ; CHECK:       # %bb.0:
   1043 ; CHECK-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 # encoding: [0x62,0xf2,0x7d,0x48,0x00,0xc1]
   1044 ; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
   1045   %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
   1046   ret <64 x i8> %res
   1047 }
   1048 
   1049 define <64 x i8>@test_int_x86_avx512_pshuf_b_512_mask(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2, i64 %mask) {
   1050 ; X86-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
   1051 ; X86:       # %bb.0:
   1052 ; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
   1053 ; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
   1054 ; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1055 ; X86-NEXT:    retl # encoding: [0xc3]
   1056 ;
   1057 ; X64-LABEL: test_int_x86_avx512_pshuf_b_512_mask:
   1058 ; X64:       # %bb.0:
   1059 ; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
   1060 ; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x00,0xd1]
   1061 ; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
   1062 ; X64-NEXT:    retq # encoding: [0xc3]
   1063   %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
   1064   %mask.cast = bitcast i64 %mask to <64 x i1>
   1065   %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> %x2
   1066   ret <64 x i8> %res2
   1067 }
   1068 
define <64 x i8>@test_int_x86_avx512_pshuf_b_512_maskz(<64 x i8> %x0, <64 x i8> %x1, i64 %mask) {
; X86-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; X86:       # %bb.0:
; X86-NEXT:    kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_pshuf_b_512_maskz:
; X64:       # %bb.0:
; X64-NEXT:    kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf]
; X64-NEXT:    vpshufb %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0x00,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
; Zero-masking pattern: select against zeroinitializer should fold into the
; {%k1} {z} form of vpshufb.
  %res = call <64 x i8> @llvm.x86.avx512.pshuf.b.512(<64 x i8> %x0, <64 x i8> %x1)
  %mask.cast = bitcast i64 %mask to <64 x i1>
  %res2 = select <64 x i1> %mask.cast, <64 x i8> %res, <64 x i8> zeroinitializer
  ret <64 x i8> %res2
}
   1086 
   1087 declare <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16>, <32 x i16>)
   1088 
define <32 x i16> @test_int_x86_avx512_mask_pmulhu_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhu_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe4,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmulhuw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe4,0xd1]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
; Calls the intrinsic twice (merge-masked into %x2, then unmasked) and adds the
; results so both lowerings appear in the checked output.
  %1 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmulhu.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
   1112 
   1113 declare <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16>, <32 x i16>)
   1114 
define <32 x i16> @test_int_x86_avx512_mask_pmulh_w_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulh_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmulhw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xe5,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmulhw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe5,0xd1]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
; Same merge-masked + unmasked pattern as the pmulhu test, for signed vpmulhw.
  %1 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmulh.w.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
   1138 
   1139 declare <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16>, <32 x i16>)
   1140 
define <32 x i16> @test_int_x86_avx512_mask_pmulhr_sw_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmulhr_sw_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x0b,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmulhrsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x0b,0xd1]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
; Merge-masked + unmasked pattern for vpmulhrsw, results summed.
  %1 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmul.hr.sw.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
   1164 
   1165 declare <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16>, <32 x i8>, i32)
   1166 
define <32 x i8>@test_int_x86_avx512_mask_pmov_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
; X86-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2]
; X86-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X86-NEXT:    vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
; X86-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovwb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0xc1]
; X64-NEXT:    vpmovwb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x30,0xc2]
; X64-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X64-NEXT:    vpmovwb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x30,0xc0]
; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
; Exercises the legacy masked intrinsic in all three forms: unmasked (-1 mask),
; merge-masked into %x1, and zero-masked; the three results are summed.
    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmov.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
    %res3 = add <32 x i8> %res0, %res1
    %res4 = add <32 x i8> %res3, %res2
    ret <32 x i8> %res4
}
   1194 
   1195 declare void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16>, i32)
   1196 
define void @test_int_x86_avx512_mask_pmov_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmovwb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x00]
; X86-NEXT:    vpmovwb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmov_wb_mem_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpmovwb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x30,0x07]
; X64-NEXT:    vpmovwb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x30,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
; Memory form: truncating store of %x1 to %ptr, once unmasked (-1) and once
; under %x2, expected to lower to unmasked and {%k1} vpmovwb stores.
    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
    call void @llvm.x86.avx512.mask.pmov.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
    ret void
}
   1218 
   1219 declare <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16>, <32 x i8>, i32)
   1220 
define <32 x i8>@test_int_x86_avx512_mask_pmovs_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
; X86-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2]
; X86-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X86-NEXT:    vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
; X86-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0xc1]
; X64-NEXT:    vpmovswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x20,0xc2]
; X64-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X64-NEXT:    vpmovswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x20,0xc0]
; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
; Signed-saturating truncation (vpmovswb): unmasked, merge-masked, and
; zero-masked calls, summed.
    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovs.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
    %res3 = add <32 x i8> %res0, %res1
    %res4 = add <32 x i8> %res3, %res2
    ret <32 x i8> %res4
}
   1248 
   1249 declare void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16>, i32)
   1250 
define void @test_int_x86_avx512_mask_pmovs_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmovswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x00]
; X86-NEXT:    vpmovswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovs_wb_mem_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpmovswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x20,0x07]
; X64-NEXT:    vpmovswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x20,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
; Memory form of the signed-saturating truncating store: unmasked then masked.
    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
    call void @llvm.x86.avx512.mask.pmovs.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
    ret void
}
   1272 
   1273 declare <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16>, <32 x i8>, i32)
   1274 
define <32 x i8>@test_int_x86_avx512_mask_pmovus_wb_512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
; X86-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2]
; X86-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X86-NEXT:    vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
; X86-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmovuswb %zmm0, %ymm1 {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0xc1]
; X64-NEXT:    vpmovuswb %zmm0, %ymm2 {%k1} {z} # encoding: [0x62,0xf2,0x7e,0xc9,0x10,0xc2]
; X64-NEXT:    vpaddb %ymm2, %ymm1, %ymm1 # encoding: [0xc5,0xf5,0xfc,0xca]
; X64-NEXT:    vpmovuswb %zmm0, %ymm0 # encoding: [0x62,0xf2,0x7e,0x48,0x10,0xc0]
; X64-NEXT:    vpaddb %ymm1, %ymm0, %ymm0 # encoding: [0xc5,0xfd,0xfc,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
; Unsigned-saturating truncation (vpmovuswb): unmasked, merge-masked, and
; zero-masked calls, summed.
    %res0 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 -1)
    %res1 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> %x1, i32 %x2)
    %res2 = call <32 x i8> @llvm.x86.avx512.mask.pmovus.wb.512(<32 x i16> %x0, <32 x i8> zeroinitializer, i32 %x2)
    %res3 = add <32 x i8> %res0, %res1
    %res4 = add <32 x i8> %res3, %res2
    ret <32 x i8> %res4
}
   1302 
   1303 declare void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16>, i32)
   1304 
define void @test_int_x86_avx512_mask_pmovus_wb_mem_512(i8* %ptr, <32 x i16> %x1, i32 %x2) {
; X86-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x08]
; X86-NEXT:    movl {{[0-9]+}}(%esp), %eax # encoding: [0x8b,0x44,0x24,0x04]
; X86-NEXT:    vpmovuswb %zmm0, (%eax) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x00]
; X86-NEXT:    vpmovuswb %zmm0, (%eax) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x00]
; X86-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmovus_wb_mem_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %esi, %k1 # encoding: [0xc5,0xfb,0x92,0xce]
; X64-NEXT:    vpmovuswb %zmm0, (%rdi) # encoding: [0x62,0xf2,0x7e,0x48,0x10,0x07]
; X64-NEXT:    vpmovuswb %zmm0, (%rdi) {%k1} # encoding: [0x62,0xf2,0x7e,0x49,0x10,0x07]
; X64-NEXT:    vzeroupper # encoding: [0xc5,0xf8,0x77]
; X64-NEXT:    retq # encoding: [0xc3]
; Memory form of the unsigned-saturating truncating store: unmasked then masked.
    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 -1)
    call void @llvm.x86.avx512.mask.pmovus.wb.mem.512(i8* %ptr, <32 x i16> %x1, i32 %x2)
    ret void
}
   1326 
   1327 declare <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8>, <64 x i8>)
   1328 
define <32 x i16> @test_int_x86_avx512_mask_pmaddubs_w_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X86-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddubs_w_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0x04,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaddubsw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0x04,0xd1]
; X64-NEXT:    vpaddw %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
; Merge-masked + unmasked vpmaddubsw (byte inputs, word results), summed.
  %1 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.pmaddubs.w.512(<64 x i8> %x0, <64 x i8> %x1)
  %res2 = add <32 x i16> %3, %4
  ret <32 x i16> %res2
}
   1352 
   1353 declare <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16>, <32 x i16>)
   1354 
define <16 x i32> @test_int_x86_avx512_mask_pmaddw_d_512(<32 x i16> %x0, <32 x i16> %x1, <16 x i32> %x2, i16 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X86:       # %bb.0:
; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X86-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_pmaddw_d_512:
; X64:       # %bb.0:
; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf1,0x7d,0x48,0xf5,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpmaddwd %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf5,0xd1]
; X64-NEXT:    vpaddd %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfe,0xc3]
; X64-NEXT:    retq # encoding: [0xc3]
; Merge-masked + unmasked vpmaddwd; note the 16-bit mask (<16 x i32> result).
  %1 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i16 %x3 to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %x2
  %4 = call <16 x i32> @llvm.x86.avx512.pmaddw.d.512(<32 x i16> %x0, <32 x i16> %x1)
  %res2 = add <16 x i32> %3, %4
  ret <16 x i32> %res2
}
   1378 
   1379 declare <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8>, <64 x i8>, i32)
   1380 
define <32 x i16>@test_int_x86_avx512_mask_dbpsadbw_512(<64 x i8> %x0, <64 x i8> %x1, <32 x i16> %x3, i32 %x4) {
; X86-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X86:       # %bb.0:
; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X86-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_dbpsadbw_512:
; X64:       # %bb.0:
; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0x7d,0x48,0x42,0xd9,0x02]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0x7d,0x49,0x42,0xd1,0x02]
; X64-NEXT:    vdbpsadbw $2, %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf3,0x7d,0xc9,0x42,0xc1,0x02]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
; vdbpsadbw with immediate 2 in three forms: merge-masked into %x3,
; zero-masked, and unmasked; all three results are summed.
  %1 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %2 = bitcast i32 %x4 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x3
  %4 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %5 = bitcast i32 %x4 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = call <32 x i16> @llvm.x86.avx512.dbpsadbw.512(<64 x i8> %x0, <64 x i8> %x1, i32 2)
  %res3 = add <32 x i16> %3, %6
  %res4 = add <32 x i16> %res3, %7
  ret <32 x i16> %res4
}
   1412 
   1413 declare  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8>, <64 x i8>)
   1414 
define  <8 x i64>@test_int_x86_avx512_mask_psadb_w_512(<64 x i8> %x0, <64 x i8> %x1, <64 x i8> %x2){
; CHECK-LABEL: test_int_x86_avx512_mask_psadb_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsadbw %zmm1, %zmm0, %zmm1 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc9]
; CHECK-NEXT:    vpsadbw %zmm2, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf6,0xc2]
; CHECK-NEXT:    vpaddq %zmm0, %zmm1, %zmm0 # encoding: [0x62,0xf1,0xf5,0x48,0xd4,0xc0]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
; Unmasked vpsadbw on two operand pairs (%x0/%x1 and %x0/%x2), results added;
; identical codegen on both targets, so a single CHECK block suffices.
  %res = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x1)
  %res1 = call  <8 x i64> @llvm.x86.avx512.psad.bw.512(<64 x i8> %x0, <64 x i8> %x2)
  %res2 = add  <8 x i64> %res, %res1
  ret  <8 x i64> %res2
}
   1427 
   1428 declare <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   1429 
define <32 x i16>@test_int_x86_avx512_mask_psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X86:       # %bb.0:
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X86-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrlv32hi:
; X64:       # %bb.0:
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x10,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x10,0xd1]
; X64-NEXT:    vpsrlvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x10,0xc1]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
; Legacy masked intrinsic form: merge-masked (%x2 passthru), zero-masked
; (zeroinitializer passthru), and unmasked (-1) calls, summed.
  %res = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrlv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}
   1457 
   1458 declare <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   1459 
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X86:       # %bb.0:
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X86-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi:
; X64:       # %bb.0:
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x11,0xd1]
; X64-NEXT:    vpsravw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x11,0xc1]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
; Legacy masked vpsravw: merge-masked, zero-masked, and unmasked calls, summed.
  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}
   1487 
define <32 x i16>@test_int_x86_avx512_mask_psrav32_hi_const(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X86:       # %bb.0:
; X86-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X86-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    vpsravw {{\.LCPI.*}}, %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X86-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}, kind: FK_Data_4
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psrav32_hi_const:
; X64:       # %bb.0:
; X64-NEXT:    vmovdqa64 {{.*#+}} zmm0 = [2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51,2,9,65524,23,65510,37,65496,51]
; X64-NEXT:    # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    vpsravw {{.*}}(%rip), %zmm0, %zmm0 # encoding: [0x62,0xf2,0xfd,0x48,0x11,0x05,A,A,A,A]
; X64-NEXT:    # fixup A - offset: 6, value: {{\.LCPI.*}}-4, kind: reloc_riprel_4byte
; X64-NEXT:    retq # encoding: [0xc3]
; Both shift operands are build-vector constants (unmasked, -1): the operands
; are materialized from constant-pool loads; the function arguments are unused.
  %res = call <32 x i16> @llvm.x86.avx512.mask.psrav32.hi(<32 x i16> <i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51, i16 2, i16 9,  i16 -12, i16 23, i16 -26, i16 37, i16 -40, i16 51>,
                                                          <32 x i16> <i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49, i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49, i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49, i16 1, i16 10, i16 35,  i16 52, i16 69,  i16 9,  i16 16,  i16 49>,
                                                          <32 x i16> zeroinitializer, i32 -1)
  ret <32 x i16> %res
}
   1511 
   1512 declare <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16>, <32 x i16>, <32 x i16>, i32)
   1513 
define <32 x i16>@test_int_x86_avx512_mask_psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X86:       # %bb.0:
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X86-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_psllv32hi:
; X64:       # %bb.0:
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0xfd,0x48,0x12,0xd9]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0xfd,0x49,0x12,0xd1]
; X64-NEXT:    vpsllvw %zmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xfd,0xc9,0x12,0xc1]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
; Legacy masked vpsllvw: merge-masked, zero-masked, and unmasked calls, summed.
  %res = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3)
  %res1 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> zeroinitializer, i32 %x3)
  %res2 = call <32 x i16> @llvm.x86.avx512.mask.psllv32hi(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 -1)
  %res3 = add <32 x i16> %res, %res1
  %res4 = add <32 x i16> %res3, %res2
  ret <32 x i16> %res4
}
   1541 
   1542 declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
   1543 
define <32 x i16>@test_int_x86_avx512_mask_permvar_hi_512(<32 x i16> %x0, <32 x i16> %x1, <32 x i16> %x2, i32 %x3) {
; X86-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X86:       # %bb.0:
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X86-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X86-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X86-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_int_x86_avx512_mask_permvar_hi_512:
; X64:       # %bb.0:
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm3 # encoding: [0x62,0xf2,0xf5,0x48,0x8d,0xd8]
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm2 {%k1} # encoding: [0x62,0xf2,0xf5,0x49,0x8d,0xd0]
; X64-NEXT:    vpermw %zmm0, %zmm1, %zmm0 {%k1} {z} # encoding: [0x62,0xf2,0xf5,0xc9,0x8d,0xc0]
; X64-NEXT:    vpaddw %zmm3, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xfd,0xc3]
; X64-NEXT:    vpaddw %zmm0, %zmm2, %zmm0 # encoding: [0x62,0xf1,0x6d,0x48,0xfd,0xc0]
; X64-NEXT:    retq # encoding: [0xc3]
; vpermw in three forms: merge-masked into %x2, zero-masked, and unmasked;
; the three results are summed.
  %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %2 = bitcast i32 %x3 to <32 x i1>
  %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %x2
  %4 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %5 = bitcast i32 %x3 to <32 x i1>
  %6 = select <32 x i1> %5, <32 x i16> %4, <32 x i16> zeroinitializer
  %7 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %x0, <32 x i16> %x1)
  %res3 = add <32 x i16> %3, %6
  %res4 = add <32 x i16> %res3, %7
  ret <32 x i16> %res4
}
   1575 
; Unmasked VPSLLW: shift every i16 lane of %a0 left by the count held in
; %a1. Autogenerated CHECK lines pin the instruction and its EVEX encoding.
define <32 x i16> @test_x86_avx512_psll_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psll_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xf1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked VPSLLW: the shift result is selected per-lane against
; %passthru via the i32 %mask (bitcast to <32 x i1>), checking that the
; select folds into the instruction's {%k1} merge-masking form.
define <32 x i16> @test_x86_avx512_mask_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psll_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psll_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xf1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked VPSLLW: lanes with a clear %mask bit are zeroed, checking the
; select-against-zero folds into the {%k1} {z} zero-masking form.
define <32 x i16> @test_x86_avx512_maskz_psll_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psll_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psll_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xf1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
   1620 declare <32 x i16> @llvm.x86.avx512.psll.w.512(<32 x i16>, <8 x i16>) nounwind readnone
   1621 
   1622 
; Unmasked immediate-form VPSLLW: shift every i16 lane of %a0 left by the
; constant 7, expected to select the $imm encoding.
define <32 x i16> @test_x86_avx512_pslli_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_pslli_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsllw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xf0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked immediate VPSLLW ($7): select against %passthru must fold
; into the {%k1} merge-masking form of the immediate shift.
define <32 x i16> @test_x86_avx512_mask_pslli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_pslli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_pslli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xf0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked immediate VPSLLW ($7): select against zero must fold into the
; {%k1} {z} zero-masking form.
define <32 x i16> @test_x86_avx512_maskz_pslli_w_512(<32 x i16> %a0, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_pslli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsllw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xf0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
   1667 declare <32 x i16> @llvm.x86.avx512.pslli.w.512(<32 x i16>, i32) nounwind readnone
   1668 
   1669 
; Unmasked VPSRAW: arithmetic (sign-preserving) right shift of each i16
; lane of %a0 by the count in %a1.
define <32 x i16> @test_x86_avx512_psra_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psra_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xe1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked VPSRAW: select against %passthru folds into the {%k1}
; merge-masking form of the arithmetic right shift.
define <32 x i16> @test_x86_avx512_mask_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psra_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psra_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xe1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked VPSRAW: select against zero folds into the {%k1} {z}
; zero-masking form of the arithmetic right shift.
define <32 x i16> @test_x86_avx512_maskz_psra_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psra_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psra_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xe1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
   1714 declare <32 x i16> @llvm.x86.avx512.psra.w.512(<32 x i16>, <8 x i16>) nounwind readnone
   1715 
   1716 
; Unmasked immediate-form VPSRAW: arithmetic right shift of each i16 lane
; of %a0 by the constant 7.
define <32 x i16> @test_x86_avx512_psrai_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_psrai_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsraw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xe0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked immediate VPSRAW ($7): select against %passthru folds into
; the {%k1} merge-masking form.
define <32 x i16> @test_x86_avx512_mask_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrai_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrai_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xe0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked immediate VPSRAW ($7): select against zero folds into the
; {%k1} {z} zero-masking form.
; NOTE(review): %passthru is declared but unused (this is the zero-masking
; variant) — presumably carried over from the merge-mask test above; harmless
; for codegen checks, but confirm before regenerating signatures.
define <32 x i16> @test_x86_avx512_maskz_psrai_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrai_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsraw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xe0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
   1761 declare <32 x i16> @llvm.x86.avx512.psrai.w.512(<32 x i16>, i32) nounwind readnone
   1762 
   1763 
; Unmasked VPSRLW: logical (zero-filling) right shift of each i16 lane of
; %a0 by the count in %a1.
define <32 x i16> @test_x86_avx512_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: test_x86_avx512_psrl_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0xd1,0xc1]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked VPSRLW: select against %passthru folds into the {%k1}
; merge-masking form of the logical right shift.
define <32 x i16> @test_x86_avx512_mask_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X86-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf1,0x7d,0x49,0xd1,0xd1]
; X64-NEXT:    vmovdqa64 %zmm2, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc2]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked VPSRLW: select against zero folds into the {%k1} {z}
; zero-masking form of the logical right shift.
define <32 x i16> @test_x86_avx512_maskz_psrl_w_512(<32 x i16> %a0, <8 x i16> %a1, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrl_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw %xmm1, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0xd1,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16> %a0, <8 x i16> %a1) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
   1808 declare <32 x i16> @llvm.x86.avx512.psrl.w.512(<32 x i16>, <8 x i16>) nounwind readnone
   1809 
   1810 
; Unmasked immediate-form VPSRLW: logical right shift of each i16 lane of
; %a0 by the constant 7.
define <32 x i16> @test_x86_avx512_psrli_w_512(<32 x i16> %a0) {
; CHECK-LABEL: test_x86_avx512_psrli_w_512:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpsrlw $7, %zmm0, %zmm0 # encoding: [0x62,0xf1,0x7d,0x48,0x71,0xd0,0x07]
; CHECK-NEXT:    ret{{[l|q]}} # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  ret <32 x i16> %res
}
; Merge-masked immediate VPSRLW ($7): select against %passthru folds into
; the {%k1} merge-masking form.
define <32 x i16> @test_x86_avx512_mask_psrli_w_512(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
; X86-LABEL: test_x86_avx512_mask_psrli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X86-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_mask_psrli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw $7, %zmm0, %zmm1 {%k1} # encoding: [0x62,0xf1,0x75,0x49,0x71,0xd0,0x07]
; X64-NEXT:    vmovdqa64 %zmm1, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc1]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> %passthru
  ret <32 x i16> %res2
}
; Zero-masked immediate VPSRLW ($7): select against zero folds into the
; {%k1} {z} zero-masking form.
define <32 x i16> @test_x86_avx512_maskz_psrli_w_512(<32 x i16> %a0, i32 %mask) {
; X86-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X86:       # %bb.0:
; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04]
; X86-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X86-NEXT:    retl # encoding: [0xc3]
;
; X64-LABEL: test_x86_avx512_maskz_psrli_w_512:
; X64:       # %bb.0:
; X64-NEXT:    kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf]
; X64-NEXT:    vpsrlw $7, %zmm0, %zmm0 {%k1} {z} # encoding: [0x62,0xf1,0x7d,0xc9,0x71,0xd0,0x07]
; X64-NEXT:    retq # encoding: [0xc3]
  %res = call <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16> %a0, i32 7) ; <<32 x i16>> [#uses=1]
  %mask.cast = bitcast i32 %mask to <32 x i1>
  %res2 = select <32 x i1> %mask.cast, <32 x i16> %res, <32 x i16> zeroinitializer
  ret <32 x i16> %res2
}
   1855 declare <32 x i16> @llvm.x86.avx512.psrli.w.512(<32 x i16>, i32) nounwind readnone
   1856