; (web-viewer navigation header removed — extraction artifact, not part of the test file)
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -fast-isel -mtriple=i386-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
      3 ; RUN: llc < %s -fast-isel -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512bw,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64
      4 
      5 ; NOTE: This should use IR equivalent to what is generated by clang/test/CodeGen/avx512vlbw-builtins.c
      6 
        ; Unmasked byte-test: bit i of the result = ((A & B) byte i != 0); lowers to vptestmb.
      7 define zeroext i16 @test_mm_test_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
      8 ; CHECK-LABEL: test_mm_test_epi8_mask:
      9 ; CHECK:       # %bb.0: # %entry
     10 ; CHECK-NEXT:    vptestmb %xmm0, %xmm1, %k0
     11 ; CHECK-NEXT:    kmovd %k0, %eax
     12 ; CHECK-NEXT:    movzwl %ax, %eax
     13 ; CHECK-NEXT:    ret{{[l|q]}}
     14 entry:
     15   %and.i.i = and <2 x i64> %__B, %__A
     16   %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
     17   %1 = icmp ne <16 x i8> %0, zeroinitializer
     18   %2 = bitcast <16 x i1> %1 to i16
     19   ret i16 %2
     20 }
     21 
        ; Masked byte-test: same as above, then ANDed with the i16 mask %__U (vptestmb {%k1}).
     22 define zeroext i16 @test_mm_mask_test_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
     23 ; X86-LABEL: test_mm_mask_test_epi8_mask:
     24 ; X86:       # %bb.0: # %entry
     25 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
     26 ; X86-NEXT:    vptestmb %xmm0, %xmm1, %k0 {%k1}
     27 ; X86-NEXT:    kmovd %k0, %eax
     28 ; X86-NEXT:    movzwl %ax, %eax
     29 ; X86-NEXT:    retl
     30 ;
     31 ; X64-LABEL: test_mm_mask_test_epi8_mask:
     32 ; X64:       # %bb.0: # %entry
     33 ; X64-NEXT:    kmovd %edi, %k1
     34 ; X64-NEXT:    vptestmb %xmm0, %xmm1, %k0 {%k1}
     35 ; X64-NEXT:    kmovd %k0, %eax
     36 ; X64-NEXT:    movzwl %ax, %eax
     37 ; X64-NEXT:    retq
     38 entry:
     39   %and.i.i = and <2 x i64> %__B, %__A
     40   %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
     41   %1 = icmp ne <16 x i8> %0, zeroinitializer
     42   %2 = bitcast i16 %__U to <16 x i1>
     43   %3 = and <16 x i1> %1, %2
     44   %4 = bitcast <16 x i1> %3 to i16
     45   ret i16 %4
     46 }
     47 
        ; 256-bit byte-test: 32 lanes, i32 result; ymm use forces vzeroupper before ret.
     48 define i32 @test_mm256_test_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
     49 ; CHECK-LABEL: test_mm256_test_epi8_mask:
     50 ; CHECK:       # %bb.0: # %entry
     51 ; CHECK-NEXT:    vptestmb %ymm0, %ymm1, %k0
     52 ; CHECK-NEXT:    kmovd %k0, %eax
     53 ; CHECK-NEXT:    vzeroupper
     54 ; CHECK-NEXT:    ret{{[l|q]}}
     55 entry:
     56   %and.i.i = and <4 x i64> %__B, %__A
     57   %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
     58   %1 = icmp ne <32 x i8> %0, zeroinitializer
     59   %2 = bitcast <32 x i1> %1 to i32
     60   ret i32 %2
     61 }
     62 
        ; 256-bit masked byte-test: 32-lane compare result ANDed with i32 mask %__U.
     63 define i32 @test_mm256_mask_test_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
     64 ; X86-LABEL: test_mm256_mask_test_epi8_mask:
     65 ; X86:       # %bb.0: # %entry
     66 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
     67 ; X86-NEXT:    vptestmb %ymm0, %ymm1, %k0 {%k1}
     68 ; X86-NEXT:    kmovd %k0, %eax
     69 ; X86-NEXT:    vzeroupper
     70 ; X86-NEXT:    retl
     71 ;
     72 ; X64-LABEL: test_mm256_mask_test_epi8_mask:
     73 ; X64:       # %bb.0: # %entry
     74 ; X64-NEXT:    kmovd %edi, %k1
     75 ; X64-NEXT:    vptestmb %ymm0, %ymm1, %k0 {%k1}
     76 ; X64-NEXT:    kmovd %k0, %eax
     77 ; X64-NEXT:    vzeroupper
     78 ; X64-NEXT:    retq
     79 entry:
     80   %and.i.i = and <4 x i64> %__B, %__A
     81   %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
     82   %1 = icmp ne <32 x i8> %0, zeroinitializer
     83   %2 = bitcast i32 %__U to <32 x i1>
     84   %3 = and <32 x i1> %1, %2
     85   %4 = bitcast <32 x i1> %3 to i32
     86   ret i32 %4
     87 }
     88 
        ; Unmasked word-test: bit i = ((A & B) word i != 0); 8 lanes -> i8 result (vptestmw).
     89 define zeroext i8 @test_mm_test_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
     90 ; CHECK-LABEL: test_mm_test_epi16_mask:
     91 ; CHECK:       # %bb.0: # %entry
     92 ; CHECK-NEXT:    vptestmw %xmm0, %xmm1, %k0
     93 ; CHECK-NEXT:    kmovd %k0, %eax
     94 ; CHECK-NEXT:    movzbl %al, %eax
     95 ; CHECK-NEXT:    ret{{[l|q]}}
     96 entry:
     97   %and.i.i = and <2 x i64> %__B, %__A
     98   %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
     99   %1 = icmp ne <8 x i16> %0, zeroinitializer
    100   %2 = bitcast <8 x i1> %1 to i8
    101   ret i8 %2
    102 }
    103 
        ; Masked word-test: compare result ANDed with the i8 mask %__U (vptestmw {%k1}).
    104 define zeroext i8 @test_mm_mask_test_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
    105 ; X86-LABEL: test_mm_mask_test_epi16_mask:
    106 ; X86:       # %bb.0: # %entry
    107 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    108 ; X86-NEXT:    kmovd %eax, %k1
    109 ; X86-NEXT:    vptestmw %xmm0, %xmm1, %k0 {%k1}
    110 ; X86-NEXT:    kmovd %k0, %eax
    111 ; X86-NEXT:    movzbl %al, %eax
    112 ; X86-NEXT:    retl
    113 ;
    114 ; X64-LABEL: test_mm_mask_test_epi16_mask:
    115 ; X64:       # %bb.0: # %entry
    116 ; X64-NEXT:    kmovd %edi, %k1
    117 ; X64-NEXT:    vptestmw %xmm0, %xmm1, %k0 {%k1}
    118 ; X64-NEXT:    kmovd %k0, %eax
    119 ; X64-NEXT:    movzbl %al, %eax
    120 ; X64-NEXT:    retq
    121 entry:
    122   %and.i.i = and <2 x i64> %__B, %__A
    123   %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
    124   %1 = icmp ne <8 x i16> %0, zeroinitializer
    125   %2 = bitcast i8 %__U to <8 x i1>
    126   %3 = and <8 x i1> %1, %2
    127   %4 = bitcast <8 x i1> %3 to i8
    128   ret i8 %4
    129 }
    130 
        ; 256-bit word-test: 16 lanes -> i16 result (vptestmw ymm); vzeroupper before ret.
    131 define zeroext i16 @test_mm256_test_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
    132 ; CHECK-LABEL: test_mm256_test_epi16_mask:
    133 ; CHECK:       # %bb.0: # %entry
    134 ; CHECK-NEXT:    vptestmw %ymm0, %ymm1, %k0
    135 ; CHECK-NEXT:    kmovd %k0, %eax
    136 ; CHECK-NEXT:    movzwl %ax, %eax
    137 ; CHECK-NEXT:    vzeroupper
    138 ; CHECK-NEXT:    ret{{[l|q]}}
    139 entry:
    140   %and.i.i = and <4 x i64> %__B, %__A
    141   %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
    142   %1 = icmp ne <16 x i16> %0, zeroinitializer
    143   %2 = bitcast <16 x i1> %1 to i16
    144   ret i16 %2
    145 }
    146 
        ; 256-bit masked word-test: 16-lane compare ANDed with i16 mask %__U.
    147 define zeroext i16 @test_mm256_mask_test_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
    148 ; X86-LABEL: test_mm256_mask_test_epi16_mask:
    149 ; X86:       # %bb.0: # %entry
    150 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    151 ; X86-NEXT:    vptestmw %ymm0, %ymm1, %k0 {%k1}
    152 ; X86-NEXT:    kmovd %k0, %eax
    153 ; X86-NEXT:    movzwl %ax, %eax
    154 ; X86-NEXT:    vzeroupper
    155 ; X86-NEXT:    retl
    156 ;
    157 ; X64-LABEL: test_mm256_mask_test_epi16_mask:
    158 ; X64:       # %bb.0: # %entry
    159 ; X64-NEXT:    kmovd %edi, %k1
    160 ; X64-NEXT:    vptestmw %ymm0, %ymm1, %k0 {%k1}
    161 ; X64-NEXT:    kmovd %k0, %eax
    162 ; X64-NEXT:    movzwl %ax, %eax
    163 ; X64-NEXT:    vzeroupper
    164 ; X64-NEXT:    retq
    165 entry:
    166   %and.i.i = and <4 x i64> %__B, %__A
    167   %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
    168   %1 = icmp ne <16 x i16> %0, zeroinitializer
    169   %2 = bitcast i16 %__U to <16 x i1>
    170   %3 = and <16 x i1> %1, %2
    171   %4 = bitcast <16 x i1> %3 to i16
    172   ret i16 %4
    173 }
    174 
        ; "testn" variant: icmp eq instead of ne, i.e. bit i = ((A & B) byte i == 0); vptestnmb.
    175 define zeroext i16 @test_mm_testn_epi8_mask(<2 x i64> %__A, <2 x i64> %__B) {
    176 ; CHECK-LABEL: test_mm_testn_epi8_mask:
    177 ; CHECK:       # %bb.0: # %entry
    178 ; CHECK-NEXT:    vptestnmb %xmm0, %xmm1, %k0
    179 ; CHECK-NEXT:    kmovd %k0, %eax
    180 ; CHECK-NEXT:    movzwl %ax, %eax
    181 ; CHECK-NEXT:    ret{{[l|q]}}
    182 entry:
    183   %and.i.i = and <2 x i64> %__B, %__A
    184   %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
    185   %1 = icmp eq <16 x i8> %0, zeroinitializer
    186   %2 = bitcast <16 x i1> %1 to i16
    187   ret i16 %2
    188 }
    189 
        ; Masked testn (bytes): ==0 compare result ANDed with i16 mask %__U (vptestnmb {%k1}).
    190 define zeroext i16 @test_mm_mask_testn_epi8_mask(i16 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
    191 ; X86-LABEL: test_mm_mask_testn_epi8_mask:
    192 ; X86:       # %bb.0: # %entry
    193 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    194 ; X86-NEXT:    vptestnmb %xmm0, %xmm1, %k0 {%k1}
    195 ; X86-NEXT:    kmovd %k0, %eax
    196 ; X86-NEXT:    movzwl %ax, %eax
    197 ; X86-NEXT:    retl
    198 ;
    199 ; X64-LABEL: test_mm_mask_testn_epi8_mask:
    200 ; X64:       # %bb.0: # %entry
    201 ; X64-NEXT:    kmovd %edi, %k1
    202 ; X64-NEXT:    vptestnmb %xmm0, %xmm1, %k0 {%k1}
    203 ; X64-NEXT:    kmovd %k0, %eax
    204 ; X64-NEXT:    movzwl %ax, %eax
    205 ; X64-NEXT:    retq
    206 entry:
    207   %and.i.i = and <2 x i64> %__B, %__A
    208   %0 = bitcast <2 x i64> %and.i.i to <16 x i8>
    209   %1 = icmp eq <16 x i8> %0, zeroinitializer
    210   %2 = bitcast i16 %__U to <16 x i1>
    211   %3 = and <16 x i1> %1, %2
    212   %4 = bitcast <16 x i1> %3 to i16
    213   ret i16 %4
    214 }
    215 
        ; 256-bit testn (bytes): 32-lane ==0 mask -> i32 (vptestnmb ymm).
    216 define i32 @test_mm256_testn_epi8_mask(<4 x i64> %__A, <4 x i64> %__B) {
    217 ; CHECK-LABEL: test_mm256_testn_epi8_mask:
    218 ; CHECK:       # %bb.0: # %entry
    219 ; CHECK-NEXT:    vptestnmb %ymm0, %ymm1, %k0
    220 ; CHECK-NEXT:    kmovd %k0, %eax
    221 ; CHECK-NEXT:    vzeroupper
    222 ; CHECK-NEXT:    ret{{[l|q]}}
    223 entry:
    224   %and.i.i = and <4 x i64> %__B, %__A
    225   %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
    226   %1 = icmp eq <32 x i8> %0, zeroinitializer
    227   %2 = bitcast <32 x i1> %1 to i32
    228   ret i32 %2
    229 }
    230 
        ; 256-bit masked testn (bytes): ==0 compare ANDed with i32 mask %__U.
    231 define i32 @test_mm256_mask_testn_epi8_mask(i32 %__U, <4 x i64> %__A, <4 x i64> %__B) {
    232 ; X86-LABEL: test_mm256_mask_testn_epi8_mask:
    233 ; X86:       # %bb.0: # %entry
    234 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
    235 ; X86-NEXT:    vptestnmb %ymm0, %ymm1, %k0 {%k1}
    236 ; X86-NEXT:    kmovd %k0, %eax
    237 ; X86-NEXT:    vzeroupper
    238 ; X86-NEXT:    retl
    239 ;
    240 ; X64-LABEL: test_mm256_mask_testn_epi8_mask:
    241 ; X64:       # %bb.0: # %entry
    242 ; X64-NEXT:    kmovd %edi, %k1
    243 ; X64-NEXT:    vptestnmb %ymm0, %ymm1, %k0 {%k1}
    244 ; X64-NEXT:    kmovd %k0, %eax
    245 ; X64-NEXT:    vzeroupper
    246 ; X64-NEXT:    retq
    247 entry:
    248   %and.i.i = and <4 x i64> %__B, %__A
    249   %0 = bitcast <4 x i64> %and.i.i to <32 x i8>
    250   %1 = icmp eq <32 x i8> %0, zeroinitializer
    251   %2 = bitcast i32 %__U to <32 x i1>
    252   %3 = and <32 x i1> %1, %2
    253   %4 = bitcast <32 x i1> %3 to i32
    254   ret i32 %4
    255 }
    256 
        ; testn on words: 8-lane ==0 mask -> i8 (vptestnmw).
    257 define zeroext i8 @test_mm_testn_epi16_mask(<2 x i64> %__A, <2 x i64> %__B) {
    258 ; CHECK-LABEL: test_mm_testn_epi16_mask:
    259 ; CHECK:       # %bb.0: # %entry
    260 ; CHECK-NEXT:    vptestnmw %xmm0, %xmm1, %k0
    261 ; CHECK-NEXT:    kmovd %k0, %eax
    262 ; CHECK-NEXT:    movzbl %al, %eax
    263 ; CHECK-NEXT:    ret{{[l|q]}}
    264 entry:
    265   %and.i.i = and <2 x i64> %__B, %__A
    266   %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
    267   %1 = icmp eq <8 x i16> %0, zeroinitializer
    268   %2 = bitcast <8 x i1> %1 to i8
    269   ret i8 %2
    270 }
    271 
        ; Masked testn on words: ==0 compare ANDed with i8 mask %__U (vptestnmw {%k1}).
    272 define zeroext i8 @test_mm_mask_testn_epi16_mask(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__B) {
    273 ; X86-LABEL: test_mm_mask_testn_epi16_mask:
    274 ; X86:       # %bb.0: # %entry
    275 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    276 ; X86-NEXT:    kmovd %eax, %k1
    277 ; X86-NEXT:    vptestnmw %xmm0, %xmm1, %k0 {%k1}
    278 ; X86-NEXT:    kmovd %k0, %eax
    279 ; X86-NEXT:    movzbl %al, %eax
    280 ; X86-NEXT:    retl
    281 ;
    282 ; X64-LABEL: test_mm_mask_testn_epi16_mask:
    283 ; X64:       # %bb.0: # %entry
    284 ; X64-NEXT:    kmovd %edi, %k1
    285 ; X64-NEXT:    vptestnmw %xmm0, %xmm1, %k0 {%k1}
    286 ; X64-NEXT:    kmovd %k0, %eax
    287 ; X64-NEXT:    movzbl %al, %eax
    288 ; X64-NEXT:    retq
    289 entry:
    290   %and.i.i = and <2 x i64> %__B, %__A
    291   %0 = bitcast <2 x i64> %and.i.i to <8 x i16>
    292   %1 = icmp eq <8 x i16> %0, zeroinitializer
    293   %2 = bitcast i8 %__U to <8 x i1>
    294   %3 = and <8 x i1> %1, %2
    295   %4 = bitcast <8 x i1> %3 to i8
    296   ret i8 %4
    297 }
    298 
        ; 256-bit testn on words: 16-lane ==0 mask -> i16 (vptestnmw ymm).
    299 define zeroext i16 @test_mm256_testn_epi16_mask(<4 x i64> %__A, <4 x i64> %__B) {
    300 ; CHECK-LABEL: test_mm256_testn_epi16_mask:
    301 ; CHECK:       # %bb.0: # %entry
    302 ; CHECK-NEXT:    vptestnmw %ymm0, %ymm1, %k0
    303 ; CHECK-NEXT:    kmovd %k0, %eax
    304 ; CHECK-NEXT:    movzwl %ax, %eax
    305 ; CHECK-NEXT:    vzeroupper
    306 ; CHECK-NEXT:    ret{{[l|q]}}
    307 entry:
    308   %and.i.i = and <4 x i64> %__B, %__A
    309   %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
    310   %1 = icmp eq <16 x i16> %0, zeroinitializer
    311   %2 = bitcast <16 x i1> %1 to i16
    312   ret i16 %2
    313 }
    314 
        ; 256-bit masked testn on words: ==0 compare ANDed with i16 mask %__U.
    315 define zeroext i16 @test_mm256_mask_testn_epi16_mask(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__B) {
    316 ; X86-LABEL: test_mm256_mask_testn_epi16_mask:
    317 ; X86:       # %bb.0: # %entry
    318 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    319 ; X86-NEXT:    vptestnmw %ymm0, %ymm1, %k0 {%k1}
    320 ; X86-NEXT:    kmovd %k0, %eax
    321 ; X86-NEXT:    movzwl %ax, %eax
    322 ; X86-NEXT:    vzeroupper
    323 ; X86-NEXT:    retl
    324 ;
    325 ; X64-LABEL: test_mm256_mask_testn_epi16_mask:
    326 ; X64:       # %bb.0: # %entry
    327 ; X64-NEXT:    kmovd %edi, %k1
    328 ; X64-NEXT:    vptestnmw %ymm0, %ymm1, %k0 {%k1}
    329 ; X64-NEXT:    kmovd %k0, %eax
    330 ; X64-NEXT:    movzwl %ax, %eax
    331 ; X64-NEXT:    vzeroupper
    332 ; X64-NEXT:    retq
    333 entry:
    334   %and.i.i = and <4 x i64> %__B, %__A
    335   %0 = bitcast <4 x i64> %and.i.i to <16 x i16>
    336   %1 = icmp eq <16 x i16> %0, zeroinitializer
    337   %2 = bitcast i16 %__U to <16 x i1>
    338   %3 = and <16 x i1> %1, %2
    339   %4 = bitcast <16 x i1> %3 to i16
    340   ret i16 %4
    341 }
    342 
        ; Merge-masked set1: splat i8 %__A to 16 lanes; lanes selected by %__M take the
        ; splat, others keep %__O (vpbroadcastb from GPR with {%k1} merge-masking).
    343 define <2 x i64> @test_mm_mask_set1_epi8(<2 x i64> %__O, i16 zeroext %__M, i8 signext %__A) local_unnamed_addr #0 {
    344 ; X86-LABEL: test_mm_mask_set1_epi8:
    345 ; X86:       # %bb.0: # %entry
    346 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    347 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    348 ; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1}
    349 ; X86-NEXT:    retl
    350 ;
    351 ; X64-LABEL: test_mm_mask_set1_epi8:
    352 ; X64:       # %bb.0: # %entry
    353 ; X64-NEXT:    kmovd %edi, %k1
    354 ; X64-NEXT:    vpbroadcastb %esi, %xmm0 {%k1}
    355 ; X64-NEXT:    retq
    356 entry:
    357   %vecinit.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0
    358   %vecinit15.i.i = shufflevector <16 x i8> %vecinit.i.i, <16 x i8> undef, <16 x i32> zeroinitializer
    359   %0 = bitcast <2 x i64> %__O to <16 x i8>
    360   %1 = bitcast i16 %__M to <16 x i1>
    361   %2 = select <16 x i1> %1, <16 x i8> %vecinit15.i.i, <16 x i8> %0
    362   %3 = bitcast <16 x i8> %2 to <2 x i64>
    363   ret <2 x i64> %3
    364 }
    365 
        ; Zero-masked set1: as above but unselected lanes are zeroed ({%k1} {z}).
    366 define <2 x i64> @test_mm_maskz_set1_epi8(i16 zeroext %__M, i8 signext %__A)  {
    367 ; X86-LABEL: test_mm_maskz_set1_epi8:
    368 ; X86:       # %bb.0: # %entry
    369 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    370 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    371 ; X86-NEXT:    vpbroadcastb %eax, %xmm0 {%k1} {z}
    372 ; X86-NEXT:    retl
    373 ;
    374 ; X64-LABEL: test_mm_maskz_set1_epi8:
    375 ; X64:       # %bb.0: # %entry
    376 ; X64-NEXT:    kmovd %edi, %k1
    377 ; X64-NEXT:    vpbroadcastb %esi, %xmm0 {%k1} {z}
    378 ; X64-NEXT:    retq
    379 entry:
    380   %vecinit.i.i = insertelement <16 x i8> undef, i8 %__A, i32 0
    381   %vecinit15.i.i = shufflevector <16 x i8> %vecinit.i.i, <16 x i8> undef, <16 x i32> zeroinitializer
    382   %0 = bitcast i16 %__M to <16 x i1>
    383   %1 = select <16 x i1> %0, <16 x i8> %vecinit15.i.i, <16 x i8> zeroinitializer
    384   %2 = bitcast <16 x i8> %1 to <2 x i64>
    385   ret <2 x i64> %2
    386 }
    387 
        ; 256-bit merge-masked set1: splat i8 to 32 lanes, merged with %__O under i32 mask %__M.
    388 define <4 x i64> @test_mm256_mask_set1_epi8(<4 x i64> %__O, i32 %__M, i8 signext %__A){
    389 ; X86-LABEL: test_mm256_mask_set1_epi8:
    390 ; X86:       # %bb.0: # %entry
    391 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    392 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
    393 ; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1}
    394 ; X86-NEXT:    retl
    395 ;
    396 ; X64-LABEL: test_mm256_mask_set1_epi8:
    397 ; X64:       # %bb.0: # %entry
    398 ; X64-NEXT:    kmovd %edi, %k1
    399 ; X64-NEXT:    vpbroadcastb %esi, %ymm0 {%k1}
    400 ; X64-NEXT:    retq
    401 entry:
    402   %vecinit.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0
    403   %vecinit31.i.i = shufflevector <32 x i8> %vecinit.i.i, <32 x i8> undef, <32 x i32> zeroinitializer
    404   %0 = bitcast <4 x i64> %__O to <32 x i8>
    405   %1 = bitcast i32 %__M to <32 x i1>
    406   %2 = select <32 x i1> %1, <32 x i8> %vecinit31.i.i, <32 x i8> %0
    407   %3 = bitcast <32 x i8> %2 to <4 x i64>
    408   ret <4 x i64> %3
    409 }
    410 
        ; 256-bit zero-masked set1: splat i8 to 32 lanes, unselected lanes zeroed ({z}).
    411 define <4 x i64> @test_mm256_maskz_set1_epi8(i32 %__M, i8 signext %__A)  {
    412 ; X86-LABEL: test_mm256_maskz_set1_epi8:
    413 ; X86:       # %bb.0: # %entry
    414 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    415 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
    416 ; X86-NEXT:    vpbroadcastb %eax, %ymm0 {%k1} {z}
    417 ; X86-NEXT:    retl
    418 ;
    419 ; X64-LABEL: test_mm256_maskz_set1_epi8:
    420 ; X64:       # %bb.0: # %entry
    421 ; X64-NEXT:    kmovd %edi, %k1
    422 ; X64-NEXT:    vpbroadcastb %esi, %ymm0 {%k1} {z}
    423 ; X64-NEXT:    retq
    424 entry:
    425   %vecinit.i.i = insertelement <32 x i8> undef, i8 %__A, i32 0
    426   %vecinit31.i.i = shufflevector <32 x i8> %vecinit.i.i, <32 x i8> undef, <32 x i32> zeroinitializer
    427   %0 = bitcast i32 %__M to <32 x i1>
    428   %1 = select <32 x i1> %0, <32 x i8> %vecinit31.i.i, <32 x i8> zeroinitializer
    429   %2 = bitcast <32 x i8> %1 to <4 x i64>
    430   ret <4 x i64> %2
    431 }
    432 
        ; 256-bit merge-masked set1 (words): splat i16 to 16 lanes, merged with %__O under %__M.
    433 define <4 x i64> @test_mm256_mask_set1_epi16(<4 x i64> %__O, i16 zeroext %__M, i16 signext %__A)  {
    434 ; X86-LABEL: test_mm256_mask_set1_epi16:
    435 ; X86:       # %bb.0: # %entry
    436 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    437 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    438 ; X86-NEXT:    vpbroadcastw %eax, %ymm0 {%k1}
    439 ; X86-NEXT:    retl
    440 ;
    441 ; X64-LABEL: test_mm256_mask_set1_epi16:
    442 ; X64:       # %bb.0: # %entry
    443 ; X64-NEXT:    kmovd %edi, %k1
    444 ; X64-NEXT:    vpbroadcastw %esi, %ymm0 {%k1}
    445 ; X64-NEXT:    retq
    446 entry:
    447   %vecinit.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0
    448   %vecinit15.i.i = shufflevector <16 x i16> %vecinit.i.i, <16 x i16> undef, <16 x i32> zeroinitializer
    449   %0 = bitcast <4 x i64> %__O to <16 x i16>
    450   %1 = bitcast i16 %__M to <16 x i1>
    451   %2 = select <16 x i1> %1, <16 x i16> %vecinit15.i.i, <16 x i16> %0
    452   %3 = bitcast <16 x i16> %2 to <4 x i64>
    453   ret <4 x i64> %3
    454 }
    455 
        ; 256-bit zero-masked set1 (words): splat i16 to 16 lanes, unselected lanes zeroed.
    456 define <4 x i64> @test_mm256_maskz_set1_epi16(i16 zeroext %__M, i16 signext %__A) {
    457 ; X86-LABEL: test_mm256_maskz_set1_epi16:
    458 ; X86:       # %bb.0: # %entry
    459 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    460 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    461 ; X86-NEXT:    vpbroadcastw %eax, %ymm0 {%k1} {z}
    462 ; X86-NEXT:    retl
    463 ;
    464 ; X64-LABEL: test_mm256_maskz_set1_epi16:
    465 ; X64:       # %bb.0: # %entry
    466 ; X64-NEXT:    kmovd %edi, %k1
    467 ; X64-NEXT:    vpbroadcastw %esi, %ymm0 {%k1} {z}
    468 ; X64-NEXT:    retq
    469 entry:
    470   %vecinit.i.i = insertelement <16 x i16> undef, i16 %__A, i32 0
    471   %vecinit15.i.i = shufflevector <16 x i16> %vecinit.i.i, <16 x i16> undef, <16 x i32> zeroinitializer
    472   %0 = bitcast i16 %__M to <16 x i1>
    473   %1 = select <16 x i1> %0, <16 x i16> %vecinit15.i.i, <16 x i16> zeroinitializer
    474   %2 = bitcast <16 x i16> %1 to <4 x i64>
    475   ret <4 x i64> %2
    476 }
    477 
        ; 128-bit merge-masked set1 (words): splat i16 to 8 lanes under i8 mask %__M.
    478 define <2 x i64> @test_mm_mask_set1_epi16(<2 x i64> %__O, i8 zeroext %__M, i16 signext %__A) {
    479 ; X86-LABEL: test_mm_mask_set1_epi16:
    480 ; X86:       # %bb.0: # %entry
    481 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    482 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    483 ; X86-NEXT:    kmovd %ecx, %k1
    484 ; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1}
    485 ; X86-NEXT:    retl
    486 ;
    487 ; X64-LABEL: test_mm_mask_set1_epi16:
    488 ; X64:       # %bb.0: # %entry
    489 ; X64-NEXT:    kmovd %edi, %k1
    490 ; X64-NEXT:    vpbroadcastw %esi, %xmm0 {%k1}
    491 ; X64-NEXT:    retq
    492 entry:
    493   %vecinit.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0
    494   %vecinit7.i.i = shufflevector <8 x i16> %vecinit.i.i, <8 x i16> undef, <8 x i32> zeroinitializer
    495   %0 = bitcast <2 x i64> %__O to <8 x i16>
    496   %1 = bitcast i8 %__M to <8 x i1>
    497   %2 = select <8 x i1> %1, <8 x i16> %vecinit7.i.i, <8 x i16> %0
    498   %3 = bitcast <8 x i16> %2 to <2 x i64>
    499   ret <2 x i64> %3
    500 }
    501 
        ; 128-bit zero-masked set1 (words): splat i16 to 8 lanes, unselected lanes zeroed.
    502 define <2 x i64> @test_mm_maskz_set1_epi16(i8 zeroext %__M, i16 signext %__A) {
    503 ; X86-LABEL: test_mm_maskz_set1_epi16:
    504 ; X86:       # %bb.0: # %entry
    505 ; X86-NEXT:    movzwl {{[0-9]+}}(%esp), %eax
    506 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %cl
    507 ; X86-NEXT:    kmovd %ecx, %k1
    508 ; X86-NEXT:    vpbroadcastw %eax, %xmm0 {%k1} {z}
    509 ; X86-NEXT:    retl
    510 ;
    511 ; X64-LABEL: test_mm_maskz_set1_epi16:
    512 ; X64:       # %bb.0: # %entry
    513 ; X64-NEXT:    kmovd %edi, %k1
    514 ; X64-NEXT:    vpbroadcastw %esi, %xmm0 {%k1} {z}
    515 ; X64-NEXT:    retq
    516 entry:
    517   %vecinit.i.i = insertelement <8 x i16> undef, i16 %__A, i32 0
    518   %vecinit7.i.i = shufflevector <8 x i16> %vecinit.i.i, <8 x i16> undef, <8 x i32> zeroinitializer
    519   %0 = bitcast i8 %__M to <8 x i1>
    520   %1 = select <8 x i1> %0, <8 x i16> %vecinit7.i.i, <8 x i16> zeroinitializer
    521   %2 = bitcast <8 x i16> %1 to <2 x i64>
    522   ret <2 x i64> %2
    523 }
    524 
    525 
        ; Splat byte 0 of the input vector to all 16 lanes (register-source vpbroadcastb).
    526 define <2 x i64> @test_mm_broadcastb_epi8(<2 x i64> %a0) {
    527 ; CHECK-LABEL: test_mm_broadcastb_epi8:
    528 ; CHECK:       # %bb.0:
    529 ; CHECK-NEXT:    vpbroadcastb %xmm0, %xmm0
    530 ; CHECK-NEXT:    ret{{[l|q]}}
    531   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    532   %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <16 x i32> zeroinitializer
    533   %res1 = bitcast <16 x i8> %res0 to <2 x i64>
    534   ret <2 x i64> %res1
    535 }
    536 
        ; Merge-masked byte splat: broadcast byte 0 of %a2; masked-off lanes keep %a0.
    537 define <2 x i64> @test_mm_mask_broadcastb_epi8(<2 x i64> %a0, i16 %a1, <2 x i64> %a2) {
    538 ; X86-LABEL: test_mm_mask_broadcastb_epi8:
    539 ; X86:       # %bb.0:
    540 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    541 ; X86-NEXT:    vpbroadcastb %xmm1, %xmm0 {%k1}
    542 ; X86-NEXT:    retl
    543 ;
    544 ; X64-LABEL: test_mm_mask_broadcastb_epi8:
    545 ; X64:       # %bb.0:
    546 ; X64-NEXT:    kmovd %edi, %k1
    547 ; X64-NEXT:    vpbroadcastb %xmm1, %xmm0 {%k1}
    548 ; X64-NEXT:    retq
    549   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    550   %arg1 = bitcast i16 %a1 to <16 x i1>
    551   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
    552   %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <16 x i32> zeroinitializer
    553   %res1 = select <16 x i1> %arg1, <16 x i8> %res0, <16 x i8> %arg0
    554   %res2 = bitcast <16 x i8> %res1 to <2 x i64>
    555   ret <2 x i64> %res2
    556 }
    557 
        ; Zero-masked byte splat: broadcast byte 0 of %a1; masked-off lanes become zero ({z}).
    558 define <2 x i64> @test_mm_maskz_broadcastb_epi8(i16 %a0, <2 x i64> %a1) {
    559 ; X86-LABEL: test_mm_maskz_broadcastb_epi8:
    560 ; X86:       # %bb.0:
    561 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    562 ; X86-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z}
    563 ; X86-NEXT:    retl
    564 ;
    565 ; X64-LABEL: test_mm_maskz_broadcastb_epi8:
    566 ; X64:       # %bb.0:
    567 ; X64-NEXT:    kmovd %edi, %k1
    568 ; X64-NEXT:    vpbroadcastb %xmm0, %xmm0 {%k1} {z}
    569 ; X64-NEXT:    retq
    570   %arg0 = bitcast i16 %a0 to <16 x i1>
    571   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
    572   %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <16 x i32> zeroinitializer
    573   %res1 = select <16 x i1> %arg0, <16 x i8> %res0, <16 x i8> zeroinitializer
    574   %res2 = bitcast <16 x i8> %res1 to <2 x i64>
    575   ret <2 x i64> %res2
    576 }
    577 
        ; Splat byte 0 of the 128-bit input to all 32 lanes of a 256-bit result.
    578 define <4 x i64> @test_mm256_broadcastb_epi8(<2 x i64> %a0) {
    579 ; CHECK-LABEL: test_mm256_broadcastb_epi8:
    580 ; CHECK:       # %bb.0:
    581 ; CHECK-NEXT:    vpbroadcastb %xmm0, %ymm0
    582 ; CHECK-NEXT:    ret{{[l|q]}}
    583   %arg0 = bitcast <2 x i64> %a0 to <16 x i8>
    584   %res0 = shufflevector <16 x i8> %arg0, <16 x i8> undef, <32 x i32> zeroinitializer
    585   %res1 = bitcast <32 x i8> %res0 to <4 x i64>
    586   ret <4 x i64> %res1
    587 }
    588 
        ; 256-bit merge-masked byte splat: broadcast byte 0 of %a2 under i32 mask %a1, else keep %a0.
    589 define <4 x i64> @test_mm256_mask_broadcastb_epi8(<4 x i64> %a0, i32 %a1, <2 x i64> %a2) {
    590 ; X86-LABEL: test_mm256_mask_broadcastb_epi8:
    591 ; X86:       # %bb.0:
    592 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
    593 ; X86-NEXT:    vpbroadcastb %xmm1, %ymm0 {%k1}
    594 ; X86-NEXT:    retl
    595 ;
    596 ; X64-LABEL: test_mm256_mask_broadcastb_epi8:
    597 ; X64:       # %bb.0:
    598 ; X64-NEXT:    kmovd %edi, %k1
    599 ; X64-NEXT:    vpbroadcastb %xmm1, %ymm0 {%k1}
    600 ; X64-NEXT:    retq
    601   %arg0 = bitcast <4 x i64> %a0 to <32 x i8>
    602   %arg1 = bitcast i32 %a1 to <32 x i1>
    603   %arg2 = bitcast <2 x i64> %a2 to <16 x i8>
    604   %res0 = shufflevector <16 x i8> %arg2, <16 x i8> undef, <32 x i32> zeroinitializer
    605   %res1 = select <32 x i1> %arg1, <32 x i8> %res0, <32 x i8> %arg0
    606   %res2 = bitcast <32 x i8> %res1 to <4 x i64>
    607   ret <4 x i64> %res2
    608 }
    609 
        ; 256-bit zero-masked byte splat: broadcast byte 0 of %a1; masked-off lanes zeroed.
    610 define <4 x i64> @test_mm256_maskz_broadcastb_epi8(i32 %a0, <2 x i64> %a1) {
    611 ; X86-LABEL: test_mm256_maskz_broadcastb_epi8:
    612 ; X86:       # %bb.0:
    613 ; X86-NEXT:    kmovd {{[0-9]+}}(%esp), %k1
    614 ; X86-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z}
    615 ; X86-NEXT:    retl
    616 ;
    617 ; X64-LABEL: test_mm256_maskz_broadcastb_epi8:
    618 ; X64:       # %bb.0:
    619 ; X64-NEXT:    kmovd %edi, %k1
    620 ; X64-NEXT:    vpbroadcastb %xmm0, %ymm0 {%k1} {z}
    621 ; X64-NEXT:    retq
    622   %arg0 = bitcast i32 %a0 to <32 x i1>
    623   %arg1 = bitcast <2 x i64> %a1 to <16 x i8>
    624   %res0 = shufflevector <16 x i8> %arg1, <16 x i8> undef, <32 x i32> zeroinitializer
    625   %res1 = select <32 x i1> %arg0, <32 x i8> %res0, <32 x i8> zeroinitializer
    626   %res2 = bitcast <32 x i8> %res1 to <4 x i64>
    627   ret <4 x i64> %res2
    628 }
    629 
        ; Splat word 0 of the input vector to all 8 lanes (vpbroadcastw).
    630 define <2 x i64> @test_mm_broadcastw_epi16(<2 x i64> %a0) {
    631 ; CHECK-LABEL: test_mm_broadcastw_epi16:
    632 ; CHECK:       # %bb.0:
    633 ; CHECK-NEXT:    vpbroadcastw %xmm0, %xmm0
    634 ; CHECK-NEXT:    ret{{[l|q]}}
    635   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    636   %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <8 x i32> zeroinitializer
    637   %res1 = bitcast <8 x i16> %res0 to <2 x i64>
    638   ret <2 x i64> %res1
    639 }
    640 
        ; Merge-masked word splat: broadcast word 0 of %a2 under i8 mask %a1, else keep %a0.
    641 define <2 x i64> @test_mm_mask_broadcastw_epi16(<2 x i64> %a0, i8 %a1, <2 x i64> %a2) {
    642 ; X86-LABEL: test_mm_mask_broadcastw_epi16:
    643 ; X86:       # %bb.0:
    644 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    645 ; X86-NEXT:    kmovd %eax, %k1
    646 ; X86-NEXT:    vpbroadcastw %xmm1, %xmm0 {%k1}
    647 ; X86-NEXT:    retl
    648 ;
    649 ; X64-LABEL: test_mm_mask_broadcastw_epi16:
    650 ; X64:       # %bb.0:
    651 ; X64-NEXT:    kmovd %edi, %k1
    652 ; X64-NEXT:    vpbroadcastw %xmm1, %xmm0 {%k1}
    653 ; X64-NEXT:    retq
    654   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    655   %arg1 = bitcast i8 %a1 to <8 x i1>
    656   %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
    657   %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <8 x i32> zeroinitializer
    658   %res1 = select <8 x i1> %arg1, <8 x i16> %res0, <8 x i16> %arg0
    659   %res2 = bitcast <8 x i16> %res1 to <2 x i64>
    660   ret <2 x i64> %res2
    661 }
    662 
        ; Zero-masked word splat: broadcast word 0 of %a1; masked-off lanes zeroed ({z}).
    663 define <2 x i64> @test_mm_maskz_broadcastw_epi16(i8 %a0, <2 x i64> %a1) {
    664 ; X86-LABEL: test_mm_maskz_broadcastw_epi16:
    665 ; X86:       # %bb.0:
    666 ; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
    667 ; X86-NEXT:    kmovd %eax, %k1
    668 ; X86-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z}
    669 ; X86-NEXT:    retl
    670 ;
    671 ; X64-LABEL: test_mm_maskz_broadcastw_epi16:
    672 ; X64:       # %bb.0:
    673 ; X64-NEXT:    kmovd %edi, %k1
    674 ; X64-NEXT:    vpbroadcastw %xmm0, %xmm0 {%k1} {z}
    675 ; X64-NEXT:    retq
    676   %arg0 = bitcast i8 %a0 to <8 x i1>
    677   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    678   %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <8 x i32> zeroinitializer
    679   %res1 = select <8 x i1> %arg0, <8 x i16> %res0, <8 x i16> zeroinitializer
    680   %res2 = bitcast <8 x i16> %res1 to <2 x i64>
    681   ret <2 x i64> %res2
    682 }
    683 
        ; Splat word 0 of the 128-bit input to all 16 lanes of a 256-bit result.
    684 define <4 x i64> @test_mm256_broadcastw_epi16(<2 x i64> %a0) {
    685 ; CHECK-LABEL: test_mm256_broadcastw_epi16:
    686 ; CHECK:       # %bb.0:
    687 ; CHECK-NEXT:    vpbroadcastw %xmm0, %ymm0
    688 ; CHECK-NEXT:    ret{{[l|q]}}
    689   %arg0 = bitcast <2 x i64> %a0 to <8 x i16>
    690   %res0 = shufflevector <8 x i16> %arg0, <8 x i16> undef, <16 x i32> zeroinitializer
    691   %res1 = bitcast <16 x i16> %res0 to <4 x i64>
    692   ret <4 x i64> %res1
    693 }
    694 
        ; 256-bit merge-masked word splat: broadcast word 0 of %a2 under i16 mask %a1, else keep %a0.
    695 define <4 x i64> @test_mm256_mask_broadcastw_epi16(<4 x i64> %a0, i16 %a1, <2 x i64> %a2) {
    696 ; X86-LABEL: test_mm256_mask_broadcastw_epi16:
    697 ; X86:       # %bb.0:
    698 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    699 ; X86-NEXT:    vpbroadcastw %xmm1, %ymm0 {%k1}
    700 ; X86-NEXT:    retl
    701 ;
    702 ; X64-LABEL: test_mm256_mask_broadcastw_epi16:
    703 ; X64:       # %bb.0:
    704 ; X64-NEXT:    kmovd %edi, %k1
    705 ; X64-NEXT:    vpbroadcastw %xmm1, %ymm0 {%k1}
    706 ; X64-NEXT:    retq
    707   %arg0 = bitcast <4 x i64> %a0 to <16 x i16>
    708   %arg1 = bitcast i16 %a1 to <16 x i1>
    709   %arg2 = bitcast <2 x i64> %a2 to <8 x i16>
    710   %res0 = shufflevector <8 x i16> %arg2, <8 x i16> undef, <16 x i32> zeroinitializer
    711   %res1 = select <16 x i1> %arg1, <16 x i16> %res0, <16 x i16> %arg0
    712   %res2 = bitcast <16 x i16> %res1 to <4 x i64>
    713   ret <4 x i64> %res2
    714 }
    715 
        ; 256-bit zero-masked word splat: broadcast word 0 of %a1; masked-off lanes zeroed.
    716 define <4 x i64> @test_mm256_maskz_broadcastw_epi16(i16 %a0, <2 x i64> %a1) {
    717 ; X86-LABEL: test_mm256_maskz_broadcastw_epi16:
    718 ; X86:       # %bb.0:
    719 ; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
    720 ; X86-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z}
    721 ; X86-NEXT:    retl
    722 ;
    723 ; X64-LABEL: test_mm256_maskz_broadcastw_epi16:
    724 ; X64:       # %bb.0:
    725 ; X64-NEXT:    kmovd %edi, %k1
    726 ; X64-NEXT:    vpbroadcastw %xmm0, %ymm0 {%k1} {z}
    727 ; X64-NEXT:    retq
    728   %arg0 = bitcast i16 %a0 to <16 x i1>
    729   %arg1 = bitcast <2 x i64> %a1 to <8 x i16>
    730   %res0 = shufflevector <8 x i16> %arg1, <8 x i16> undef, <16 x i32> zeroinitializer
    731   %res1 = select <16 x i1> %arg0, <16 x i16> %res0, <16 x i16> zeroinitializer
    732   %res2 = bitcast <16 x i16> %res1 to <4 x i64>
    733   ret <4 x i64> %res2
    734 }
    735 
        ; Truncate 8 words to 8 bytes in the low half, upper 8 bytes zero; the 128-bit
        ; case lowers to a vpshufb byte shuffle rather than vpmovwb.
    736 define <2 x i64> @test_mm_cvtepi16_epi8(<2 x i64> %__A) {
    737 ; CHECK-LABEL: test_mm_cvtepi16_epi8:
    738 ; CHECK:       # %bb.0: # %entry
    739 ; CHECK-NEXT:    vpshufb {{.*#+}} xmm0 = xmm0[0,2,4,6,8,10,12,14],zero,zero,zero,zero,zero,zero,zero,zero
    740 ; CHECK-NEXT:    ret{{[l|q]}}
    741 entry:
    742   %0 = bitcast <2 x i64> %__A to <8 x i16>
    743   %conv.i = trunc <8 x i16> %0 to <8 x i8>
    744   %shuf.i = shufflevector <8 x i8> %conv.i, <8 x i8> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    745   %1 = bitcast <16 x i8> %shuf.i to <2 x i64>
    746   ret <2 x i64> %1
    747 }
    748 
; Equivalent of _mm256_cvtepi16_epi8: truncate sixteen i16 elements from a
; 256-bit vector down to sixteen i8.  Expected lowering: vpmovwb, followed
; by vzeroupper since a YMM register was used.
define <2 x i64> @test_mm256_cvtepi16_epi8(<4 x i64> %__A) {
; CHECK-LABEL: test_mm256_cvtepi16_epi8:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpmovwb %ymm0, %xmm0
; CHECK-NEXT:    vzeroupper
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast <16 x i8> %conv.i to <2 x i64>
  ret <2 x i64> %1
}
    761 
; Equivalent of _mm256_mask_cvtepi16_epi8: word-to-byte truncation of %__A,
; merging lanes whose mask bit in %__M is clear from the pass-through %__O.
; Expected lowering: vpmovwb with {k1} merge-masking.
define <2 x i64> @test_mm256_mask_cvtepi16_epi8(<2 x i64> %__O, i16 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_mask_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %ymm1, %xmm0 {%k1}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %ymm1, %xmm0 {%k1}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast <2 x i64> %__O to <16 x i8>
  %2 = bitcast i16 %__M to <16 x i1>
  %3 = select <16 x i1> %2, <16 x i8> %conv.i.i, <16 x i8> %1 ; merge with pass-through
  %4 = bitcast <16 x i8> %3 to <2 x i64>
  ret <2 x i64> %4
}
    785 
; Equivalent of _mm256_maskz_cvtepi16_epi8: word-to-byte truncation of %__A
; with lanes whose mask bit in %__M is clear set to zero.  Expected
; lowering: vpmovwb with {k1} {z} zero-masking.
define <2 x i64> @test_mm256_maskz_cvtepi16_epi8(i16 zeroext %__M, <4 x i64> %__A) {
; X86-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z}
; X86-NEXT:    vzeroupper
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_cvtepi16_epi8:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpmovwb %ymm0, %xmm0 {%k1} {z}
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %conv.i.i = trunc <16 x i16> %0 to <16 x i8>
  %1 = bitcast i16 %__M to <16 x i1>
  %2 = select <16 x i1> %1, <16 x i8> %conv.i.i, <16 x i8> zeroinitializer ; zero masked-off lanes
  %3 = bitcast <16 x i8> %2 to <2 x i64>
  ret <2 x i64> %3
}
    808 
; Equivalent of _mm_mask2_permutex2var_epi16: two-source word permute where
; masked-off lanes fall back to the index vector %__I (operand %1 in the
; select below).  Because the index operand is the merge destination, this
; should select vpermi2w (which overwrites the index register) with {k1}.
define <2 x i64> @test_mm_mask2_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, i8 zeroext %__U, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
; X86-NEXT:    vmovdqa %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %xmm2, %xmm0, %xmm1 {%k1}
; X64-NEXT:    vmovdqa %xmm1, %xmm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %1 ; fall back to the index vector
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}
    834 
; Equivalent of _mm256_mask2_permutex2var_epi16: 256-bit two-source word
; permute merging masked-off lanes from the index vector %__I, so vpermi2w
; (index-destructive form) with {k1} is expected.
define <4 x i64> @test_mm256_mask2_permutex2var_epi16(<4 x i64> %__A, <4 x i64> %__I, i16 zeroext %__U, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask2_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1}
; X86-NEXT:    vmovdqa %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask2_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermi2w %ymm2, %ymm0, %ymm1 {%k1}
; X64-NEXT:    vmovdqa %ymm1, %ymm0
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %1 ; fall back to the index vector
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}
    859 
; Equivalent of _mm_permutex2var_epi16: unmasked two-source word permute.
; With no merge required, the table-destructive vpermt2w form is expected.
define <2 x i64> @test_mm_permutex2var_epi16(<2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; CHECK-LABEL: test_mm_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast <8 x i16> %3 to <2 x i64>
  ret <2 x i64> %4
}
    873 
; Equivalent of _mm_mask_permutex2var_epi16: two-source word permute where
; masked-off lanes fall back to the first source %__A (operand %0 below).
; Since the first source is the merge destination, vpermt2w {k1} is expected.
define <2 x i64> @test_mm_mask_permutex2var_epi16(<2 x i64> %__A, i8 zeroext %__U, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> %0 ; fall back to the first source
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}
    897 
; Equivalent of _mm_maskz_permutex2var_epi16: two-source word permute with
; masked-off lanes zeroed.  Expected lowering: vpermt2w with {k1} {z}.
define <2 x i64> @test_mm_maskz_permutex2var_epi16(i8 zeroext %__U, <2 x i64> %__A, <2 x i64> %__I, <2 x i64> %__B) {
; X86-LABEL: test_mm_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    movb {{[0-9]+}}(%esp), %al
; X86-NEXT:    kmovd %eax, %k1
; X86-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %xmm2, %xmm1, %xmm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <2 x i64> %__A to <8 x i16>
  %1 = bitcast <2 x i64> %__I to <8 x i16>
  %2 = bitcast <2 x i64> %__B to <8 x i16>
  %3 = tail call <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16> %0, <8 x i16> %1, <8 x i16> %2)
  %4 = bitcast i8 %__U to <8 x i1>
  %5 = select <8 x i1> %4, <8 x i16> %3, <8 x i16> zeroinitializer ; zero masked-off lanes
  %6 = bitcast <8 x i16> %5 to <2 x i64>
  ret <2 x i64> %6
}
    921 
; Equivalent of _mm256_permutex2var_epi16: unmasked 256-bit two-source word
; permute.  Expected lowering: vpermt2w.
define <4 x i64> @test_mm256_permutex2var_epi16(<4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; CHECK-LABEL: test_mm256_permutex2var_epi16:
; CHECK:       # %bb.0: # %entry
; CHECK-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast <16 x i16> %3 to <4 x i64>
  ret <4 x i64> %4
}
    935 
; Equivalent of _mm256_mask_permutex2var_epi16: 256-bit two-source word
; permute merging masked-off lanes from the first source %__A, so
; vpermt2w {k1} is expected.
define <4 x i64> @test_mm256_mask_permutex2var_epi16(<4 x i64> %__A, i16 zeroext %__U, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_mask_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_mask_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> %0 ; fall back to the first source
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}
    958 
; Equivalent of _mm256_maskz_permutex2var_epi16: 256-bit two-source word
; permute with masked-off lanes zeroed.  Expected lowering: vpermt2w with
; {k1} {z} zero-masking.
define <4 x i64> @test_mm256_maskz_permutex2var_epi16(i16 zeroext %__U, <4 x i64> %__A, <4 x i64> %__I, <4 x i64> %__B) {
; X86-LABEL: test_mm256_maskz_permutex2var_epi16:
; X86:       # %bb.0: # %entry
; X86-NEXT:    kmovw {{[0-9]+}}(%esp), %k1
; X86-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1} {z}
; X86-NEXT:    retl
;
; X64-LABEL: test_mm256_maskz_permutex2var_epi16:
; X64:       # %bb.0: # %entry
; X64-NEXT:    kmovd %edi, %k1
; X64-NEXT:    vpermt2w %ymm2, %ymm1, %ymm0 {%k1} {z}
; X64-NEXT:    retq
entry:
  %0 = bitcast <4 x i64> %__A to <16 x i16>
  %1 = bitcast <4 x i64> %__I to <16 x i16>
  %2 = bitcast <4 x i64> %__B to <16 x i16>
  %3 = tail call <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16> %0, <16 x i16> %1, <16 x i16> %2)
  %4 = bitcast i16 %__U to <16 x i1>
  %5 = select <16 x i1> %4, <16 x i16> %3, <16 x i16> zeroinitializer ; zero masked-off lanes
  %6 = bitcast <16 x i16> %5 to <4 x i64>
  ret <4 x i64> %6
}
    981 
    982 declare <8 x i16> @llvm.x86.avx512.vpermi2var.hi.128(<8 x i16>, <8 x i16>, <8 x i16>)
    983 declare <16 x i16> @llvm.x86.avx512.vpermi2var.hi.256(<16 x i16>, <16 x i16>, <16 x i16>)
    984 
    985 !0 = !{i32 1}
    986 
    987