Home | History | Annotate | Download | only in avx512-shuffles
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s
      3 
      ; Unmasked splat of the i8 scalar %s into <16 x i8>: %s is inserted into lane 0
      ; and a shufflevector with an all-zero index vector replicates that lane.
      ; The expected llc output is a single vpbroadcastb from %edi into %xmm0.
      4 define <16 x i8> @test_i8_to_16(i8 %s) {
      5 ; CHECK-LABEL: test_i8_to_16:
      6 ; CHECK:       # %bb.0:
      7 ; CHECK-NEXT:    vpbroadcastb %edi, %xmm0
      8 ; CHECK-NEXT:    retq
      9   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
     10   %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     11   ret <16 x i8> %res
     12 }
     ; Merge-masked splat of %s into <16 x i8>: lanes where %mask is zero take the
     ; splat, all other lanes keep %default. The expected output derives %k1 from
     ; %mask with vptestnmb and applies it to a vpbroadcastb writing over %xmm0.
     13 define <16 x i8> @test_masked_i8_to_16_mask0(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
     14 ; CHECK-LABEL: test_masked_i8_to_16_mask0:
     15 ; CHECK:       # %bb.0:
     16 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
     17 ; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
     18 ; CHECK-NEXT:    retq
     19   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
     20   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     21   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
     22   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
     23   ret <16 x i8> %res
     24 }
     25 
     ; Zero-masked splat of %s into <16 x i8>: lanes where %mask is zero take the
     ; splat, all other lanes become zero. The expected output derives %k1 from
     ; %mask with vptestnmb and uses it on a vpbroadcastb with the {z} modifier.
     26 define <16 x i8> @test_masked_z_i8_to_16_mask0(i8 %s, <16 x i8> %mask) {
     27 ; CHECK-LABEL: test_masked_z_i8_to_16_mask0:
     28 ; CHECK:       # %bb.0:
     29 ; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
     30 ; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
     31 ; CHECK-NEXT:    retq
     32   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
     33   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     34   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
     35   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
     36   ret <16 x i8> %res
     37 }
     ; Merge-masked splat of %s into <16 x i8> (lanes with %mask == 0 take the splat,
     ; the rest keep %default). The IR body is the same as in
     ; test_masked_i8_to_16_mask0; only the function name differs.
     38 define <16 x i8> @test_masked_i8_to_16_mask1(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
     39 ; CHECK-LABEL: test_masked_i8_to_16_mask1:
     40 ; CHECK:       # %bb.0:
     41 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
     42 ; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
     43 ; CHECK-NEXT:    retq
     44   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
     45   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     46   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
     47   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
     48   ret <16 x i8> %res
     49 }
     50 
     ; Zero-masked splat of %s into <16 x i8> (lanes with %mask == 0 take the splat,
     ; the rest are zeroed). The IR body is the same as in
     ; test_masked_z_i8_to_16_mask0; only the function name differs.
     51 define <16 x i8> @test_masked_z_i8_to_16_mask1(i8 %s, <16 x i8> %mask) {
     52 ; CHECK-LABEL: test_masked_z_i8_to_16_mask1:
     53 ; CHECK:       # %bb.0:
     54 ; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
     55 ; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
     56 ; CHECK-NEXT:    retq
     57   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
     58   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     59   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
     60   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
     61   ret <16 x i8> %res
     62 }
     ; Merge-masked splat of %s into <16 x i8> (lanes with %mask == 0 take the splat,
     ; the rest keep %default). The IR body is the same as in
     ; test_masked_i8_to_16_mask0; only the function name differs.
     63 define <16 x i8> @test_masked_i8_to_16_mask2(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
     64 ; CHECK-LABEL: test_masked_i8_to_16_mask2:
     65 ; CHECK:       # %bb.0:
     66 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
     67 ; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
     68 ; CHECK-NEXT:    retq
     69   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
     70   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     71   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
     72   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
     73   ret <16 x i8> %res
     74 }
     75 
     ; Zero-masked splat of %s into <16 x i8> (lanes with %mask == 0 take the splat,
     ; the rest are zeroed). The IR body is the same as in
     ; test_masked_z_i8_to_16_mask0; only the function name differs.
     76 define <16 x i8> @test_masked_z_i8_to_16_mask2(i8 %s, <16 x i8> %mask) {
     77 ; CHECK-LABEL: test_masked_z_i8_to_16_mask2:
     78 ; CHECK:       # %bb.0:
     79 ; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
     80 ; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
     81 ; CHECK-NEXT:    retq
     82   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
     83   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     84   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
     85   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
     86   ret <16 x i8> %res
     87 }
     ; Merge-masked splat of %s into <16 x i8> (lanes with %mask == 0 take the splat,
     ; the rest keep %default). The IR body is the same as in
     ; test_masked_i8_to_16_mask0; only the function name differs.
     88 define <16 x i8> @test_masked_i8_to_16_mask3(i8 %s, <16 x i8> %default, <16 x i8> %mask) {
     89 ; CHECK-LABEL: test_masked_i8_to_16_mask3:
     90 ; CHECK:       # %bb.0:
     91 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
     92 ; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1}
     93 ; CHECK-NEXT:    retq
     94   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
     95   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     96   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
     97   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
     98   ret <16 x i8> %res
     99 }
    100 
    ; Zero-masked splat of %s into <16 x i8> (lanes with %mask == 0 take the splat,
    ; the rest are zeroed). The IR body is the same as in
    ; test_masked_z_i8_to_16_mask0; only the function name differs.
    101 define <16 x i8> @test_masked_z_i8_to_16_mask3(i8 %s, <16 x i8> %mask) {
    102 ; CHECK-LABEL: test_masked_z_i8_to_16_mask3:
    103 ; CHECK:       # %bb.0:
    104 ; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
    105 ; CHECK-NEXT:    vpbroadcastb %edi, %xmm0 {%k1} {z}
    106 ; CHECK-NEXT:    retq
    107   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    108   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    109   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
    110   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
    111   ret <16 x i8> %res
    112 }
    ; Unmasked splat of the i8 scalar %s into <32 x i8>: %s is inserted into lane 0
    ; and a shufflevector with an all-zero index vector replicates that lane.
    ; The expected llc output is a single vpbroadcastb from %edi into %ymm0.
    113 define <32 x i8> @test_i8_to_32(i8 %s) {
    114 ; CHECK-LABEL: test_i8_to_32:
    115 ; CHECK:       # %bb.0:
    116 ; CHECK-NEXT:    vpbroadcastb %edi, %ymm0
    117 ; CHECK-NEXT:    retq
    118   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    119   %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    120   ret <32 x i8> %res
    121 }
    ; Merge-masked splat of %s into <32 x i8>: lanes where %mask is zero take the
    ; splat, all other lanes keep %default. The expected output derives %k1 from
    ; %mask with vptestnmb and applies it to a vpbroadcastb writing over %ymm0.
    122 define <32 x i8> @test_masked_i8_to_32_mask0(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
    123 ; CHECK-LABEL: test_masked_i8_to_32_mask0:
    124 ; CHECK:       # %bb.0:
    125 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
    126 ; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
    127 ; CHECK-NEXT:    retq
    128   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    129   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    130   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
    131   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
    132   ret <32 x i8> %res
    133 }
    134 
    ; Zero-masked splat of %s into <32 x i8>: lanes where %mask is zero take the
    ; splat, all other lanes become zero. The expected output derives %k1 from
    ; %mask with vptestnmb and uses it on a vpbroadcastb with the {z} modifier.
    135 define <32 x i8> @test_masked_z_i8_to_32_mask0(i8 %s, <32 x i8> %mask) {
    136 ; CHECK-LABEL: test_masked_z_i8_to_32_mask0:
    137 ; CHECK:       # %bb.0:
    138 ; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
    139 ; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
    140 ; CHECK-NEXT:    retq
    141   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    142   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    143   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
    144   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
    145   ret <32 x i8> %res
    146 }
    ; Merge-masked splat of %s into <32 x i8> (lanes with %mask == 0 take the splat,
    ; the rest keep %default). The IR body is the same as in
    ; test_masked_i8_to_32_mask0; only the function name differs.
    147 define <32 x i8> @test_masked_i8_to_32_mask1(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
    148 ; CHECK-LABEL: test_masked_i8_to_32_mask1:
    149 ; CHECK:       # %bb.0:
    150 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
    151 ; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
    152 ; CHECK-NEXT:    retq
    153   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    154   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    155   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
    156   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
    157   ret <32 x i8> %res
    158 }
    159 
    ; Zero-masked splat of %s into <32 x i8> (lanes with %mask == 0 take the splat,
    ; the rest are zeroed). The IR body is the same as in
    ; test_masked_z_i8_to_32_mask0; only the function name differs.
    160 define <32 x i8> @test_masked_z_i8_to_32_mask1(i8 %s, <32 x i8> %mask) {
    161 ; CHECK-LABEL: test_masked_z_i8_to_32_mask1:
    162 ; CHECK:       # %bb.0:
    163 ; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
    164 ; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
    165 ; CHECK-NEXT:    retq
    166   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    167   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    168   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
    169   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
    170   ret <32 x i8> %res
    171 }
    ; Merge-masked splat of %s into <32 x i8> (lanes with %mask == 0 take the splat,
    ; the rest keep %default). The IR body is the same as in
    ; test_masked_i8_to_32_mask0; only the function name differs.
    172 define <32 x i8> @test_masked_i8_to_32_mask2(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
    173 ; CHECK-LABEL: test_masked_i8_to_32_mask2:
    174 ; CHECK:       # %bb.0:
    175 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
    176 ; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
    177 ; CHECK-NEXT:    retq
    178   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    179   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    180   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
    181   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
    182   ret <32 x i8> %res
    183 }
    184 
    ; Zero-masked splat of %s into <32 x i8> (lanes with %mask == 0 take the splat,
    ; the rest are zeroed). The IR body is the same as in
    ; test_masked_z_i8_to_32_mask0; only the function name differs.
    185 define <32 x i8> @test_masked_z_i8_to_32_mask2(i8 %s, <32 x i8> %mask) {
    186 ; CHECK-LABEL: test_masked_z_i8_to_32_mask2:
    187 ; CHECK:       # %bb.0:
    188 ; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
    189 ; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
    190 ; CHECK-NEXT:    retq
    191   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    192   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    193   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
    194   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
    195   ret <32 x i8> %res
    196 }
    ; Merge-masked splat of %s into <32 x i8> (lanes with %mask == 0 take the splat,
    ; the rest keep %default). The IR body is the same as in
    ; test_masked_i8_to_32_mask0; only the function name differs.
    197 define <32 x i8> @test_masked_i8_to_32_mask3(i8 %s, <32 x i8> %default, <32 x i8> %mask) {
    198 ; CHECK-LABEL: test_masked_i8_to_32_mask3:
    199 ; CHECK:       # %bb.0:
    200 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
    201 ; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1}
    202 ; CHECK-NEXT:    retq
    203   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    204   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    205   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
    206   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
    207   ret <32 x i8> %res
    208 }
    209 
    ; Zero-masked splat of %s into <32 x i8> (lanes with %mask == 0 take the splat,
    ; the rest are zeroed). The IR body is the same as in
    ; test_masked_z_i8_to_32_mask0; only the function name differs.
    210 define <32 x i8> @test_masked_z_i8_to_32_mask3(i8 %s, <32 x i8> %mask) {
    211 ; CHECK-LABEL: test_masked_z_i8_to_32_mask3:
    212 ; CHECK:       # %bb.0:
    213 ; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
    214 ; CHECK-NEXT:    vpbroadcastb %edi, %ymm0 {%k1} {z}
    215 ; CHECK-NEXT:    retq
    216   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    217   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    218   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
    219   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
    220   ret <32 x i8> %res
    221 }
    222 define <64 x i8> @test_i8_to_64(i8 %s) {
    223 ; CHECK-LABEL: test_i8_to_64:
    224 ; CHECK:       # %bb.0:
    225 ; CHECK-NEXT:    vpbroadcastb %edi, %zmm0
    226 ; CHECK-NEXT:    retq
    227   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    228   %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    229   ret <64 x i8> %res
    230 }
    231 define <64 x i8> @test_masked_i8_to_64_mask0(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
    232 ; CHECK-LABEL: test_masked_i8_to_64_mask0:
    233 ; CHECK:       # %bb.0:
    234 ; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
    235 ; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
    236 ; CHECK-NEXT:    retq
    237   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    238   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    239   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
    240   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
    241   ret <64 x i8> %res
    242 }
    243 
    244 define <64 x i8> @test_masked_z_i8_to_64_mask0(i8 %s, <64 x i8> %mask) {
    245 ; CHECK-LABEL: test_masked_z_i8_to_64_mask0:
    246 ; CHECK:       # %bb.0:
    247 ; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
    248 ; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
    249 ; CHECK-NEXT:    retq
    250   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    251   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    252   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
    253   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
    254   ret <64 x i8> %res
    255 }
    256 define <64 x i8> @test_masked_i8_to_64_mask1(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
    257 ; CHECK-LABEL: test_masked_i8_to_64_mask1:
    258 ; CHECK:       # %bb.0:
    259 ; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
    260 ; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
    261 ; CHECK-NEXT:    retq
    262   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    263   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    264   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
    265   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
    266   ret <64 x i8> %res
    267 }
    268 
    269 define <64 x i8> @test_masked_z_i8_to_64_mask1(i8 %s, <64 x i8> %mask) {
    270 ; CHECK-LABEL: test_masked_z_i8_to_64_mask1:
    271 ; CHECK:       # %bb.0:
    272 ; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
    273 ; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
    274 ; CHECK-NEXT:    retq
    275   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    276   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    277   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
    278   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
    279   ret <64 x i8> %res
    280 }
    281 define <64 x i8> @test_masked_i8_to_64_mask2(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
    282 ; CHECK-LABEL: test_masked_i8_to_64_mask2:
    283 ; CHECK:       # %bb.0:
    284 ; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
    285 ; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
    286 ; CHECK-NEXT:    retq
    287   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    288   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    289   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
    290   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
    291   ret <64 x i8> %res
    292 }
    293 
    294 define <64 x i8> @test_masked_z_i8_to_64_mask2(i8 %s, <64 x i8> %mask) {
    295 ; CHECK-LABEL: test_masked_z_i8_to_64_mask2:
    296 ; CHECK:       # %bb.0:
    297 ; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
    298 ; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
    299 ; CHECK-NEXT:    retq
    300   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    301   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    302   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
    303   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
    304   ret <64 x i8> %res
    305 }
    306 define <64 x i8> @test_masked_i8_to_64_mask3(i8 %s, <64 x i8> %default, <64 x i8> %mask) {
    307 ; CHECK-LABEL: test_masked_i8_to_64_mask3:
    308 ; CHECK:       # %bb.0:
    309 ; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
    310 ; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1}
    311 ; CHECK-NEXT:    retq
    312   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    313   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    314   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
    315   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
    316   ret <64 x i8> %res
    317 }
    318 
    319 define <64 x i8> @test_masked_z_i8_to_64_mask3(i8 %s, <64 x i8> %mask) {
    320 ; CHECK-LABEL: test_masked_z_i8_to_64_mask3:
    321 ; CHECK:       # %bb.0:
    322 ; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
    323 ; CHECK-NEXT:    vpbroadcastb %edi, %zmm0 {%k1} {z}
    324 ; CHECK-NEXT:    retq
    325   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
    326   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    327   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
    328   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
    329   ret <64 x i8> %res
    330 }
    ; Unmasked splat of the i16 scalar %s into <8 x i16>: %s is inserted into lane 0
    ; and a shufflevector with an all-zero index vector replicates that lane.
    ; The expected llc output is a single vpbroadcastw from %edi into %xmm0.
    331 define <8 x i16> @test_i16_to_8(i16 %s) {
    332 ; CHECK-LABEL: test_i16_to_8:
    333 ; CHECK:       # %bb.0:
    334 ; CHECK-NEXT:    vpbroadcastw %edi, %xmm0
    335 ; CHECK-NEXT:    retq
    336   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    337   %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    338   ret <8 x i16> %res
    339 }
    ; Merge-masked splat of %s into <8 x i16>: lanes where %mask is zero take the
    ; splat, all other lanes keep %default. The expected output derives %k1 from
    ; %mask with vptestnmw and applies it to a vpbroadcastw writing over %xmm0.
    340 define <8 x i16> @test_masked_i16_to_8_mask0(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
    341 ; CHECK-LABEL: test_masked_i16_to_8_mask0:
    342 ; CHECK:       # %bb.0:
    343 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
    344 ; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
    345 ; CHECK-NEXT:    retq
    346   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    347   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    348   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
    349   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
    350   ret <8 x i16> %res
    351 }
    352 
    ; Zero-masked splat of %s into <8 x i16>: lanes where %mask is zero take the
    ; splat, all other lanes become zero. The expected output derives %k1 from
    ; %mask with vptestnmw and uses it on a vpbroadcastw with the {z} modifier.
    353 define <8 x i16> @test_masked_z_i16_to_8_mask0(i16 %s, <8 x i16> %mask) {
    354 ; CHECK-LABEL: test_masked_z_i16_to_8_mask0:
    355 ; CHECK:       # %bb.0:
    356 ; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
    357 ; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
    358 ; CHECK-NEXT:    retq
    359   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    360   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    361   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
    362   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
    363   ret <8 x i16> %res
    364 }
    ; Merge-masked splat of %s into <8 x i16> (lanes with %mask == 0 take the splat,
    ; the rest keep %default). The IR body is the same as in
    ; test_masked_i16_to_8_mask0; only the function name differs.
    365 define <8 x i16> @test_masked_i16_to_8_mask1(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
    366 ; CHECK-LABEL: test_masked_i16_to_8_mask1:
    367 ; CHECK:       # %bb.0:
    368 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
    369 ; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
    370 ; CHECK-NEXT:    retq
    371   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    372   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    373   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
    374   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
    375   ret <8 x i16> %res
    376 }
    377 
    ; Zero-masked splat of %s into <8 x i16> (lanes with %mask == 0 take the splat,
    ; the rest are zeroed). The IR body is the same as in
    ; test_masked_z_i16_to_8_mask0; only the function name differs.
    378 define <8 x i16> @test_masked_z_i16_to_8_mask1(i16 %s, <8 x i16> %mask) {
    379 ; CHECK-LABEL: test_masked_z_i16_to_8_mask1:
    380 ; CHECK:       # %bb.0:
    381 ; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
    382 ; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
    383 ; CHECK-NEXT:    retq
    384   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    385   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    386   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
    387   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
    388   ret <8 x i16> %res
    389 }
    ; Merge-masked splat of %s into <8 x i16> (lanes with %mask == 0 take the splat,
    ; the rest keep %default). The IR body is the same as in
    ; test_masked_i16_to_8_mask0; only the function name differs.
    390 define <8 x i16> @test_masked_i16_to_8_mask2(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
    391 ; CHECK-LABEL: test_masked_i16_to_8_mask2:
    392 ; CHECK:       # %bb.0:
    393 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
    394 ; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
    395 ; CHECK-NEXT:    retq
    396   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    397   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    398   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
    399   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
    400   ret <8 x i16> %res
    401 }
    402 
    ; Zero-masked splat of %s into <8 x i16> (lanes with %mask == 0 take the splat,
    ; the rest are zeroed). The IR body is the same as in
    ; test_masked_z_i16_to_8_mask0; only the function name differs.
    403 define <8 x i16> @test_masked_z_i16_to_8_mask2(i16 %s, <8 x i16> %mask) {
    404 ; CHECK-LABEL: test_masked_z_i16_to_8_mask2:
    405 ; CHECK:       # %bb.0:
    406 ; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
    407 ; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
    408 ; CHECK-NEXT:    retq
    409   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    410   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    411   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
    412   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
    413   ret <8 x i16> %res
    414 }
    ; Merge-masked splat of %s into <8 x i16> (lanes with %mask == 0 take the splat,
    ; the rest keep %default). The IR body is the same as in
    ; test_masked_i16_to_8_mask0; only the function name differs.
    415 define <8 x i16> @test_masked_i16_to_8_mask3(i16 %s, <8 x i16> %default, <8 x i16> %mask) {
    416 ; CHECK-LABEL: test_masked_i16_to_8_mask3:
    417 ; CHECK:       # %bb.0:
    418 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
    419 ; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1}
    420 ; CHECK-NEXT:    retq
    421   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    422   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    423   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
    424   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
    425   ret <8 x i16> %res
    426 }
    427 
    ; Zero-masked splat of %s into <8 x i16> (lanes with %mask == 0 take the splat,
    ; the rest are zeroed). The IR body is the same as in
    ; test_masked_z_i16_to_8_mask0; only the function name differs.
    428 define <8 x i16> @test_masked_z_i16_to_8_mask3(i16 %s, <8 x i16> %mask) {
    429 ; CHECK-LABEL: test_masked_z_i16_to_8_mask3:
    430 ; CHECK:       # %bb.0:
    431 ; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
    432 ; CHECK-NEXT:    vpbroadcastw %edi, %xmm0 {%k1} {z}
    433 ; CHECK-NEXT:    retq
    434   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    435   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    436   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
    437   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
    438   ret <8 x i16> %res
    439 }
    ; Unmasked splat of the i16 scalar %s into <16 x i16>: %s is inserted into lane 0
    ; and a shufflevector with an all-zero index vector replicates that lane.
    ; The expected llc output is a single vpbroadcastw from %edi into %ymm0.
    440 define <16 x i16> @test_i16_to_16(i16 %s) {
    441 ; CHECK-LABEL: test_i16_to_16:
    442 ; CHECK:       # %bb.0:
    443 ; CHECK-NEXT:    vpbroadcastw %edi, %ymm0
    444 ; CHECK-NEXT:    retq
    445   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    446   %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    447   ret <16 x i16> %res
    448 }
    ; Merge-masked splat of %s into <16 x i16>: lanes where %mask is zero take the
    ; splat, all other lanes keep %default. The expected output derives %k1 from
    ; %mask with vptestnmw and applies it to a vpbroadcastw writing over %ymm0.
    449 define <16 x i16> @test_masked_i16_to_16_mask0(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
    450 ; CHECK-LABEL: test_masked_i16_to_16_mask0:
    451 ; CHECK:       # %bb.0:
    452 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
    453 ; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
    454 ; CHECK-NEXT:    retq
    455   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    456   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    457   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
    458   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
    459   ret <16 x i16> %res
    460 }
    461 
    ; Zero-masked splat of %s into <16 x i16>: lanes where %mask is zero take the
    ; splat, all other lanes become zero. The expected output derives %k1 from
    ; %mask with vptestnmw and uses it on a vpbroadcastw with the {z} modifier.
    462 define <16 x i16> @test_masked_z_i16_to_16_mask0(i16 %s, <16 x i16> %mask) {
    463 ; CHECK-LABEL: test_masked_z_i16_to_16_mask0:
    464 ; CHECK:       # %bb.0:
    465 ; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
    466 ; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
    467 ; CHECK-NEXT:    retq
    468   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    469   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    470   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
    471   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
    472   ret <16 x i16> %res
    473 }
    ; Merge-masked splat of %s into <16 x i16> (lanes with %mask == 0 take the splat,
    ; the rest keep %default). The IR body is the same as in
    ; test_masked_i16_to_16_mask0; only the function name differs.
    474 define <16 x i16> @test_masked_i16_to_16_mask1(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
    475 ; CHECK-LABEL: test_masked_i16_to_16_mask1:
    476 ; CHECK:       # %bb.0:
    477 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
    478 ; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
    479 ; CHECK-NEXT:    retq
    480   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    481   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    482   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
    483   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
    484   ret <16 x i16> %res
    485 }
    486 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1} {z}.
    487 define <16 x i16> @test_masked_z_i16_to_16_mask1(i16 %s, <16 x i16> %mask) {
    488 ; CHECK-LABEL: test_masked_z_i16_to_16_mask1:
    489 ; CHECK:       # %bb.0:
    490 ; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
    491 ; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
    492 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    493   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    494   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    495   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
    496   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
    497   ret <16 x i16> %res
    498 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1}.
    499 define <16 x i16> @test_masked_i16_to_16_mask2(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
    500 ; CHECK-LABEL: test_masked_i16_to_16_mask2:
    501 ; CHECK:       # %bb.0:
    502 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
    503 ; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
    504 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    505   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    506   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    507   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
    508   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
    509   ret <16 x i16> %res
    510 }
    511 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1} {z}.
    512 define <16 x i16> @test_masked_z_i16_to_16_mask2(i16 %s, <16 x i16> %mask) {
    513 ; CHECK-LABEL: test_masked_z_i16_to_16_mask2:
    514 ; CHECK:       # %bb.0:
    515 ; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
    516 ; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
    517 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    518   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    519   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    520   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
    521   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
    522   ret <16 x i16> %res
    523 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1}.
    524 define <16 x i16> @test_masked_i16_to_16_mask3(i16 %s, <16 x i16> %default, <16 x i16> %mask) {
    525 ; CHECK-LABEL: test_masked_i16_to_16_mask3:
    526 ; CHECK:       # %bb.0:
    527 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
    528 ; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1}
    529 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    530   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    531   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    532   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
    533   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
    534   ret <16 x i16> %res
    535 }
    536 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1} {z}.
    537 define <16 x i16> @test_masked_z_i16_to_16_mask3(i16 %s, <16 x i16> %mask) {
    538 ; CHECK-LABEL: test_masked_z_i16_to_16_mask3:
    539 ; CHECK:       # %bb.0:
    540 ; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
    541 ; CHECK-NEXT:    vpbroadcastw %edi, %ymm0 {%k1} {z}
    542 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    543   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    544   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    545   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
    546   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
    547   ret <16 x i16> %res
    548 }
    ; Unmasked splat: returns a <32 x i16> with scalar %s in every lane. The
    ; assertions expect a single vpbroadcastw from %edi into %zmm0.
    549 define <32 x i16> @test_i16_to_32(i16 %s) {
    550 ; CHECK-LABEL: test_i16_to_32:
    551 ; CHECK:       # %bb.0:
    552 ; CHECK-NEXT:    vpbroadcastw %edi, %zmm0
    553 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 32 result lanes.
    554   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    555   %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    556   ret <32 x i16> %res
    557 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1}.
    558 define <32 x i16> @test_masked_i16_to_32_mask0(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
    559 ; CHECK-LABEL: test_masked_i16_to_32_mask0:
    560 ; CHECK:       # %bb.0:
    561 ; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
    562 ; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
    563 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 32 result lanes.
    564   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    565   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    566   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
    567   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
    568   ret <32 x i16> %res
    569 }
    570 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1} {z}.
    571 define <32 x i16> @test_masked_z_i16_to_32_mask0(i16 %s, <32 x i16> %mask) {
    572 ; CHECK-LABEL: test_masked_z_i16_to_32_mask0:
    573 ; CHECK:       # %bb.0:
    574 ; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
    575 ; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
    576 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 32 result lanes.
    577   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    578   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    579   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
    580   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
    581   ret <32 x i16> %res
    582 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1}.
    583 define <32 x i16> @test_masked_i16_to_32_mask1(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
    584 ; CHECK-LABEL: test_masked_i16_to_32_mask1:
    585 ; CHECK:       # %bb.0:
    586 ; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
    587 ; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
    588 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 32 result lanes.
    589   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    590   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    591   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
    592   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
    593   ret <32 x i16> %res
    594 }
    595 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1} {z}.
    596 define <32 x i16> @test_masked_z_i16_to_32_mask1(i16 %s, <32 x i16> %mask) {
    597 ; CHECK-LABEL: test_masked_z_i16_to_32_mask1:
    598 ; CHECK:       # %bb.0:
    599 ; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
    600 ; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
    601 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 32 result lanes.
    602   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    603   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    604   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
    605   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
    606   ret <32 x i16> %res
    607 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1}.
    608 define <32 x i16> @test_masked_i16_to_32_mask2(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
    609 ; CHECK-LABEL: test_masked_i16_to_32_mask2:
    610 ; CHECK:       # %bb.0:
    611 ; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
    612 ; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
    613 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 32 result lanes.
    614   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    615   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    616   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
    617   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
    618   ret <32 x i16> %res
    619 }
    620 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1} {z}.
    621 define <32 x i16> @test_masked_z_i16_to_32_mask2(i16 %s, <32 x i16> %mask) {
    622 ; CHECK-LABEL: test_masked_z_i16_to_32_mask2:
    623 ; CHECK:       # %bb.0:
    624 ; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
    625 ; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
    626 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 32 result lanes.
    627   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    628   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    629   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
    630   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
    631   ret <32 x i16> %res
    632 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1}.
    633 define <32 x i16> @test_masked_i16_to_32_mask3(i16 %s, <32 x i16> %default, <32 x i16> %mask) {
    634 ; CHECK-LABEL: test_masked_i16_to_32_mask3:
    635 ; CHECK:       # %bb.0:
    636 ; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
    637 ; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1}
    638 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 32 result lanes.
    639   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    640   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    641   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
    642   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
    643   ret <32 x i16> %res
    644 }
    645 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmw to set %k1 and then a vpbroadcastw masked by {%k1} {z}.
    646 define <32 x i16> @test_masked_z_i16_to_32_mask3(i16 %s, <32 x i16> %mask) {
    647 ; CHECK-LABEL: test_masked_z_i16_to_32_mask3:
    648 ; CHECK:       # %bb.0:
    649 ; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
    650 ; CHECK-NEXT:    vpbroadcastw %edi, %zmm0 {%k1} {z}
    651 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 32 result lanes.
    652   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
    653   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    654   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
    655   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
    656   ret <32 x i16> %res
    657 }
    ; Unmasked splat: returns a <4 x i32> with scalar %s in every lane. The
    ; assertions expect a single vpbroadcastd from %edi into %xmm0.
    658 define <4 x i32> @test_i32_to_4(i32 %s) {
    659 ; CHECK-LABEL: test_i32_to_4:
    660 ; CHECK:       # %bb.0:
    661 ; CHECK-NEXT:    vpbroadcastd %edi, %xmm0
    662 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 4 result lanes.
    663   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    664   %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    665   ret <4 x i32> %res
    666 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    667 define <4 x i32> @test_masked_i32_to_4_mask0(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
    668 ; CHECK-LABEL: test_masked_i32_to_4_mask0:
    669 ; CHECK:       # %bb.0:
    670 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
    671 ; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
    672 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 4 result lanes.
    673   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    674   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    675   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
    676   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
    677   ret <4 x i32> %res
    678 }
    679 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    680 define <4 x i32> @test_masked_z_i32_to_4_mask0(i32 %s, <4 x i32> %mask) {
    681 ; CHECK-LABEL: test_masked_z_i32_to_4_mask0:
    682 ; CHECK:       # %bb.0:
    683 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
    684 ; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} {z}
    685 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 4 result lanes.
    686   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    687   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    688   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
    689   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
    690   ret <4 x i32> %res
    691 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    692 define <4 x i32> @test_masked_i32_to_4_mask1(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
    693 ; CHECK-LABEL: test_masked_i32_to_4_mask1:
    694 ; CHECK:       # %bb.0:
    695 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
    696 ; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
    697 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 4 result lanes.
    698   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    699   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    700   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
    701   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
    702   ret <4 x i32> %res
    703 }
    704 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    705 define <4 x i32> @test_masked_z_i32_to_4_mask1(i32 %s, <4 x i32> %mask) {
    706 ; CHECK-LABEL: test_masked_z_i32_to_4_mask1:
    707 ; CHECK:       # %bb.0:
    708 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
    709 ; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} {z}
    710 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 4 result lanes.
    711   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    712   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    713   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
    714   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
    715   ret <4 x i32> %res
    716 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    717 define <4 x i32> @test_masked_i32_to_4_mask2(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
    718 ; CHECK-LABEL: test_masked_i32_to_4_mask2:
    719 ; CHECK:       # %bb.0:
    720 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
    721 ; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
    722 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 4 result lanes.
    723   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    724   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    725   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
    726   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
    727   ret <4 x i32> %res
    728 }
    729 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    730 define <4 x i32> @test_masked_z_i32_to_4_mask2(i32 %s, <4 x i32> %mask) {
    731 ; CHECK-LABEL: test_masked_z_i32_to_4_mask2:
    732 ; CHECK:       # %bb.0:
    733 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
    734 ; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} {z}
    735 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 4 result lanes.
    736   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    737   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    738   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
    739   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
    740   ret <4 x i32> %res
    741 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    742 define <4 x i32> @test_masked_i32_to_4_mask3(i32 %s, <4 x i32> %default, <4 x i32> %mask) {
    743 ; CHECK-LABEL: test_masked_i32_to_4_mask3:
    744 ; CHECK:       # %bb.0:
    745 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
    746 ; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1}
    747 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 4 result lanes.
    748   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    749   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    750   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
    751   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
    752   ret <4 x i32> %res
    753 }
    754 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    755 define <4 x i32> @test_masked_z_i32_to_4_mask3(i32 %s, <4 x i32> %mask) {
    756 ; CHECK-LABEL: test_masked_z_i32_to_4_mask3:
    757 ; CHECK:       # %bb.0:
    758 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
    759 ; CHECK-NEXT:    vpbroadcastd %edi, %xmm0 {%k1} {z}
    760 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 4 result lanes.
    761   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    762   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    763   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
    764   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
    765   ret <4 x i32> %res
    766 }
    ; Unmasked splat: returns an <8 x i32> with scalar %s in every lane. The
    ; assertions expect a single vpbroadcastd from %edi into %ymm0.
    767 define <8 x i32> @test_i32_to_8(i32 %s) {
    768 ; CHECK-LABEL: test_i32_to_8:
    769 ; CHECK:       # %bb.0:
    770 ; CHECK-NEXT:    vpbroadcastd %edi, %ymm0
    771 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 8 result lanes.
    772   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    773   %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    774   ret <8 x i32> %res
    775 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    776 define <8 x i32> @test_masked_i32_to_8_mask0(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
    777 ; CHECK-LABEL: test_masked_i32_to_8_mask0:
    778 ; CHECK:       # %bb.0:
    779 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
    780 ; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1}
    781 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 8 result lanes.
    782   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    783   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    784   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
    785   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
    786   ret <8 x i32> %res
    787 }
    788 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    789 define <8 x i32> @test_masked_z_i32_to_8_mask0(i32 %s, <8 x i32> %mask) {
    790 ; CHECK-LABEL: test_masked_z_i32_to_8_mask0:
    791 ; CHECK:       # %bb.0:
    792 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
    793 ; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} {z}
    794 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 8 result lanes.
    795   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    796   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    797   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
    798   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
    799   ret <8 x i32> %res
    800 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    801 define <8 x i32> @test_masked_i32_to_8_mask1(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
    802 ; CHECK-LABEL: test_masked_i32_to_8_mask1:
    803 ; CHECK:       # %bb.0:
    804 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
    805 ; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1}
    806 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 8 result lanes.
    807   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    808   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    809   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
    810   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
    811   ret <8 x i32> %res
    812 }
    813 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    814 define <8 x i32> @test_masked_z_i32_to_8_mask1(i32 %s, <8 x i32> %mask) {
    815 ; CHECK-LABEL: test_masked_z_i32_to_8_mask1:
    816 ; CHECK:       # %bb.0:
    817 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
    818 ; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} {z}
    819 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 8 result lanes.
    820   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    821   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    822   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
    823   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
    824   ret <8 x i32> %res
    825 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    826 define <8 x i32> @test_masked_i32_to_8_mask2(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
    827 ; CHECK-LABEL: test_masked_i32_to_8_mask2:
    828 ; CHECK:       # %bb.0:
    829 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
    830 ; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1}
    831 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 8 result lanes.
    832   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    833   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    834   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
    835   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
    836   ret <8 x i32> %res
    837 }
    838 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    839 define <8 x i32> @test_masked_z_i32_to_8_mask2(i32 %s, <8 x i32> %mask) {
    840 ; CHECK-LABEL: test_masked_z_i32_to_8_mask2:
    841 ; CHECK:       # %bb.0:
    842 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
    843 ; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} {z}
    844 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 8 result lanes.
    845   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    846   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    847   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
    848   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
    849   ret <8 x i32> %res
    850 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    851 define <8 x i32> @test_masked_i32_to_8_mask3(i32 %s, <8 x i32> %default, <8 x i32> %mask) {
    852 ; CHECK-LABEL: test_masked_i32_to_8_mask3:
    853 ; CHECK:       # %bb.0:
    854 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
    855 ; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1}
    856 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 8 result lanes.
    857   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    858   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    859   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
    860   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
    861   ret <8 x i32> %res
    862 }
    863 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    864 define <8 x i32> @test_masked_z_i32_to_8_mask3(i32 %s, <8 x i32> %mask) {
    865 ; CHECK-LABEL: test_masked_z_i32_to_8_mask3:
    866 ; CHECK:       # %bb.0:
    867 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
    868 ; CHECK-NEXT:    vpbroadcastd %edi, %ymm0 {%k1} {z}
    869 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 8 result lanes.
    870   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    871   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    872   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
    873   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
    874   ret <8 x i32> %res
    875 }
    ; Unmasked splat: returns a <16 x i32> with scalar %s in every lane. The
    ; assertions expect a single vpbroadcastd from %edi into %zmm0.
    876 define <16 x i32> @test_i32_to_16(i32 %s) {
    877 ; CHECK-LABEL: test_i32_to_16:
    878 ; CHECK:       # %bb.0:
    879 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0
    880 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    881   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    882   %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    883   ret <16 x i32> %res
    884 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    885 define <16 x i32> @test_masked_i32_to_16_mask0(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
    886 ; CHECK-LABEL: test_masked_i32_to_16_mask0:
    887 ; CHECK:       # %bb.0:
    888 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
    889 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
    890 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    891   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    892   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    893   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
    894   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
    895   ret <16 x i32> %res
    896 }
    897 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    898 define <16 x i32> @test_masked_z_i32_to_16_mask0(i32 %s, <16 x i32> %mask) {
    899 ; CHECK-LABEL: test_masked_z_i32_to_16_mask0:
    900 ; CHECK:       # %bb.0:
    901 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    902 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} {z}
    903 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    904   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    905   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    906   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
    907   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
    908   ret <16 x i32> %res
    909 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    910 define <16 x i32> @test_masked_i32_to_16_mask1(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
    911 ; CHECK-LABEL: test_masked_i32_to_16_mask1:
    912 ; CHECK:       # %bb.0:
    913 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
    914 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
    915 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    916   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    917   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    918   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
    919   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
    920   ret <16 x i32> %res
    921 }
    922 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    923 define <16 x i32> @test_masked_z_i32_to_16_mask1(i32 %s, <16 x i32> %mask) {
    924 ; CHECK-LABEL: test_masked_z_i32_to_16_mask1:
    925 ; CHECK:       # %bb.0:
    926 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    927 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} {z}
    928 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    929   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    930   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    931   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
    932   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
    933   ret <16 x i32> %res
    934 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    935 define <16 x i32> @test_masked_i32_to_16_mask2(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
    936 ; CHECK-LABEL: test_masked_i32_to_16_mask2:
    937 ; CHECK:       # %bb.0:
    938 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
    939 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
    940 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    941   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    942   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    943   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
    944   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
    945   ret <16 x i32> %res
    946 }
    947 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    948 define <16 x i32> @test_masked_z_i32_to_16_mask2(i32 %s, <16 x i32> %mask) {
    949 ; CHECK-LABEL: test_masked_z_i32_to_16_mask2:
    950 ; CHECK:       # %bb.0:
    951 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    952 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} {z}
    953 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    954   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    955   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    956   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
    957   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
    958   ret <16 x i32> %res
    959 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1}.
    960 define <16 x i32> @test_masked_i32_to_16_mask3(i32 %s, <16 x i32> %default, <16 x i32> %mask) {
    961 ; CHECK-LABEL: test_masked_i32_to_16_mask3:
    962 ; CHECK:       # %bb.0:
    963 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
    964 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1}
    965 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    966   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    967   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
    968   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
    969   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
    970   ret <16 x i32> %res
    971 }
    972 
    ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane is set to zero. The assertions expect
    ; vptestnmd to set %k1 and then a vpbroadcastd masked by {%k1} {z}.
    973 define <16 x i32> @test_masked_z_i32_to_16_mask3(i32 %s, <16 x i32> %mask) {
    974 ; CHECK-LABEL: test_masked_z_i32_to_16_mask3:
    975 ; CHECK:       # %bb.0:
    976 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
    977 ; CHECK-NEXT:    vpbroadcastd %edi, %zmm0 {%k1} {z}
    978 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into all 16 result lanes.
    979   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
    980   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
    981   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
    982   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
    983   ret <16 x i32> %res
    984 }
    ; Unmasked splat: returns a <2 x i64> with scalar %s in both lanes. The
    ; assertions expect a single vpbroadcastq from %rdi into %xmm0.
    985 define <2 x i64> @test_i64_to_2(i64 %s) {
    986 ; CHECK-LABEL: test_i64_to_2:
    987 ; CHECK:       # %bb.0:
    988 ; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0
    989 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into both result lanes.
    990   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
    991   %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
    992   ret <2 x i64> %res
    993 }
    ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
    ; receive %s, every other lane keeps its %default value. The assertions
    ; expect vptestnmq to set %k1 and then a vpbroadcastq masked by {%k1}.
    994 define <2 x i64> @test_masked_i64_to_2_mask0(i64 %s, <2 x i64> %default, <2 x i64> %mask) {
    995 ; CHECK-LABEL: test_masked_i64_to_2_mask0:
    996 ; CHECK:       # %bb.0:
    997 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
    998 ; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1}
    999 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into both result lanes.
   1000   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1001   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
   1002   %cmp = icmp eq <2 x i64> %mask, zeroinitializer
   1003   %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
   1004   ret <2 x i64> %res
   1005 }
   1006 
   ; Zero-masked splat of scalar %s: result lanes whose %mask element is zero
   ; receive %s, every other lane is set to zero. The assertions expect
   ; vptestnmq to set %k1 and then a vpbroadcastq masked by {%k1} {z}.
   1007 define <2 x i64> @test_masked_z_i64_to_2_mask0(i64 %s, <2 x i64> %mask) {
   1008 ; CHECK-LABEL: test_masked_z_i64_to_2_mask0:
   1009 ; CHECK:       # %bb.0:
   1010 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   1011 ; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1} {z}
   1012 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into both result lanes.
   1013   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1014   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest become zero.
   1015   %cmp = icmp eq <2 x i64> %mask, zeroinitializer
   1016   %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
   1017   ret <2 x i64> %res
   1018 }
   ; Merge-masked splat of scalar %s: result lanes whose %mask element is zero
   ; receive %s, every other lane keeps its %default value. The assertions
   ; expect vptestnmq to set %k1 and then a vpbroadcastq masked by {%k1}.
   1019 define <2 x i64> @test_masked_i64_to_2_mask1(i64 %s, <2 x i64> %default, <2 x i64> %mask) {
   1020 ; CHECK-LABEL: test_masked_i64_to_2_mask1:
   1021 ; CHECK:       # %bb.0:
   1022 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   1023 ; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1}
   1024 ; CHECK-NEXT:    retq
          ; Splat: %s is inserted at lane 0, and the all-zero shuffle indices
          ; replicate that lane into both result lanes.
   1025   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1026   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
          ; %cmp is true where %mask == 0; those lanes take the splat, the rest keep %default.
   1027   %cmp = icmp eq <2 x i64> %mask, zeroinitializer
   1028   %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
   1029   ret <2 x i64> %res
   1030 }
   1031 
   1032 define <2 x i64> @test_masked_z_i64_to_2_mask1(i64 %s, <2 x i64> %mask) {
   1033 ; CHECK-LABEL: test_masked_z_i64_to_2_mask1:
   1034 ; CHECK:       # %bb.0:
   1035 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   1036 ; CHECK-NEXT:    vpbroadcastq %rdi, %xmm0 {%k1} {z}
   1037 ; CHECK-NEXT:    retq
          ; Splat %s and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i64_to_2_mask0 (same compare, same expected assembly).
   1038   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1039   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   1040   %cmp = icmp eq <2 x i64> %mask, zeroinitializer
   1041   %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
   1042   ret <2 x i64> %res
   1043 }
   1044 define <4 x i64> @test_i64_to_4(i64 %s) {
   1045 ; CHECK-LABEL: test_i64_to_4:
   1046 ; CHECK:       # %bb.0:
   1047 ; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0
   1048 ; CHECK-NEXT:    retq
          ; Unmasked splat: %s goes into lane 0 of a <2 x i64>, and the all-zero shuffle
          ; indices widen it to <4 x i64>. Expected codegen is a single vpbroadcastq
          ; from %rdi into %ymm0.
   1049   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1050   %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1051   ret <4 x i64> %res
   1052 }
   1053 define <4 x i64> @test_masked_i64_to_4_mask0(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
   1054 ; CHECK-LABEL: test_masked_i64_to_4_mask0:
   1055 ; CHECK:       # %bb.0:
   1056 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   1057 ; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
   1058 ; CHECK-NEXT:    retq
          ; Splat %s to <4 x i64> (lane 0 of a <2 x i64>, widened by the all-zero shuffle
          ; indices) and keep it only in lanes where %mask == 0; other lanes take %default.
          ; Expected codegen: vptestnmq builds %k1, then a {%k1}-masked vpbroadcastq (no {z}).
   1059   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1060   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1061   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   1062   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
   1063   ret <4 x i64> %res
   1064 }
   1065 
   1066 define <4 x i64> @test_masked_z_i64_to_4_mask0(i64 %s, <4 x i64> %mask) {
   1067 ; CHECK-LABEL: test_masked_z_i64_to_4_mask0:
   1068 ; CHECK:       # %bb.0:
   1069 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   1070 ; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} {z}
   1071 ; CHECK-NEXT:    retq
          ; Splat %s to <4 x i64> and keep it only in lanes where %mask == 0; all other
          ; lanes are zeroinitializer. Expected codegen: vptestnmq builds %k1, then a
          ; {%k1} {z} vpbroadcastq.
   1072   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1073   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1074   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   1075   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
   1076   ret <4 x i64> %res
   1077 }
   1078 define <4 x i64> @test_masked_i64_to_4_mask1(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
   1079 ; CHECK-LABEL: test_masked_i64_to_4_mask1:
   1080 ; CHECK:       # %bb.0:
   1081 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   1082 ; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
   1083 ; CHECK-NEXT:    retq
          ; Splat %s to <4 x i64> and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i64_to_4_mask0 (same compare, same expected assembly).
   1084   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1085   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1086   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   1087   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
   1088   ret <4 x i64> %res
   1089 }
   1090 
   1091 define <4 x i64> @test_masked_z_i64_to_4_mask1(i64 %s, <4 x i64> %mask) {
   1092 ; CHECK-LABEL: test_masked_z_i64_to_4_mask1:
   1093 ; CHECK:       # %bb.0:
   1094 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   1095 ; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} {z}
   1096 ; CHECK-NEXT:    retq
          ; Splat %s to <4 x i64> and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i64_to_4_mask0 (same compare, same expected assembly).
   1097   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1098   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1099   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   1100   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
   1101   ret <4 x i64> %res
   1102 }
   1103 define <4 x i64> @test_masked_i64_to_4_mask2(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
   1104 ; CHECK-LABEL: test_masked_i64_to_4_mask2:
   1105 ; CHECK:       # %bb.0:
   1106 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   1107 ; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
   1108 ; CHECK-NEXT:    retq
          ; Splat %s to <4 x i64> and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i64_to_4_mask0 (same compare, same expected assembly).
   1109   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1110   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1111   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   1112   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
   1113   ret <4 x i64> %res
   1114 }
   1115 
   1116 define <4 x i64> @test_masked_z_i64_to_4_mask2(i64 %s, <4 x i64> %mask) {
   1117 ; CHECK-LABEL: test_masked_z_i64_to_4_mask2:
   1118 ; CHECK:       # %bb.0:
   1119 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   1120 ; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} {z}
   1121 ; CHECK-NEXT:    retq
          ; Splat %s to <4 x i64> and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i64_to_4_mask0 (same compare, same expected assembly).
   1122   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1123   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1124   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   1125   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
   1126   ret <4 x i64> %res
   1127 }
   1128 define <4 x i64> @test_masked_i64_to_4_mask3(i64 %s, <4 x i64> %default, <4 x i64> %mask) {
   1129 ; CHECK-LABEL: test_masked_i64_to_4_mask3:
   1130 ; CHECK:       # %bb.0:
   1131 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   1132 ; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1}
   1133 ; CHECK-NEXT:    retq
          ; Splat %s to <4 x i64> and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i64_to_4_mask0 (same compare, same expected assembly).
   1134   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1135   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1136   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   1137   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
   1138   ret <4 x i64> %res
   1139 }
   1140 
   1141 define <4 x i64> @test_masked_z_i64_to_4_mask3(i64 %s, <4 x i64> %mask) {
   1142 ; CHECK-LABEL: test_masked_z_i64_to_4_mask3:
   1143 ; CHECK:       # %bb.0:
   1144 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   1145 ; CHECK-NEXT:    vpbroadcastq %rdi, %ymm0 {%k1} {z}
   1146 ; CHECK-NEXT:    retq
          ; Splat %s to <4 x i64> and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i64_to_4_mask0 (same compare, same expected assembly).
   1147   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1148   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1149   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   1150   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
   1151   ret <4 x i64> %res
   1152 }
   1153 define <8 x i64> @test_i64_to_8(i64 %s) {
   1154 ; CHECK-LABEL: test_i64_to_8:
   1155 ; CHECK:       # %bb.0:
   1156 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0
   1157 ; CHECK-NEXT:    retq
          ; Unmasked splat: %s goes into lane 0 of a <2 x i64>, and the all-zero shuffle
          ; indices widen it to <8 x i64>. Expected codegen is a single vpbroadcastq
          ; from %rdi into %zmm0.
   1158   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1159   %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1160   ret <8 x i64> %res
   1161 }
   1162 define <8 x i64> @test_masked_i64_to_8_mask0(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
   1163 ; CHECK-LABEL: test_masked_i64_to_8_mask0:
   1164 ; CHECK:       # %bb.0:
   1165 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   1166 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
   1167 ; CHECK-NEXT:    retq
          ; Splat %s to <8 x i64> (lane 0 of a <2 x i64>, widened by the all-zero shuffle
          ; indices) and keep it only in lanes where %mask == 0; other lanes take %default.
          ; Expected codegen: vptestnmq builds %k1, then a {%k1}-masked vpbroadcastq (no {z}).
   1168   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1169   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1170   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   1171   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
   1172   ret <8 x i64> %res
   1173 }
   1174 
   1175 define <8 x i64> @test_masked_z_i64_to_8_mask0(i64 %s, <8 x i64> %mask) {
   1176 ; CHECK-LABEL: test_masked_z_i64_to_8_mask0:
   1177 ; CHECK:       # %bb.0:
   1178 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   1179 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} {z}
   1180 ; CHECK-NEXT:    retq
          ; Splat %s to <8 x i64> and keep it only in lanes where %mask == 0; all other
          ; lanes are zeroinitializer. Expected codegen: vptestnmq builds %k1, then a
          ; {%k1} {z} vpbroadcastq.
   1181   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1182   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1183   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   1184   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
   1185   ret <8 x i64> %res
   1186 }
   1187 define <8 x i64> @test_masked_i64_to_8_mask1(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
   1188 ; CHECK-LABEL: test_masked_i64_to_8_mask1:
   1189 ; CHECK:       # %bb.0:
   1190 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   1191 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
   1192 ; CHECK-NEXT:    retq
          ; Splat %s to <8 x i64> and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i64_to_8_mask0 (same compare, same expected assembly).
   1193   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1194   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1195   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   1196   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
   1197   ret <8 x i64> %res
   1198 }
   1199 
   1200 define <8 x i64> @test_masked_z_i64_to_8_mask1(i64 %s, <8 x i64> %mask) {
   1201 ; CHECK-LABEL: test_masked_z_i64_to_8_mask1:
   1202 ; CHECK:       # %bb.0:
   1203 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   1204 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} {z}
   1205 ; CHECK-NEXT:    retq
          ; Splat %s to <8 x i64> and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i64_to_8_mask0 (same compare, same expected assembly).
   1206   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1207   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1208   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   1209   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
   1210   ret <8 x i64> %res
   1211 }
   1212 define <8 x i64> @test_masked_i64_to_8_mask2(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
   1213 ; CHECK-LABEL: test_masked_i64_to_8_mask2:
   1214 ; CHECK:       # %bb.0:
   1215 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   1216 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
   1217 ; CHECK-NEXT:    retq
          ; Splat %s to <8 x i64> and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i64_to_8_mask0 (same compare, same expected assembly).
   1218   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1219   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1220   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   1221   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
   1222   ret <8 x i64> %res
   1223 }
   1224 
   1225 define <8 x i64> @test_masked_z_i64_to_8_mask2(i64 %s, <8 x i64> %mask) {
   1226 ; CHECK-LABEL: test_masked_z_i64_to_8_mask2:
   1227 ; CHECK:       # %bb.0:
   1228 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   1229 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} {z}
   1230 ; CHECK-NEXT:    retq
          ; Splat %s to <8 x i64> and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i64_to_8_mask0 (same compare, same expected assembly).
   1231   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1232   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1233   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   1234   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
   1235   ret <8 x i64> %res
   1236 }
   1237 define <8 x i64> @test_masked_i64_to_8_mask3(i64 %s, <8 x i64> %default, <8 x i64> %mask) {
   1238 ; CHECK-LABEL: test_masked_i64_to_8_mask3:
   1239 ; CHECK:       # %bb.0:
   1240 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   1241 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1}
   1242 ; CHECK-NEXT:    retq
          ; Splat %s to <8 x i64> and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i64_to_8_mask0 (same compare, same expected assembly).
   1243   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1244   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1245   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   1246   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
   1247   ret <8 x i64> %res
   1248 }
   1249 
   1250 define <8 x i64> @test_masked_z_i64_to_8_mask3(i64 %s, <8 x i64> %mask) {
   1251 ; CHECK-LABEL: test_masked_z_i64_to_8_mask3:
   1252 ; CHECK:       # %bb.0:
   1253 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   1254 ; CHECK-NEXT:    vpbroadcastq %rdi, %zmm0 {%k1} {z}
   1255 ; CHECK-NEXT:    retq
          ; Splat %s to <8 x i64> and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i64_to_8_mask0 (same compare, same expected assembly).
   1256   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   1257   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1258   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   1259   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
   1260   ret <8 x i64> %res
   1261 }
   1262 define <16 x i8> @test_i8_to_16_mem(i8* %p) {
   1263 ; CHECK-LABEL: test_i8_to_16_mem:
   1264 ; CHECK:       # %bb.0:
   1265 ; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0
   1266 ; CHECK-NEXT:    retq
          ; Unmasked splat from memory: the i8 loaded from %p goes into lane 0 of a
          ; <2 x i8>, and the all-zero shuffle indices widen it to <16 x i8>.
          ; Expected codegen folds the load into vpbroadcastb (%rdi).
   1267   %s = load i8, i8* %p
   1268   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1269   %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1270   ret <16 x i8> %res
   1271 }
   1272 define <16 x i8> @test_masked_i8_to_16_mem_mask0(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
   1273 ; CHECK-LABEL: test_masked_i8_to_16_mem_mask0:
   1274 ; CHECK:       # %bb.0:
   1275 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
   1276 ; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1}
   1277 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it to <16 x i8>, and keep it only in lanes where
          ; %mask == 0; other lanes take %default. Expected codegen: vptestnmb builds
          ; %k1, then a {%k1}-masked vpbroadcastb with the load folded as (%rdi).
   1278   %s = load i8, i8* %p
   1279   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1280   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1281   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
   1282   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
   1283   ret <16 x i8> %res
   1284 }
   1285 
   1286 define <16 x i8> @test_masked_z_i8_to_16_mem_mask0(i8* %p, <16 x i8> %mask) {
   1287 ; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask0:
   1288 ; CHECK:       # %bb.0:
   1289 ; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
   1290 ; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1} {z}
   1291 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it to <16 x i8>, and keep it only in lanes where
          ; %mask == 0; all other lanes are zeroinitializer. Expected codegen: vptestnmb
          ; builds %k1, then a {%k1} {z} vpbroadcastb with the load folded as (%rdi).
   1292   %s = load i8, i8* %p
   1293   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1294   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1295   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
   1296   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
   1297   ret <16 x i8> %res
   1298 }
   1299 define <16 x i8> @test_masked_i8_to_16_mem_mask1(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
   1300 ; CHECK-LABEL: test_masked_i8_to_16_mem_mask1:
   1301 ; CHECK:       # %bb.0:
   1302 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
   1303 ; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1}
   1304 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i8_to_16_mem_mask0 (same compare, same expected assembly).
   1305   %s = load i8, i8* %p
   1306   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1307   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1308   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
   1309   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
   1310   ret <16 x i8> %res
   1311 }
   1312 
   1313 define <16 x i8> @test_masked_z_i8_to_16_mem_mask1(i8* %p, <16 x i8> %mask) {
   1314 ; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask1:
   1315 ; CHECK:       # %bb.0:
   1316 ; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
   1317 ; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1} {z}
   1318 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i8_to_16_mem_mask0 (same compare, same expected assembly).
   1319   %s = load i8, i8* %p
   1320   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1321   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1322   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
   1323   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
   1324   ret <16 x i8> %res
   1325 }
   1326 define <16 x i8> @test_masked_i8_to_16_mem_mask2(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
   1327 ; CHECK-LABEL: test_masked_i8_to_16_mem_mask2:
   1328 ; CHECK:       # %bb.0:
   1329 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
   1330 ; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1}
   1331 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i8_to_16_mem_mask0 (same compare, same expected assembly).
   1332   %s = load i8, i8* %p
   1333   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1334   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1335   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
   1336   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
   1337   ret <16 x i8> %res
   1338 }
   1339 
   1340 define <16 x i8> @test_masked_z_i8_to_16_mem_mask2(i8* %p, <16 x i8> %mask) {
   1341 ; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask2:
   1342 ; CHECK:       # %bb.0:
   1343 ; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
   1344 ; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1} {z}
   1345 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i8_to_16_mem_mask0 (same compare, same expected assembly).
   1346   %s = load i8, i8* %p
   1347   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1348   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1349   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
   1350   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
   1351   ret <16 x i8> %res
   1352 }
   1353 define <16 x i8> @test_masked_i8_to_16_mem_mask3(i8* %p, <16 x i8> %default, <16 x i8> %mask) {
   1354 ; CHECK-LABEL: test_masked_i8_to_16_mem_mask3:
   1355 ; CHECK:       # %bb.0:
   1356 ; CHECK-NEXT:    vptestnmb %xmm1, %xmm1, %k1
   1357 ; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1}
   1358 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i8_to_16_mem_mask0 (same compare, same expected assembly).
   1359   %s = load i8, i8* %p
   1360   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1361   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1362   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
   1363   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %default
   1364   ret <16 x i8> %res
   1365 }
   1366 
   1367 define <16 x i8> @test_masked_z_i8_to_16_mem_mask3(i8* %p, <16 x i8> %mask) {
   1368 ; CHECK-LABEL: test_masked_z_i8_to_16_mem_mask3:
   1369 ; CHECK:       # %bb.0:
   1370 ; CHECK-NEXT:    vptestnmb %xmm0, %xmm0, %k1
   1371 ; CHECK-NEXT:    vpbroadcastb (%rdi), %xmm0 {%k1} {z}
   1372 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i8_to_16_mem_mask0 (same compare, same expected assembly).
   1373   %s = load i8, i8* %p
   1374   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1375   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1376   %cmp = icmp eq <16 x i8> %mask, zeroinitializer
   1377   %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
   1378   ret <16 x i8> %res
   1379 }
   1380 define <32 x i8> @test_i8_to_32_mem(i8* %p) {
   1381 ; CHECK-LABEL: test_i8_to_32_mem:
   1382 ; CHECK:       # %bb.0:
   1383 ; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0
   1384 ; CHECK-NEXT:    retq
          ; Unmasked splat from memory: the i8 loaded from %p goes into lane 0 of a
          ; <2 x i8>, and the all-zero shuffle indices widen it to <32 x i8>.
          ; Expected codegen folds the load into vpbroadcastb (%rdi).
   1385   %s = load i8, i8* %p
   1386   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1387   %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1388   ret <32 x i8> %res
   1389 }
   1390 define <32 x i8> @test_masked_i8_to_32_mem_mask0(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
   1391 ; CHECK-LABEL: test_masked_i8_to_32_mem_mask0:
   1392 ; CHECK:       # %bb.0:
   1393 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
   1394 ; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
   1395 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it to <32 x i8>, and keep it only in lanes where
          ; %mask == 0; other lanes take %default. Expected codegen: vptestnmb builds
          ; %k1, then a {%k1}-masked vpbroadcastb with the load folded as (%rdi).
   1396   %s = load i8, i8* %p
   1397   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1398   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1399   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
   1400   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
   1401   ret <32 x i8> %res
   1402 }
   1403 
   1404 define <32 x i8> @test_masked_z_i8_to_32_mem_mask0(i8* %p, <32 x i8> %mask) {
   1405 ; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask0:
   1406 ; CHECK:       # %bb.0:
   1407 ; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
   1408 ; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1} {z}
   1409 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it to <32 x i8>, and keep it only in lanes where
          ; %mask == 0; all other lanes are zeroinitializer. Expected codegen: vptestnmb
          ; builds %k1, then a {%k1} {z} vpbroadcastb with the load folded as (%rdi).
   1410   %s = load i8, i8* %p
   1411   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1412   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1413   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
   1414   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
   1415   ret <32 x i8> %res
   1416 }
   1417 define <32 x i8> @test_masked_i8_to_32_mem_mask1(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
   1418 ; CHECK-LABEL: test_masked_i8_to_32_mem_mask1:
   1419 ; CHECK:       # %bb.0:
   1420 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
   1421 ; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
   1422 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i8_to_32_mem_mask0 (same compare, same expected assembly).
   1423   %s = load i8, i8* %p
   1424   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1425   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1426   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
   1427   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
   1428   ret <32 x i8> %res
   1429 }
   1430 
   1431 define <32 x i8> @test_masked_z_i8_to_32_mem_mask1(i8* %p, <32 x i8> %mask) {
   1432 ; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask1:
   1433 ; CHECK:       # %bb.0:
   1434 ; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
   1435 ; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1} {z}
   1436 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i8_to_32_mem_mask0 (same compare, same expected assembly).
   1437   %s = load i8, i8* %p
   1438   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1439   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1440   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
   1441   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
   1442   ret <32 x i8> %res
   1443 }
   1444 define <32 x i8> @test_masked_i8_to_32_mem_mask2(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
   1445 ; CHECK-LABEL: test_masked_i8_to_32_mem_mask2:
   1446 ; CHECK:       # %bb.0:
   1447 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
   1448 ; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
   1449 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i8_to_32_mem_mask0 (same compare, same expected assembly).
   1450   %s = load i8, i8* %p
   1451   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1452   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1453   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
   1454   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
   1455   ret <32 x i8> %res
   1456 }
   1457 
   1458 define <32 x i8> @test_masked_z_i8_to_32_mem_mask2(i8* %p, <32 x i8> %mask) {
   1459 ; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask2:
   1460 ; CHECK:       # %bb.0:
   1461 ; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
   1462 ; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1} {z}
   1463 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i8_to_32_mem_mask0 (same compare, same expected assembly).
   1464   %s = load i8, i8* %p
   1465   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1466   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1467   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
   1468   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
   1469   ret <32 x i8> %res
   1470 }
   1471 define <32 x i8> @test_masked_i8_to_32_mem_mask3(i8* %p, <32 x i8> %default, <32 x i8> %mask) {
   1472 ; CHECK-LABEL: test_masked_i8_to_32_mem_mask3:
   1473 ; CHECK:       # %bb.0:
   1474 ; CHECK-NEXT:    vptestnmb %ymm1, %ymm1, %k1
   1475 ; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1}
   1476 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else %default.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_i8_to_32_mem_mask0 (same compare, same expected assembly).
   1477   %s = load i8, i8* %p
   1478   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1479   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1480   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
   1481   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %default
   1482   ret <32 x i8> %res
   1483 }
   1484 
   1485 define <32 x i8> @test_masked_z_i8_to_32_mem_mask3(i8* %p, <32 x i8> %mask) {
   1486 ; CHECK-LABEL: test_masked_z_i8_to_32_mem_mask3:
   1487 ; CHECK:       # %bb.0:
   1488 ; CHECK-NEXT:    vptestnmb %ymm0, %ymm0, %k1
   1489 ; CHECK-NEXT:    vpbroadcastb (%rdi), %ymm0 {%k1} {z}
   1490 ; CHECK-NEXT:    retq
          ; Load the i8 at %p, splat it, and select it per lane where %mask == 0, else zero.
          ; NOTE(review): apart from the function name this is identical to
          ; test_masked_z_i8_to_32_mem_mask0 (same compare, same expected assembly).
   1491   %s = load i8, i8* %p
   1492   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1493   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1494   %cmp = icmp eq <32 x i8> %mask, zeroinitializer
   1495   %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
   1496   ret <32 x i8> %res
   1497 }
   1498 define <64 x i8> @test_i8_to_64_mem(i8* %p) {
   1499 ; CHECK-LABEL: test_i8_to_64_mem:
   1500 ; CHECK:       # %bb.0:
   1501 ; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0
   1502 ; CHECK-NEXT:    retq
   1503   %s = load i8, i8* %p
   1504   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1505   %res = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1506   ret <64 x i8> %res
   1507 }
   1508 define <64 x i8> @test_masked_i8_to_64_mem_mask0(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
   1509 ; CHECK-LABEL: test_masked_i8_to_64_mem_mask0:
   1510 ; CHECK:       # %bb.0:
   1511 ; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
   1512 ; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1}
   1513 ; CHECK-NEXT:    retq
   1514   %s = load i8, i8* %p
   1515   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1516   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1517   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
   1518   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
   1519   ret <64 x i8> %res
   1520 }
   1521 
   1522 define <64 x i8> @test_masked_z_i8_to_64_mem_mask0(i8* %p, <64 x i8> %mask) {
   1523 ; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask0:
   1524 ; CHECK:       # %bb.0:
   1525 ; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
   1526 ; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1} {z}
   1527 ; CHECK-NEXT:    retq
   1528   %s = load i8, i8* %p
   1529   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1530   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1531   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
   1532   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
   1533   ret <64 x i8> %res
   1534 }
   1535 define <64 x i8> @test_masked_i8_to_64_mem_mask1(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
   1536 ; CHECK-LABEL: test_masked_i8_to_64_mem_mask1:
   1537 ; CHECK:       # %bb.0:
   1538 ; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
   1539 ; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1}
   1540 ; CHECK-NEXT:    retq
   1541   %s = load i8, i8* %p
   1542   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1543   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1544   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
   1545   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
   1546   ret <64 x i8> %res
   1547 }
   1548 
   1549 define <64 x i8> @test_masked_z_i8_to_64_mem_mask1(i8* %p, <64 x i8> %mask) {
   1550 ; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask1:
   1551 ; CHECK:       # %bb.0:
   1552 ; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
   1553 ; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1} {z}
   1554 ; CHECK-NEXT:    retq
   1555   %s = load i8, i8* %p
   1556   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1557   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1558   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
   1559   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
   1560   ret <64 x i8> %res
   1561 }
   1562 define <64 x i8> @test_masked_i8_to_64_mem_mask2(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
   1563 ; CHECK-LABEL: test_masked_i8_to_64_mem_mask2:
   1564 ; CHECK:       # %bb.0:
   1565 ; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
   1566 ; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1}
   1567 ; CHECK-NEXT:    retq
   1568   %s = load i8, i8* %p
   1569   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1570   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1571   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
   1572   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
   1573   ret <64 x i8> %res
   1574 }
   1575 
   1576 define <64 x i8> @test_masked_z_i8_to_64_mem_mask2(i8* %p, <64 x i8> %mask) {
   1577 ; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask2:
   1578 ; CHECK:       # %bb.0:
   1579 ; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
   1580 ; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1} {z}
   1581 ; CHECK-NEXT:    retq
   1582   %s = load i8, i8* %p
   1583   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1584   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1585   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
   1586   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
   1587   ret <64 x i8> %res
   1588 }
   1589 define <64 x i8> @test_masked_i8_to_64_mem_mask3(i8* %p, <64 x i8> %default, <64 x i8> %mask) {
   1590 ; CHECK-LABEL: test_masked_i8_to_64_mem_mask3:
   1591 ; CHECK:       # %bb.0:
   1592 ; CHECK-NEXT:    vptestnmb %zmm1, %zmm1, %k1
   1593 ; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1}
   1594 ; CHECK-NEXT:    retq
   1595   %s = load i8, i8* %p
   1596   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1597   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1598   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
   1599   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %default
   1600   ret <64 x i8> %res
   1601 }
   1602 
   1603 define <64 x i8> @test_masked_z_i8_to_64_mem_mask3(i8* %p, <64 x i8> %mask) {
   1604 ; CHECK-LABEL: test_masked_z_i8_to_64_mem_mask3:
   1605 ; CHECK:       # %bb.0:
   1606 ; CHECK-NEXT:    vptestnmb %zmm0, %zmm0, %k1
   1607 ; CHECK-NEXT:    vpbroadcastb (%rdi), %zmm0 {%k1} {z}
   1608 ; CHECK-NEXT:    retq
   1609   %s = load i8, i8* %p
   1610   %vec = insertelement <2 x i8> undef, i8 %s, i32 0
   1611   %shuf = shufflevector <2 x i8> %vec, <2 x i8> undef, <64 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1612   %cmp = icmp eq <64 x i8> %mask, zeroinitializer
   1613   %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
   1614   ret <64 x i8> %res
   1615 }
   1616 define <8 x i16> @test_i16_to_8_mem(i16* %p) {
   1617 ; CHECK-LABEL: test_i16_to_8_mem:
   1618 ; CHECK:       # %bb.0:
   1619 ; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0
   1620 ; CHECK-NEXT:    retq
          ; Unmasked splat from memory: the i16 at %p is replicated to all 8 lanes.
          ; The expected asm is a single vpbroadcastw with a memory operand.
   1621   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1622   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   1623   %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1624   ret <8 x i16> %res
   1625 }
   1626 define <8 x i16> @test_masked_i16_to_8_mem_mask0(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
   1627 ; CHECK-LABEL: test_masked_i16_to_8_mem_mask0:
   1628 ; CHECK:       # %bb.0:
   1629 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
   1630 ; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1}
   1631 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 8 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1632   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1633   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   1634   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1635   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1636   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
   1637   ret <8 x i16> %res
   1638 }
   1639 
   1640 define <8 x i16> @test_masked_z_i16_to_8_mem_mask0(i16* %p, <8 x i16> %mask) {
   1641 ; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask0:
   1642 ; CHECK:       # %bb.0:
   1643 ; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
   1644 ; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1} {z}
   1645 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 8 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1646   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1647   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   1648   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1649   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1650   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
   1651   ret <8 x i16> %res
   1652 }
   1653 define <8 x i16> @test_masked_i16_to_8_mem_mask1(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
   1654 ; CHECK-LABEL: test_masked_i16_to_8_mem_mask1:
   1655 ; CHECK:       # %bb.0:
   1656 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
   1657 ; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1}
   1658 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 8 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1659   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1660   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   1661   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1662   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1663   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
   1664   ret <8 x i16> %res
   1665 }
   1666 
   1667 define <8 x i16> @test_masked_z_i16_to_8_mem_mask1(i16* %p, <8 x i16> %mask) {
   1668 ; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask1:
   1669 ; CHECK:       # %bb.0:
   1670 ; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
   1671 ; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1} {z}
   1672 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 8 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1673   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1674   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   1675   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1676   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1677   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
   1678   ret <8 x i16> %res
   1679 }
   1680 define <8 x i16> @test_masked_i16_to_8_mem_mask2(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
   1681 ; CHECK-LABEL: test_masked_i16_to_8_mem_mask2:
   1682 ; CHECK:       # %bb.0:
   1683 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
   1684 ; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1}
   1685 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 8 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1686   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1687   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   1688   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1689   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1690   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
   1691   ret <8 x i16> %res
   1692 }
   1693 
   1694 define <8 x i16> @test_masked_z_i16_to_8_mem_mask2(i16* %p, <8 x i16> %mask) {
   1695 ; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask2:
   1696 ; CHECK:       # %bb.0:
   1697 ; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
   1698 ; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1} {z}
   1699 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 8 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1700   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1701   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   1702   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1703   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1704   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
   1705   ret <8 x i16> %res
   1706 }
   1707 define <8 x i16> @test_masked_i16_to_8_mem_mask3(i16* %p, <8 x i16> %default, <8 x i16> %mask) {
   1708 ; CHECK-LABEL: test_masked_i16_to_8_mem_mask3:
   1709 ; CHECK:       # %bb.0:
   1710 ; CHECK-NEXT:    vptestnmw %xmm1, %xmm1, %k1
   1711 ; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1}
   1712 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 8 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1713   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1714   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   1715   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1716   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1717   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %default
   1718   ret <8 x i16> %res
   1719 }
   1720 
   1721 define <8 x i16> @test_masked_z_i16_to_8_mem_mask3(i16* %p, <8 x i16> %mask) {
   1722 ; CHECK-LABEL: test_masked_z_i16_to_8_mem_mask3:
   1723 ; CHECK:       # %bb.0:
   1724 ; CHECK-NEXT:    vptestnmw %xmm0, %xmm0, %k1
   1725 ; CHECK-NEXT:    vpbroadcastw (%rdi), %xmm0 {%k1} {z}
   1726 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 8 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1727   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1728   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   1729   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1730   %cmp = icmp eq <8 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1731   %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
   1732   ret <8 x i16> %res
   1733 }
   1734 define <16 x i16> @test_i16_to_16_mem(i16* %p) {
   1735 ; CHECK-LABEL: test_i16_to_16_mem:
   1736 ; CHECK:       # %bb.0:
   1737 ; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0
   1738 ; CHECK-NEXT:    retq
          ; Unmasked splat from memory: the i16 at %p is replicated to all 16 lanes.
          ; The expected asm is a single vpbroadcastw with a memory operand.
   1739   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1740   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 16 result lanes read lane 0.
   1741   %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1742   ret <16 x i16> %res
   1743 }
   1744 define <16 x i16> @test_masked_i16_to_16_mem_mask0(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
   1745 ; CHECK-LABEL: test_masked_i16_to_16_mem_mask0:
   1746 ; CHECK:       # %bb.0:
   1747 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
   1748 ; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
   1749 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 16 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1750   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1751   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 16 result lanes read lane 0.
   1752   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1753   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1754   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
   1755   ret <16 x i16> %res
   1756 }
   1757 
   1758 define <16 x i16> @test_masked_z_i16_to_16_mem_mask0(i16* %p, <16 x i16> %mask) {
   1759 ; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask0:
   1760 ; CHECK:       # %bb.0:
   1761 ; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
   1762 ; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1} {z}
   1763 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 16 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1764   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1765   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 16 result lanes read lane 0.
   1766   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1767   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1768   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
   1769   ret <16 x i16> %res
   1770 }
   1771 define <16 x i16> @test_masked_i16_to_16_mem_mask1(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
   1772 ; CHECK-LABEL: test_masked_i16_to_16_mem_mask1:
   1773 ; CHECK:       # %bb.0:
   1774 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
   1775 ; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
   1776 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 16 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1777   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1778   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 16 result lanes read lane 0.
   1779   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1780   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1781   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
   1782   ret <16 x i16> %res
   1783 }
   1784 
   1785 define <16 x i16> @test_masked_z_i16_to_16_mem_mask1(i16* %p, <16 x i16> %mask) {
   1786 ; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask1:
   1787 ; CHECK:       # %bb.0:
   1788 ; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
   1789 ; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1} {z}
   1790 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 16 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1791   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1792   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 16 result lanes read lane 0.
   1793   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1794   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1795   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
   1796   ret <16 x i16> %res
   1797 }
   1798 define <16 x i16> @test_masked_i16_to_16_mem_mask2(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
   1799 ; CHECK-LABEL: test_masked_i16_to_16_mem_mask2:
   1800 ; CHECK:       # %bb.0:
   1801 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
   1802 ; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
   1803 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 16 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1804   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1805   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 16 result lanes read lane 0.
   1806   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1807   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1808   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
   1809   ret <16 x i16> %res
   1810 }
   1811 
   1812 define <16 x i16> @test_masked_z_i16_to_16_mem_mask2(i16* %p, <16 x i16> %mask) {
   1813 ; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask2:
   1814 ; CHECK:       # %bb.0:
   1815 ; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
   1816 ; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1} {z}
   1817 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 16 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1818   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1819   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 16 result lanes read lane 0.
   1820   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1821   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1822   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
   1823   ret <16 x i16> %res
   1824 }
   1825 define <16 x i16> @test_masked_i16_to_16_mem_mask3(i16* %p, <16 x i16> %default, <16 x i16> %mask) {
   1826 ; CHECK-LABEL: test_masked_i16_to_16_mem_mask3:
   1827 ; CHECK:       # %bb.0:
   1828 ; CHECK-NEXT:    vptestnmw %ymm1, %ymm1, %k1
   1829 ; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1}
   1830 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 16 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1831   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1832   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 16 result lanes read lane 0.
   1833   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1834   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1835   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %default
   1836   ret <16 x i16> %res
   1837 }
   1838 
   1839 define <16 x i16> @test_masked_z_i16_to_16_mem_mask3(i16* %p, <16 x i16> %mask) {
   1840 ; CHECK-LABEL: test_masked_z_i16_to_16_mem_mask3:
   1841 ; CHECK:       # %bb.0:
   1842 ; CHECK-NEXT:    vptestnmw %ymm0, %ymm0, %k1
   1843 ; CHECK-NEXT:    vpbroadcastw (%rdi), %ymm0 {%k1} {z}
   1844 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 16 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1845   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1846   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 16 result lanes read lane 0.
   1847   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1848   %cmp = icmp eq <16 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1849   %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
   1850   ret <16 x i16> %res
   1851 }
   1852 define <32 x i16> @test_i16_to_32_mem(i16* %p) {
   1853 ; CHECK-LABEL: test_i16_to_32_mem:
   1854 ; CHECK:       # %bb.0:
   1855 ; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0
   1856 ; CHECK-NEXT:    retq
          ; Unmasked splat from memory: the i16 at %p is replicated to all 32 lanes.
          ; The expected asm is a single vpbroadcastw with a memory operand.
   1857   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1858   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 32 result lanes read lane 0.
   1859   %res = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1860   ret <32 x i16> %res
   1861 }
   1862 define <32 x i16> @test_masked_i16_to_32_mem_mask0(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
   1863 ; CHECK-LABEL: test_masked_i16_to_32_mem_mask0:
   1864 ; CHECK:       # %bb.0:
   1865 ; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
   1866 ; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1}
   1867 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 32 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1868   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1869   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 32 result lanes read lane 0.
   1870   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1871   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1872   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
   1873   ret <32 x i16> %res
   1874 }
   1875 
   1876 define <32 x i16> @test_masked_z_i16_to_32_mem_mask0(i16* %p, <32 x i16> %mask) {
   1877 ; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask0:
   1878 ; CHECK:       # %bb.0:
   1879 ; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
   1880 ; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1} {z}
   1881 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 32 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1882   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1883   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 32 result lanes read lane 0.
   1884   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1885   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1886   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
   1887   ret <32 x i16> %res
   1888 }
   1889 define <32 x i16> @test_masked_i16_to_32_mem_mask1(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
   1890 ; CHECK-LABEL: test_masked_i16_to_32_mem_mask1:
   1891 ; CHECK:       # %bb.0:
   1892 ; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
   1893 ; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1}
   1894 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 32 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1895   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1896   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 32 result lanes read lane 0.
   1897   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1898   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1899   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
   1900   ret <32 x i16> %res
   1901 }
   1902 
   1903 define <32 x i16> @test_masked_z_i16_to_32_mem_mask1(i16* %p, <32 x i16> %mask) {
   1904 ; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask1:
   1905 ; CHECK:       # %bb.0:
   1906 ; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
   1907 ; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1} {z}
   1908 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 32 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1909   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1910   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 32 result lanes read lane 0.
   1911   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1912   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1913   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
   1914   ret <32 x i16> %res
   1915 }
   1916 define <32 x i16> @test_masked_i16_to_32_mem_mask2(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
   1917 ; CHECK-LABEL: test_masked_i16_to_32_mem_mask2:
   1918 ; CHECK:       # %bb.0:
   1919 ; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
   1920 ; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1}
   1921 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 32 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1922   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1923   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 32 result lanes read lane 0.
   1924   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1925   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1926   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
   1927   ret <32 x i16> %res
   1928 }
   1929 
   1930 define <32 x i16> @test_masked_z_i16_to_32_mem_mask2(i16* %p, <32 x i16> %mask) {
   1931 ; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask2:
   1932 ; CHECK:       # %bb.0:
   1933 ; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
   1934 ; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1} {z}
   1935 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 32 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1936   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1937   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 32 result lanes read lane 0.
   1938   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1939   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1940   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
   1941   ret <32 x i16> %res
   1942 }
   1943 define <32 x i16> @test_masked_i16_to_32_mem_mask3(i16* %p, <32 x i16> %default, <32 x i16> %mask) {
   1944 ; CHECK-LABEL: test_masked_i16_to_32_mem_mask3:
   1945 ; CHECK:       # %bb.0:
   1946 ; CHECK-NEXT:    vptestnmw %zmm1, %zmm1, %k1
   1947 ; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1}
   1948 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i16 at %p is broadcast to 32 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1949   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1950   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 32 result lanes read lane 0.
   1951   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1952   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1953   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %default
   1954   ret <32 x i16> %res
   1955 }
   1956 
   1957 define <32 x i16> @test_masked_z_i16_to_32_mem_mask3(i16* %p, <32 x i16> %mask) {
   1958 ; CHECK-LABEL: test_masked_z_i16_to_32_mem_mask3:
   1959 ; CHECK:       # %bb.0:
   1960 ; CHECK-NEXT:    vptestnmw %zmm0, %zmm0, %k1
   1961 ; CHECK-NEXT:    vpbroadcastw (%rdi), %zmm0 {%k1} {z}
   1962 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i16 at %p is broadcast to 32 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   1963   %s = load i16, i16* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1964   %vec = insertelement <2 x i16> undef, i16 %s, i32 0
          ; Every shuffle index is 0, so all 32 result lanes read lane 0.
   1965   %shuf = shufflevector <2 x i16> %vec, <2 x i16> undef, <32 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmw in the expected asm).
   1966   %cmp = icmp eq <32 x i16> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   1967   %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
   1968   ret <32 x i16> %res
   1969 }
   1970 define <4 x i32> @test_i32_to_4_mem(i32* %p) {
   1971 ; CHECK-LABEL: test_i32_to_4_mem:
   1972 ; CHECK:       # %bb.0:
   1973 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
   1974 ; CHECK-NEXT:    retq
          ; Unmasked splat from memory: the i32 at %p is replicated to all 4 lanes.
          ; The expected asm is vbroadcastss even though the element type is i32;
          ; the masked i32-to-4 tests in this file expect vpbroadcastd instead.
   1975   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1976   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 4 result lanes read lane 0.
   1977   %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   1978   ret <4 x i32> %res
   1979 }
   1980 define <4 x i32> @test_masked_i32_to_4_mem_mask0(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
   1981 ; CHECK-LABEL: test_masked_i32_to_4_mem_mask0:
   1982 ; CHECK:       # %bb.0:
   1983 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   1984 ; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1}
   1985 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i32 at %p is broadcast to 4 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   1986   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   1987   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 4 result lanes read lane 0.
   1988   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmd in the expected asm).
   1989   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   1990   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
   1991   ret <4 x i32> %res
   1992 }
   1993 
   1994 define <4 x i32> @test_masked_z_i32_to_4_mem_mask0(i32* %p, <4 x i32> %mask) {
   1995 ; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask0:
   1996 ; CHECK:       # %bb.0:
   1997 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   1998 ; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1} {z}
   1999 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i32 at %p is broadcast to 4 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   2000   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   2001   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 4 result lanes read lane 0.
   2002   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmd in the expected asm).
   2003   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   2004   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
   2005   ret <4 x i32> %res
   2006 }
   2007 define <4 x i32> @test_masked_i32_to_4_mem_mask1(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
   2008 ; CHECK-LABEL: test_masked_i32_to_4_mem_mask1:
   2009 ; CHECK:       # %bb.0:
   2010 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   2011 ; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1}
   2012 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i32 at %p is broadcast to 4 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   2013   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   2014   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 4 result lanes read lane 0.
   2015   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmd in the expected asm).
   2016   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   2017   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
   2018   ret <4 x i32> %res
   2019 }
   2020 
   2021 define <4 x i32> @test_masked_z_i32_to_4_mem_mask1(i32* %p, <4 x i32> %mask) {
   2022 ; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask1:
   2023 ; CHECK:       # %bb.0:
   2024 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   2025 ; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1} {z}
   2026 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i32 at %p is broadcast to 4 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   2027   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   2028   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 4 result lanes read lane 0.
   2029   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmd in the expected asm).
   2030   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   2031   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
   2032   ret <4 x i32> %res
   2033 }
   2034 define <4 x i32> @test_masked_i32_to_4_mem_mask2(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
   2035 ; CHECK-LABEL: test_masked_i32_to_4_mem_mask2:
   2036 ; CHECK:       # %bb.0:
   2037 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   2038 ; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1}
   2039 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i32 at %p is broadcast to 4 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   2040   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   2041   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 4 result lanes read lane 0.
   2042   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmd in the expected asm).
   2043   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   2044   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
   2045   ret <4 x i32> %res
   2046 }
   2047 
   2048 define <4 x i32> @test_masked_z_i32_to_4_mem_mask2(i32* %p, <4 x i32> %mask) {
   2049 ; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask2:
   2050 ; CHECK:       # %bb.0:
   2051 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   2052 ; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1} {z}
   2053 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i32 at %p is broadcast to 4 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   2054   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   2055   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 4 result lanes read lane 0.
   2056   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmd in the expected asm).
   2057   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   2058   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
   2059   ret <4 x i32> %res
   2060 }
   2061 define <4 x i32> @test_masked_i32_to_4_mem_mask3(i32* %p, <4 x i32> %default, <4 x i32> %mask) {
   2062 ; CHECK-LABEL: test_masked_i32_to_4_mem_mask3:
   2063 ; CHECK:       # %bb.0:
   2064 ; CHECK-NEXT:    vptestnmd %xmm1, %xmm1, %k1
   2065 ; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1}
   2066 ; CHECK-NEXT:    retq
          ; Merge-masked splat from memory: the i32 at %p is broadcast to 4 lanes,
          ; and lanes whose %mask element is non-zero keep %default instead.
   2067   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   2068   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 4 result lanes read lane 0.
   2069   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmd in the expected asm).
   2070   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
          ; True lanes take the splat; false lanes pass %default through.
   2071   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
   2072   ret <4 x i32> %res
   2073 }
   2074 
   2075 define <4 x i32> @test_masked_z_i32_to_4_mem_mask3(i32* %p, <4 x i32> %mask) {
   2076 ; CHECK-LABEL: test_masked_z_i32_to_4_mem_mask3:
   2077 ; CHECK:       # %bb.0:
   2078 ; CHECK-NEXT:    vptestnmd %xmm0, %xmm0, %k1
   2079 ; CHECK-NEXT:    vpbroadcastd (%rdi), %xmm0 {%k1} {z}
   2080 ; CHECK-NEXT:    retq
          ; Zero-masked splat from memory: the i32 at %p is broadcast to 4 lanes,
          ; and lanes whose %mask element is non-zero are set to 0.
   2081   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   2082   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 4 result lanes read lane 0.
   2083   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Predicate is true where %mask == 0 (vptestnmd in the expected asm).
   2084   %cmp = icmp eq <4 x i32> %mask, zeroinitializer
          ; True lanes take the splat; false lanes become 0 ({z} in the expected asm).
   2085   %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
   2086   ret <4 x i32> %res
   2087 }
   2088 define <8 x i32> @test_i32_to_8_mem(i32* %p) {
   2089 ; CHECK-LABEL: test_i32_to_8_mem:
   2090 ; CHECK:       # %bb.0:
   2091 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
   2092 ; CHECK-NEXT:    retq
          ; Unmasked splat from memory: the i32 at %p is replicated to all 8 lanes.
          ; The expected asm is vbroadcastss even though the element type is i32;
          ; the masked i32-to-8 tests in this file expect vpbroadcastd instead.
   2093   %s = load i32, i32* %p
          ; Lane 0 of a 2-element temporary holds the scalar; lane 1 stays undef.
   2094   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
          ; Every shuffle index is 0, so all 8 result lanes read lane 0.
   2095   %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   2096   ret <8 x i32> %res
   2097 }
   2098 define <8 x i32> @test_masked_i32_to_8_mem_mask0(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
        ; Merge-masked splat of the i32 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1}.
   2099 ; CHECK-LABEL: test_masked_i32_to_8_mem_mask0:
   2100 ; CHECK:       # %bb.0:
   2101 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   2102 ; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
   2103 ; CHECK-NEXT:    retq
   2104   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2105   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2106   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2107   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
   2108   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
   2109   ret <8 x i32> %res
   2110 }
   2111 
   2112 define <8 x i32> @test_masked_z_i32_to_8_mem_mask0(i32* %p, <8 x i32> %mask) {
        ; Zero-masked splat of the i32 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1} {z}.
   2113 ; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask0:
   2114 ; CHECK:       # %bb.0:
   2115 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   2116 ; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1} {z}
   2117 ; CHECK-NEXT:    retq
   2118   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2119   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2120   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2121   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
   2122   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
   2123   ret <8 x i32> %res
   2124 }
   2125 define <8 x i32> @test_masked_i32_to_8_mem_mask1(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
        ; Merge-masked splat of the i32 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i32_to_8_mem_mask0.
   2126 ; CHECK-LABEL: test_masked_i32_to_8_mem_mask1:
   2127 ; CHECK:       # %bb.0:
   2128 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   2129 ; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
   2130 ; CHECK-NEXT:    retq
   2131   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2132   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2133   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2134   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
   2135   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
   2136   ret <8 x i32> %res
   2137 }
   2138 
   2139 define <8 x i32> @test_masked_z_i32_to_8_mem_mask1(i32* %p, <8 x i32> %mask) {
        ; Zero-masked splat of the i32 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i32_to_8_mem_mask0.
   2140 ; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask1:
   2141 ; CHECK:       # %bb.0:
   2142 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   2143 ; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1} {z}
   2144 ; CHECK-NEXT:    retq
   2145   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2146   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2147   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2148   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
   2149   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
   2150   ret <8 x i32> %res
   2151 }
   2152 define <8 x i32> @test_masked_i32_to_8_mem_mask2(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
        ; Merge-masked splat of the i32 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i32_to_8_mem_mask0.
   2153 ; CHECK-LABEL: test_masked_i32_to_8_mem_mask2:
   2154 ; CHECK:       # %bb.0:
   2155 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   2156 ; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
   2157 ; CHECK-NEXT:    retq
   2158   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2159   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2160   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2161   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
   2162   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
   2163   ret <8 x i32> %res
   2164 }
   2165 
   2166 define <8 x i32> @test_masked_z_i32_to_8_mem_mask2(i32* %p, <8 x i32> %mask) {
        ; Zero-masked splat of the i32 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i32_to_8_mem_mask0.
   2167 ; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask2:
   2168 ; CHECK:       # %bb.0:
   2169 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   2170 ; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1} {z}
   2171 ; CHECK-NEXT:    retq
   2172   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2173   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2174   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2175   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
   2176   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
   2177   ret <8 x i32> %res
   2178 }
   2179 define <8 x i32> @test_masked_i32_to_8_mem_mask3(i32* %p, <8 x i32> %default, <8 x i32> %mask) {
        ; Merge-masked splat of the i32 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i32_to_8_mem_mask0.
   2180 ; CHECK-LABEL: test_masked_i32_to_8_mem_mask3:
   2181 ; CHECK:       # %bb.0:
   2182 ; CHECK-NEXT:    vptestnmd %ymm1, %ymm1, %k1
   2183 ; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1}
   2184 ; CHECK-NEXT:    retq
   2185   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2186   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2187   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2188   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
   2189   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
   2190   ret <8 x i32> %res
   2191 }
   2192 
   2193 define <8 x i32> @test_masked_z_i32_to_8_mem_mask3(i32* %p, <8 x i32> %mask) {
        ; Zero-masked splat of the i32 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i32_to_8_mem_mask0.
   2194 ; CHECK-LABEL: test_masked_z_i32_to_8_mem_mask3:
   2195 ; CHECK:       # %bb.0:
   2196 ; CHECK-NEXT:    vptestnmd %ymm0, %ymm0, %k1
   2197 ; CHECK-NEXT:    vpbroadcastd (%rdi), %ymm0 {%k1} {z}
   2198 ; CHECK-NEXT:    retq
   2199   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2200   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2201   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2202   %cmp = icmp eq <8 x i32> %mask, zeroinitializer
   2203   %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
   2204   ret <8 x i32> %res
   2205 }
   2206 define <16 x i32> @test_i32_to_16_mem(i32* %p) {
        ; Unmasked splat: loads one i32 from %p and replicates it into all 16 lanes.
        ; The CHECK lines expect the load folded into a single
        ; vbroadcastss (%rdi), %zmm0 (note: vbroadcastss, not vpbroadcastd).
   2207 ; CHECK-LABEL: test_i32_to_16_mem:
   2208 ; CHECK:       # %bb.0:
   2209 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0
   2210 ; CHECK-NEXT:    retq
   2211   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2212   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2213   %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   2214   ret <16 x i32> %res
   2215 }
   2216 define <16 x i32> @test_masked_i32_to_16_mem_mask0(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
        ; Merge-masked splat of the i32 loaded from %p into 16 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1}.
   2217 ; CHECK-LABEL: test_masked_i32_to_16_mem_mask0:
   2218 ; CHECK:       # %bb.0:
   2219 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   2220 ; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1}
   2221 ; CHECK-NEXT:    retq
   2222   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2223   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2224   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2225   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
   2226   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
   2227   ret <16 x i32> %res
   2228 }
   2229 
   2230 define <16 x i32> @test_masked_z_i32_to_16_mem_mask0(i32* %p, <16 x i32> %mask) {
        ; Zero-masked splat of the i32 loaded from %p into 16 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1} {z}.
   2231 ; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask0:
   2232 ; CHECK:       # %bb.0:
   2233 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   2234 ; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1} {z}
   2235 ; CHECK-NEXT:    retq
   2236   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2237   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2238   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2239   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
   2240   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
   2241   ret <16 x i32> %res
   2242 }
   2243 define <16 x i32> @test_masked_i32_to_16_mem_mask1(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
        ; Merge-masked splat of the i32 loaded from %p into 16 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i32_to_16_mem_mask0.
   2244 ; CHECK-LABEL: test_masked_i32_to_16_mem_mask1:
   2245 ; CHECK:       # %bb.0:
   2246 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   2247 ; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1}
   2248 ; CHECK-NEXT:    retq
   2249   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2250   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2251   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2252   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
   2253   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
   2254   ret <16 x i32> %res
   2255 }
   2256 
   2257 define <16 x i32> @test_masked_z_i32_to_16_mem_mask1(i32* %p, <16 x i32> %mask) {
        ; Zero-masked splat of the i32 loaded from %p into 16 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i32_to_16_mem_mask0.
   2258 ; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask1:
   2259 ; CHECK:       # %bb.0:
   2260 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   2261 ; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1} {z}
   2262 ; CHECK-NEXT:    retq
   2263   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2264   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2265   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2266   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
   2267   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
   2268   ret <16 x i32> %res
   2269 }
   2270 define <16 x i32> @test_masked_i32_to_16_mem_mask2(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
        ; Merge-masked splat of the i32 loaded from %p into 16 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i32_to_16_mem_mask0.
   2271 ; CHECK-LABEL: test_masked_i32_to_16_mem_mask2:
   2272 ; CHECK:       # %bb.0:
   2273 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   2274 ; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1}
   2275 ; CHECK-NEXT:    retq
   2276   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2277   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2278   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2279   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
   2280   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
   2281   ret <16 x i32> %res
   2282 }
   2283 
   2284 define <16 x i32> @test_masked_z_i32_to_16_mem_mask2(i32* %p, <16 x i32> %mask) {
        ; Zero-masked splat of the i32 loaded from %p into 16 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i32_to_16_mem_mask0.
   2285 ; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask2:
   2286 ; CHECK:       # %bb.0:
   2287 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   2288 ; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1} {z}
   2289 ; CHECK-NEXT:    retq
   2290   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2291   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2292   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2293   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
   2294   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
   2295   ret <16 x i32> %res
   2296 }
   2297 define <16 x i32> @test_masked_i32_to_16_mem_mask3(i32* %p, <16 x i32> %default, <16 x i32> %mask) {
        ; Merge-masked splat of the i32 loaded from %p into 16 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i32_to_16_mem_mask0.
   2298 ; CHECK-LABEL: test_masked_i32_to_16_mem_mask3:
   2299 ; CHECK:       # %bb.0:
   2300 ; CHECK-NEXT:    vptestnmd %zmm1, %zmm1, %k1
   2301 ; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1}
   2302 ; CHECK-NEXT:    retq
   2303   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2304   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2305   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2306   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
   2307   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
   2308   ret <16 x i32> %res
   2309 }
   2310 
   2311 define <16 x i32> @test_masked_z_i32_to_16_mem_mask3(i32* %p, <16 x i32> %mask) {
        ; Zero-masked splat of the i32 loaded from %p into 16 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmd to build %k1 and the load to be folded
        ; into a vpbroadcastd predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i32_to_16_mem_mask0.
   2312 ; CHECK-LABEL: test_masked_z_i32_to_16_mem_mask3:
   2313 ; CHECK:       # %bb.0:
   2314 ; CHECK-NEXT:    vptestnmd %zmm0, %zmm0, %k1
   2315 ; CHECK-NEXT:    vpbroadcastd (%rdi), %zmm0 {%k1} {z}
   2316 ; CHECK-NEXT:    retq
   2317   %s = load i32, i32* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2318   %vec = insertelement <2 x i32> undef, i32 %s, i32 0
   2319   %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2320   %cmp = icmp eq <16 x i32> %mask, zeroinitializer
   2321   %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
   2322   ret <16 x i32> %res
   2323 }
   2324 define <2 x i64> @test_i64_to_2_mem(i64* %p) {
        ; Unmasked splat: loads one i64 from %p and replicates it into both lanes.
        ; The CHECK lines expect the load folded into a single
        ; vpbroadcastq (%rdi), %xmm0.
   2325 ; CHECK-LABEL: test_i64_to_2_mem:
   2326 ; CHECK:       # %bb.0:
   2327 ; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0
   2328 ; CHECK-NEXT:    retq
   2329   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2330   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2331   %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
   2332   ret <2 x i64> %res
   2333 }
   2334 define <2 x i64> @test_masked_i64_to_2_mem_mask0(i64* %p, <2 x i64> %default, <2 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 2 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
   2335 ; CHECK-LABEL: test_masked_i64_to_2_mem_mask0:
   2336 ; CHECK:       # %bb.0:
   2337 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   2338 ; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1}
   2339 ; CHECK-NEXT:    retq
   2340   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2341   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2342   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2343   %cmp = icmp eq <2 x i64> %mask, zeroinitializer
   2344   %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
   2345   ret <2 x i64> %res
   2346 }
   2347 
   2348 define <2 x i64> @test_masked_z_i64_to_2_mem_mask0(i64* %p, <2 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 2 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
   2349 ; CHECK-LABEL: test_masked_z_i64_to_2_mem_mask0:
   2350 ; CHECK:       # %bb.0:
   2351 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   2352 ; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1} {z}
   2353 ; CHECK-NEXT:    retq
   2354   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2355   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2356   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2357   %cmp = icmp eq <2 x i64> %mask, zeroinitializer
   2358   %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
   2359   ret <2 x i64> %res
   2360 }
   2361 define <2 x i64> @test_masked_i64_to_2_mem_mask1(i64* %p, <2 x i64> %default, <2 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 2 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i64_to_2_mem_mask0.
   2362 ; CHECK-LABEL: test_masked_i64_to_2_mem_mask1:
   2363 ; CHECK:       # %bb.0:
   2364 ; CHECK-NEXT:    vptestnmq %xmm1, %xmm1, %k1
   2365 ; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1}
   2366 ; CHECK-NEXT:    retq
   2367   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2368   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2369   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2370   %cmp = icmp eq <2 x i64> %mask, zeroinitializer
   2371   %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> %default
   2372   ret <2 x i64> %res
   2373 }
   2374 
   2375 define <2 x i64> @test_masked_z_i64_to_2_mem_mask1(i64* %p, <2 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 2 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i64_to_2_mem_mask0.
   2376 ; CHECK-LABEL: test_masked_z_i64_to_2_mem_mask1:
   2377 ; CHECK:       # %bb.0:
   2378 ; CHECK-NEXT:    vptestnmq %xmm0, %xmm0, %k1
   2379 ; CHECK-NEXT:    vpbroadcastq (%rdi), %xmm0 {%k1} {z}
   2380 ; CHECK-NEXT:    retq
   2381   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2382   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2383   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <2 x i32> <i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2384   %cmp = icmp eq <2 x i64> %mask, zeroinitializer
   2385   %res = select <2 x i1> %cmp, <2 x i64> %shuf, <2 x i64> zeroinitializer
   2386   ret <2 x i64> %res
   2387 }
   2388 define <4 x i64> @test_i64_to_4_mem(i64* %p) {
        ; Unmasked splat: loads one i64 from %p and replicates it into all 4 lanes.
        ; The CHECK lines expect the load folded into a single
        ; vbroadcastsd (%rdi), %ymm0 (note: vbroadcastsd, not vpbroadcastq).
   2389 ; CHECK-LABEL: test_i64_to_4_mem:
   2390 ; CHECK:       # %bb.0:
   2391 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
   2392 ; CHECK-NEXT:    retq
   2393   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2394   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2395   %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
   2396   ret <4 x i64> %res
   2397 }
   2398 define <4 x i64> @test_masked_i64_to_4_mem_mask0(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 4 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
   2399 ; CHECK-LABEL: test_masked_i64_to_4_mem_mask0:
   2400 ; CHECK:       # %bb.0:
   2401 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   2402 ; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
   2403 ; CHECK-NEXT:    retq
   2404   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2405   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2406   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2407   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   2408   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
   2409   ret <4 x i64> %res
   2410 }
   2411 
   2412 define <4 x i64> @test_masked_z_i64_to_4_mem_mask0(i64* %p, <4 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 4 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
   2413 ; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask0:
   2414 ; CHECK:       # %bb.0:
   2415 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   2416 ; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1} {z}
   2417 ; CHECK-NEXT:    retq
   2418   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2419   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2420   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2421   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   2422   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
   2423   ret <4 x i64> %res
   2424 }
   2425 define <4 x i64> @test_masked_i64_to_4_mem_mask1(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 4 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i64_to_4_mem_mask0.
   2426 ; CHECK-LABEL: test_masked_i64_to_4_mem_mask1:
   2427 ; CHECK:       # %bb.0:
   2428 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   2429 ; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
   2430 ; CHECK-NEXT:    retq
   2431   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2432   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2433   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2434   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   2435   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
   2436   ret <4 x i64> %res
   2437 }
   2438 
   2439 define <4 x i64> @test_masked_z_i64_to_4_mem_mask1(i64* %p, <4 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 4 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i64_to_4_mem_mask0.
   2440 ; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask1:
   2441 ; CHECK:       # %bb.0:
   2442 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   2443 ; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1} {z}
   2444 ; CHECK-NEXT:    retq
   2445   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2446   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2447   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2448   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   2449   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
   2450   ret <4 x i64> %res
   2451 }
   2452 define <4 x i64> @test_masked_i64_to_4_mem_mask2(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 4 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i64_to_4_mem_mask0.
   2453 ; CHECK-LABEL: test_masked_i64_to_4_mem_mask2:
   2454 ; CHECK:       # %bb.0:
   2455 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   2456 ; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
   2457 ; CHECK-NEXT:    retq
   2458   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2459   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2460   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2461   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   2462   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
   2463   ret <4 x i64> %res
   2464 }
   2465 
   2466 define <4 x i64> @test_masked_z_i64_to_4_mem_mask2(i64* %p, <4 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 4 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i64_to_4_mem_mask0.
   2467 ; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask2:
   2468 ; CHECK:       # %bb.0:
   2469 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   2470 ; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1} {z}
   2471 ; CHECK-NEXT:    retq
   2472   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2473   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2474   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2475   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   2476   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
   2477   ret <4 x i64> %res
   2478 }
   2479 define <4 x i64> @test_masked_i64_to_4_mem_mask3(i64* %p, <4 x i64> %default, <4 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 4 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i64_to_4_mem_mask0.
   2480 ; CHECK-LABEL: test_masked_i64_to_4_mem_mask3:
   2481 ; CHECK:       # %bb.0:
   2482 ; CHECK-NEXT:    vptestnmq %ymm1, %ymm1, %k1
   2483 ; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1}
   2484 ; CHECK-NEXT:    retq
   2485   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2486   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2487   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2488   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   2489   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
   2490   ret <4 x i64> %res
   2491 }
   2492 
   2493 define <4 x i64> @test_masked_z_i64_to_4_mem_mask3(i64* %p, <4 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 4 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i64_to_4_mem_mask0.
   2494 ; CHECK-LABEL: test_masked_z_i64_to_4_mem_mask3:
   2495 ; CHECK:       # %bb.0:
   2496 ; CHECK-NEXT:    vptestnmq %ymm0, %ymm0, %k1
   2497 ; CHECK-NEXT:    vpbroadcastq (%rdi), %ymm0 {%k1} {z}
   2498 ; CHECK-NEXT:    retq
   2499   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2500   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2501   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2502   %cmp = icmp eq <4 x i64> %mask, zeroinitializer
   2503   %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
   2504   ret <4 x i64> %res
   2505 }
   2506 define <8 x i64> @test_i64_to_8_mem(i64* %p) {
        ; Unmasked splat: loads one i64 from %p and replicates it into all 8 lanes.
        ; The CHECK lines expect the load folded into a single
        ; vbroadcastsd (%rdi), %zmm0 (note: vbroadcastsd, not vpbroadcastq).
   2507 ; CHECK-LABEL: test_i64_to_8_mem:
   2508 ; CHECK:       # %bb.0:
   2509 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
   2510 ; CHECK-NEXT:    retq
   2511   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2512   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2513   %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   2514   ret <8 x i64> %res
   2515 }
   2516 define <8 x i64> @test_masked_i64_to_8_mem_mask0(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
   2517 ; CHECK-LABEL: test_masked_i64_to_8_mem_mask0:
   2518 ; CHECK:       # %bb.0:
   2519 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   2520 ; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1}
   2521 ; CHECK-NEXT:    retq
   2522   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2523   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2524   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2525   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   2526   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
   2527   ret <8 x i64> %res
   2528 }
   2529 
   2530 define <8 x i64> @test_masked_z_i64_to_8_mem_mask0(i64* %p, <8 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
   2531 ; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask0:
   2532 ; CHECK:       # %bb.0:
   2533 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   2534 ; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1} {z}
   2535 ; CHECK-NEXT:    retq
   2536   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2537   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2538   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2539   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   2540   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
   2541   ret <8 x i64> %res
   2542 }
   2543 define <8 x i64> @test_masked_i64_to_8_mem_mask1(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i64_to_8_mem_mask0.
   2544 ; CHECK-LABEL: test_masked_i64_to_8_mem_mask1:
   2545 ; CHECK:       # %bb.0:
   2546 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   2547 ; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1}
   2548 ; CHECK-NEXT:    retq
   2549   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2550   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2551   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2552   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   2553   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
   2554   ret <8 x i64> %res
   2555 }
   2556 
   2557 define <8 x i64> @test_masked_z_i64_to_8_mem_mask1(i64* %p, <8 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i64_to_8_mem_mask0.
   2558 ; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask1:
   2559 ; CHECK:       # %bb.0:
   2560 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   2561 ; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1} {z}
   2562 ; CHECK-NEXT:    retq
   2563   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2564   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2565   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2566   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   2567   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
   2568   ret <8 x i64> %res
   2569 }
   2570 define <8 x i64> @test_masked_i64_to_8_mem_mask2(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i64_to_8_mem_mask0.
   2571 ; CHECK-LABEL: test_masked_i64_to_8_mem_mask2:
   2572 ; CHECK:       # %bb.0:
   2573 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   2574 ; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1}
   2575 ; CHECK-NEXT:    retq
   2576   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2577   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2578   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2579   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   2580   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
   2581   ret <8 x i64> %res
   2582 }
   2583 
   2584 define <8 x i64> @test_masked_z_i64_to_8_mem_mask2(i64* %p, <8 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i64_to_8_mem_mask0.
   2585 ; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask2:
   2586 ; CHECK:       # %bb.0:
   2587 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   2588 ; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1} {z}
   2589 ; CHECK-NEXT:    retq
   2590   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2591   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2592   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2593   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   2594   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
   2595   ret <8 x i64> %res
   2596 }
   2597 define <8 x i64> @test_masked_i64_to_8_mem_mask3(i64* %p, <8 x i64> %default, <8 x i64> %mask) {
        ; Merge-masked splat of the i64 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it keeps the
        ; matching %default lane.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_i64_to_8_mem_mask0.
   2598 ; CHECK-LABEL: test_masked_i64_to_8_mem_mask3:
   2599 ; CHECK:       # %bb.0:
   2600 ; CHECK-NEXT:    vptestnmq %zmm1, %zmm1, %k1
   2601 ; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1}
   2602 ; CHECK-NEXT:    retq
   2603   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2604   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2605   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2606   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   2607   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
   2608   ret <8 x i64> %res
   2609 }
   2610 
   2611 define <8 x i64> @test_masked_z_i64_to_8_mem_mask3(i64* %p, <8 x i64> %mask) {
        ; Zero-masked splat of the i64 loaded from %p into 8 lanes: a lane takes
        ; the loaded value when its %mask element is zero, otherwise it is zeroed.
        ; The CHECK lines expect vptestnmq to build %k1 and the load to be folded
        ; into a vpbroadcastq predicated on {%k1} {z}.
        ; NOTE(review): apart from the function name, this IR is identical to
        ; test_masked_z_i64_to_8_mem_mask0.
   2612 ; CHECK-LABEL: test_masked_z_i64_to_8_mem_mask3:
   2613 ; CHECK:       # %bb.0:
   2614 ; CHECK-NEXT:    vptestnmq %zmm0, %zmm0, %k1
   2615 ; CHECK-NEXT:    vpbroadcastq (%rdi), %zmm0 {%k1} {z}
   2616 ; CHECK-NEXT:    retq
   2617   %s = load i64, i64* %p
          ; Only lane 0 of %vec is defined; the all-zero shuffle mask copies it to
          ; every result lane.
   2618   %vec = insertelement <2 x i64> undef, i64 %s, i32 0
   2619   %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Compare-to-zero yields the per-lane i1 mask that drives the select.
   2620   %cmp = icmp eq <8 x i64> %mask, zeroinitializer
   2621   %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
   2622   ret <8 x i64> %res
   2623 }
   2624