Home | History | Annotate | Download | only in avx512-shuffles
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
      3 
      4 define <4 x double> @test_double_to_4(double %s) {
        ; Unmasked splat of %s: element 0 of %vec is %s and the all-zero shuffle
        ; mask selects that element for every result lane. The assertions expect
        ; a single vbroadcastsd into %ymm0.
      5 ; CHECK-LABEL: test_double_to_4:
      6 ; CHECK:       # %bb.0:
      7 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0
      8 ; CHECK-NEXT:    retq
      9   %vec = insertelement <2 x double> undef, double %s, i32 0
     10   %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     11   ret <4 x double> %res
     12 }
     13 define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %default, <4 x double> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastsd write-masked by %k1.
     14 ; CHECK-LABEL: test_masked_double_to_4_mask0:
     15 ; CHECK:       # %bb.0:
     16 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
     17 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
     18 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
     19 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
     20 ; CHECK-NEXT:    retq
     21   %vec = insertelement <2 x double> undef, double %s, i32 0
     22   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     23   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
     24   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
     25   ret <4 x double> %res
     26 }
     27 
     28 define <4 x double> @test_masked_z_double_to_4_mask0(double %s, <4 x double> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastsd under %k1.
     29 ; CHECK-LABEL: test_masked_z_double_to_4_mask0:
     30 ; CHECK:       # %bb.0:
     31 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
     32 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
     33 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
     34 ; CHECK-NEXT:    retq
     35   %vec = insertelement <2 x double> undef, double %s, i32 0
     36   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     37   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
     38   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
     39   ret <4 x double> %res
     40 }
     41 define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %default, <4 x double> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastsd write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_double_to_4_mask0 exactly;
        ; only the function name differs.
     42 ; CHECK-LABEL: test_masked_double_to_4_mask1:
     43 ; CHECK:       # %bb.0:
     44 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
     45 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
     46 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
     47 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
     48 ; CHECK-NEXT:    retq
     49   %vec = insertelement <2 x double> undef, double %s, i32 0
     50   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     51   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
     52   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
     53   ret <4 x double> %res
     54 }
     55 
     56 define <4 x double> @test_masked_z_double_to_4_mask1(double %s, <4 x double> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastsd under %k1.
        ; NOTE(review): the IR body matches test_masked_z_double_to_4_mask0
        ; exactly; only the function name differs.
     57 ; CHECK-LABEL: test_masked_z_double_to_4_mask1:
     58 ; CHECK:       # %bb.0:
     59 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
     60 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
     61 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
     62 ; CHECK-NEXT:    retq
     63   %vec = insertelement <2 x double> undef, double %s, i32 0
     64   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     65   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
     66   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
     67   ret <4 x double> %res
     68 }
     69 define <4 x double> @test_masked_double_to_4_mask2(double %s, <4 x double> %default, <4 x double> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastsd write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_double_to_4_mask0 exactly;
        ; only the function name differs.
     70 ; CHECK-LABEL: test_masked_double_to_4_mask2:
     71 ; CHECK:       # %bb.0:
     72 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
     73 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
     74 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
     75 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
     76 ; CHECK-NEXT:    retq
     77   %vec = insertelement <2 x double> undef, double %s, i32 0
     78   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     79   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
     80   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
     81   ret <4 x double> %res
     82 }
     83 
     84 define <4 x double> @test_masked_z_double_to_4_mask2(double %s, <4 x double> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastsd under %k1.
        ; NOTE(review): the IR body matches test_masked_z_double_to_4_mask0
        ; exactly; only the function name differs.
     85 ; CHECK-LABEL: test_masked_z_double_to_4_mask2:
     86 ; CHECK:       # %bb.0:
     87 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
     88 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
     89 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
     90 ; CHECK-NEXT:    retq
     91   %vec = insertelement <2 x double> undef, double %s, i32 0
     92   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     93   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
     94   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
     95   ret <4 x double> %res
     96 }
     97 define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %default, <4 x double> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastsd write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_double_to_4_mask0 exactly;
        ; only the function name differs.
     98 ; CHECK-LABEL: test_masked_double_to_4_mask3:
     99 ; CHECK:       # %bb.0:
    100 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    101 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
    102 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm1 {%k1}
    103 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
    104 ; CHECK-NEXT:    retq
    105   %vec = insertelement <2 x double> undef, double %s, i32 0
    106   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    107   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    108   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
    109   ret <4 x double> %res
    110 }
    111 
    112 define <4 x double> @test_masked_z_double_to_4_mask3(double %s, <4 x double> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastsd under %k1.
        ; NOTE(review): the IR body matches test_masked_z_double_to_4_mask0
        ; exactly; only the function name differs.
    113 ; CHECK-LABEL: test_masked_z_double_to_4_mask3:
    114 ; CHECK:       # %bb.0:
    115 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    116 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
    117 ; CHECK-NEXT:    vbroadcastsd %xmm0, %ymm0 {%k1} {z}
    118 ; CHECK-NEXT:    retq
    119   %vec = insertelement <2 x double> undef, double %s, i32 0
    120   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    121   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    122   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
    123   ret <4 x double> %res
    124 }
    125 define <8 x double> @test_double_to_8(double %s) {
        ; Unmasked splat of %s: element 0 of %vec is %s and the all-zero shuffle
        ; mask selects that element for every result lane. The assertions expect
        ; a single vbroadcastsd into %zmm0.
    126 ; CHECK-LABEL: test_double_to_8:
    127 ; CHECK:       # %bb.0:
    128 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0
    129 ; CHECK-NEXT:    retq
    130   %vec = insertelement <2 x double> undef, double %s, i32 0
    131   %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    132   ret <8 x double> %res
    133 }
    134 define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %default, <8 x double> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastsd write-masked by %k1.
    135 ; CHECK-LABEL: test_masked_double_to_8_mask0:
    136 ; CHECK:       # %bb.0:
    137 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    138 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
    139 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
    140 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
    141 ; CHECK-NEXT:    retq
    142   %vec = insertelement <2 x double> undef, double %s, i32 0
    143   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    144   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    145   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
    146   ret <8 x double> %res
    147 }
    148 
    149 define <8 x double> @test_masked_z_double_to_8_mask0(double %s, <8 x double> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastsd under %k1.
    150 ; CHECK-LABEL: test_masked_z_double_to_8_mask0:
    151 ; CHECK:       # %bb.0:
    152 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    153 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
    154 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
    155 ; CHECK-NEXT:    retq
    156   %vec = insertelement <2 x double> undef, double %s, i32 0
    157   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    158   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    159   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
    160   ret <8 x double> %res
    161 }
    162 define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %default, <8 x double> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastsd write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_double_to_8_mask0 exactly;
        ; only the function name differs.
    163 ; CHECK-LABEL: test_masked_double_to_8_mask1:
    164 ; CHECK:       # %bb.0:
    165 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    166 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
    167 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
    168 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
    169 ; CHECK-NEXT:    retq
    170   %vec = insertelement <2 x double> undef, double %s, i32 0
    171   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    172   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    173   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
    174   ret <8 x double> %res
    175 }
    176 
    177 define <8 x double> @test_masked_z_double_to_8_mask1(double %s, <8 x double> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastsd under %k1.
        ; NOTE(review): the IR body matches test_masked_z_double_to_8_mask0
        ; exactly; only the function name differs.
    178 ; CHECK-LABEL: test_masked_z_double_to_8_mask1:
    179 ; CHECK:       # %bb.0:
    180 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    181 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
    182 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
    183 ; CHECK-NEXT:    retq
    184   %vec = insertelement <2 x double> undef, double %s, i32 0
    185   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    186   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    187   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
    188   ret <8 x double> %res
    189 }
    190 define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %default, <8 x double> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastsd write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_double_to_8_mask0 exactly;
        ; only the function name differs.
    191 ; CHECK-LABEL: test_masked_double_to_8_mask2:
    192 ; CHECK:       # %bb.0:
    193 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    194 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
    195 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
    196 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
    197 ; CHECK-NEXT:    retq
    198   %vec = insertelement <2 x double> undef, double %s, i32 0
    199   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    200   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    201   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
    202   ret <8 x double> %res
    203 }
    204 
    205 define <8 x double> @test_masked_z_double_to_8_mask2(double %s, <8 x double> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastsd under %k1.
        ; NOTE(review): the IR body matches test_masked_z_double_to_8_mask0
        ; exactly; only the function name differs.
    206 ; CHECK-LABEL: test_masked_z_double_to_8_mask2:
    207 ; CHECK:       # %bb.0:
    208 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    209 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
    210 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
    211 ; CHECK-NEXT:    retq
    212   %vec = insertelement <2 x double> undef, double %s, i32 0
    213   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    214   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    215   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
    216   ret <8 x double> %res
    217 }
    218 define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %default, <8 x double> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastsd write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_double_to_8_mask0 exactly;
        ; only the function name differs.
    219 ; CHECK-LABEL: test_masked_double_to_8_mask3:
    220 ; CHECK:       # %bb.0:
    221 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    222 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
    223 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm1 {%k1}
    224 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
    225 ; CHECK-NEXT:    retq
    226   %vec = insertelement <2 x double> undef, double %s, i32 0
    227   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    228   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    229   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
    230   ret <8 x double> %res
    231 }
    232 
    233 define <8 x double> @test_masked_z_double_to_8_mask3(double %s, <8 x double> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastsd under %k1.
        ; NOTE(review): the IR body matches test_masked_z_double_to_8_mask0
        ; exactly; only the function name differs.
    234 ; CHECK-LABEL: test_masked_z_double_to_8_mask3:
    235 ; CHECK:       # %bb.0:
    236 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    237 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
    238 ; CHECK-NEXT:    vbroadcastsd %xmm0, %zmm0 {%k1} {z}
    239 ; CHECK-NEXT:    retq
    240   %vec = insertelement <2 x double> undef, double %s, i32 0
    241   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    242   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    243   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
    244   ret <8 x double> %res
    245 }
    246 define <4 x float> @test_float_to_4(float %s) {
        ; Unmasked splat of %s: element 0 of %vec is %s and the all-zero shuffle
        ; mask selects that element for every result lane. The assertions expect
        ; a single vbroadcastss into %xmm0.
    247 ; CHECK-LABEL: test_float_to_4:
    248 ; CHECK:       # %bb.0:
    249 ; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0
    250 ; CHECK-NEXT:    retq
    251   %vec = insertelement <2 x float> undef, float %s, i32 0
    252   %res = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    253   ret <4 x float> %res
    254 }
    255 define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default, <4 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
    256 ; CHECK-LABEL: test_masked_float_to_4_mask0:
    257 ; CHECK:       # %bb.0:
    258 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    259 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
    260 ; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
    261 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    262 ; CHECK-NEXT:    retq
    263   %vec = insertelement <2 x float> undef, float %s, i32 0
    264   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    265   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    266   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
    267   ret <4 x float> %res
    268 }
    269 
    270 define <4 x float> @test_masked_z_float_to_4_mask0(float %s, <4 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
    271 ; CHECK-LABEL: test_masked_z_float_to_4_mask0:
    272 ; CHECK:       # %bb.0:
    273 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    274 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    275 ; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
    276 ; CHECK-NEXT:    retq
    277   %vec = insertelement <2 x float> undef, float %s, i32 0
    278   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    279   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    280   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    281   ret <4 x float> %res
    282 }
    283 define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default, <4 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_float_to_4_mask0 exactly;
        ; only the function name differs.
    284 ; CHECK-LABEL: test_masked_float_to_4_mask1:
    285 ; CHECK:       # %bb.0:
    286 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    287 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
    288 ; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
    289 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    290 ; CHECK-NEXT:    retq
    291   %vec = insertelement <2 x float> undef, float %s, i32 0
    292   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    293   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    294   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
    295   ret <4 x float> %res
    296 }
    297 
    298 define <4 x float> @test_masked_z_float_to_4_mask1(float %s, <4 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
        ; NOTE(review): the IR body matches test_masked_z_float_to_4_mask0
        ; exactly; only the function name differs.
    299 ; CHECK-LABEL: test_masked_z_float_to_4_mask1:
    300 ; CHECK:       # %bb.0:
    301 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    302 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    303 ; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
    304 ; CHECK-NEXT:    retq
    305   %vec = insertelement <2 x float> undef, float %s, i32 0
    306   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    307   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    308   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    309   ret <4 x float> %res
    310 }
    311 define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default, <4 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_float_to_4_mask0 exactly;
        ; only the function name differs.
    312 ; CHECK-LABEL: test_masked_float_to_4_mask2:
    313 ; CHECK:       # %bb.0:
    314 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    315 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
    316 ; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
    317 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    318 ; CHECK-NEXT:    retq
    319   %vec = insertelement <2 x float> undef, float %s, i32 0
    320   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    321   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    322   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
    323   ret <4 x float> %res
    324 }
    325 
    326 define <4 x float> @test_masked_z_float_to_4_mask2(float %s, <4 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
        ; NOTE(review): the IR body matches test_masked_z_float_to_4_mask0
        ; exactly; only the function name differs.
    327 ; CHECK-LABEL: test_masked_z_float_to_4_mask2:
    328 ; CHECK:       # %bb.0:
    329 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    330 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    331 ; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
    332 ; CHECK-NEXT:    retq
    333   %vec = insertelement <2 x float> undef, float %s, i32 0
    334   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    335   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    336   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    337   ret <4 x float> %res
    338 }
    339 define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default, <4 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_float_to_4_mask0 exactly;
        ; only the function name differs.
    340 ; CHECK-LABEL: test_masked_float_to_4_mask3:
    341 ; CHECK:       # %bb.0:
    342 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    343 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
    344 ; CHECK-NEXT:    vbroadcastss %xmm0, %xmm1 {%k1}
    345 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    346 ; CHECK-NEXT:    retq
    347   %vec = insertelement <2 x float> undef, float %s, i32 0
    348   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    349   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    350   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
    351   ret <4 x float> %res
    352 }
    353 
    354 define <4 x float> @test_masked_z_float_to_4_mask3(float %s, <4 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
        ; NOTE(review): the IR body matches test_masked_z_float_to_4_mask0
        ; exactly; only the function name differs.
    355 ; CHECK-LABEL: test_masked_z_float_to_4_mask3:
    356 ; CHECK:       # %bb.0:
    357 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    358 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    359 ; CHECK-NEXT:    vbroadcastss %xmm0, %xmm0 {%k1} {z}
    360 ; CHECK-NEXT:    retq
    361   %vec = insertelement <2 x float> undef, float %s, i32 0
    362   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    363   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    364   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    365   ret <4 x float> %res
    366 }
    367 define <8 x float> @test_float_to_8(float %s) {
        ; Unmasked splat of %s: element 0 of %vec is %s and the all-zero shuffle
        ; mask selects that element for every result lane. The assertions expect
        ; a single vbroadcastss into %ymm0.
    368 ; CHECK-LABEL: test_float_to_8:
    369 ; CHECK:       # %bb.0:
    370 ; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0
    371 ; CHECK-NEXT:    retq
    372   %vec = insertelement <2 x float> undef, float %s, i32 0
    373   %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    374   ret <8 x float> %res
    375 }
    376 define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default, <8 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
    377 ; CHECK-LABEL: test_masked_float_to_8_mask0:
    378 ; CHECK:       # %bb.0:
    379 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    380 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    381 ; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
    382 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    383 ; CHECK-NEXT:    retq
    384   %vec = insertelement <2 x float> undef, float %s, i32 0
    385   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    386   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    387   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
    388   ret <8 x float> %res
    389 }
    390 
    391 define <8 x float> @test_masked_z_float_to_8_mask0(float %s, <8 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
    392 ; CHECK-LABEL: test_masked_z_float_to_8_mask0:
    393 ; CHECK:       # %bb.0:
    394 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    395 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
    396 ; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
    397 ; CHECK-NEXT:    retq
    398   %vec = insertelement <2 x float> undef, float %s, i32 0
    399   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    400   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    401   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    402   ret <8 x float> %res
    403 }
    404 define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default, <8 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_float_to_8_mask0 exactly;
        ; only the function name differs.
    405 ; CHECK-LABEL: test_masked_float_to_8_mask1:
    406 ; CHECK:       # %bb.0:
    407 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    408 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    409 ; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
    410 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    411 ; CHECK-NEXT:    retq
    412   %vec = insertelement <2 x float> undef, float %s, i32 0
    413   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    414   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    415   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
    416   ret <8 x float> %res
    417 }
    418 
    419 define <8 x float> @test_masked_z_float_to_8_mask1(float %s, <8 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
        ; NOTE(review): the IR body matches test_masked_z_float_to_8_mask0
        ; exactly; only the function name differs.
    420 ; CHECK-LABEL: test_masked_z_float_to_8_mask1:
    421 ; CHECK:       # %bb.0:
    422 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    423 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
    424 ; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
    425 ; CHECK-NEXT:    retq
    426   %vec = insertelement <2 x float> undef, float %s, i32 0
    427   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    428   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    429   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    430   ret <8 x float> %res
    431 }
    432 define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default, <8 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_float_to_8_mask0 exactly;
        ; only the function name differs.
    433 ; CHECK-LABEL: test_masked_float_to_8_mask2:
    434 ; CHECK:       # %bb.0:
    435 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    436 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    437 ; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
    438 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    439 ; CHECK-NEXT:    retq
    440   %vec = insertelement <2 x float> undef, float %s, i32 0
    441   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    442   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    443   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
    444   ret <8 x float> %res
    445 }
    446 
    447 define <8 x float> @test_masked_z_float_to_8_mask2(float %s, <8 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
        ; NOTE(review): the IR body matches test_masked_z_float_to_8_mask0
        ; exactly; only the function name differs.
    448 ; CHECK-LABEL: test_masked_z_float_to_8_mask2:
    449 ; CHECK:       # %bb.0:
    450 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    451 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
    452 ; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
    453 ; CHECK-NEXT:    retq
    454   %vec = insertelement <2 x float> undef, float %s, i32 0
    455   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    456   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    457   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    458   ret <8 x float> %res
    459 }
    460 define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default, <8 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_float_to_8_mask0 exactly;
        ; only the function name differs.
    461 ; CHECK-LABEL: test_masked_float_to_8_mask3:
    462 ; CHECK:       # %bb.0:
    463 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    464 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    465 ; CHECK-NEXT:    vbroadcastss %xmm0, %ymm1 {%k1}
    466 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    467 ; CHECK-NEXT:    retq
    468   %vec = insertelement <2 x float> undef, float %s, i32 0
    469   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    470   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    471   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
    472   ret <8 x float> %res
    473 }
    474 
    475 define <8 x float> @test_masked_z_float_to_8_mask3(float %s, <8 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
        ; NOTE(review): the IR body matches test_masked_z_float_to_8_mask0
        ; exactly; only the function name differs.
    476 ; CHECK-LABEL: test_masked_z_float_to_8_mask3:
    477 ; CHECK:       # %bb.0:
    478 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    479 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
    480 ; CHECK-NEXT:    vbroadcastss %xmm0, %ymm0 {%k1} {z}
    481 ; CHECK-NEXT:    retq
    482   %vec = insertelement <2 x float> undef, float %s, i32 0
    483   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    484   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    485   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    486   ret <8 x float> %res
    487 }
    488 define <16 x float> @test_float_to_16(float %s) {
        ; Unmasked splat of %s: element 0 of %vec is %s and the all-zero shuffle
        ; mask selects that element for every result lane. The assertions expect
        ; a single vbroadcastss into %zmm0.
    489 ; CHECK-LABEL: test_float_to_16:
    490 ; CHECK:       # %bb.0:
    491 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0
    492 ; CHECK-NEXT:    retq
    493   %vec = insertelement <2 x float> undef, float %s, i32 0
    494   %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    495   ret <16 x float> %res
    496 }
    497 define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %default, <16 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
    498 ; CHECK-LABEL: test_masked_float_to_16_mask0:
    499 ; CHECK:       # %bb.0:
    500 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    501 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    502 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
    503 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    504 ; CHECK-NEXT:    retq
    505   %vec = insertelement <2 x float> undef, float %s, i32 0
    506   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    507   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    508   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
    509   ret <16 x float> %res
    510 }
    511 
    512 define <16 x float> @test_masked_z_float_to_16_mask0(float %s, <16 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
    513 ; CHECK-LABEL: test_masked_z_float_to_16_mask0:
    514 ; CHECK:       # %bb.0:
    515 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    516 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
    517 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
    518 ; CHECK-NEXT:    retq
    519   %vec = insertelement <2 x float> undef, float %s, i32 0
    520   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    521   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    522   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    523   ret <16 x float> %res
    524 }
    525 define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %default, <16 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_float_to_16_mask0 exactly;
        ; only the function name differs.
    526 ; CHECK-LABEL: test_masked_float_to_16_mask1:
    527 ; CHECK:       # %bb.0:
    528 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    529 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    530 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
    531 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    532 ; CHECK-NEXT:    retq
    533   %vec = insertelement <2 x float> undef, float %s, i32 0
    534   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    535   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    536   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
    537   ret <16 x float> %res
    538 }
    539 
    540 define <16 x float> @test_masked_z_float_to_16_mask1(float %s, <16 x float> %mask) {
        ; Zero-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes are zero. The assertions
        ; expect the {z} form of vbroadcastss under %k1.
        ; NOTE(review): the IR body matches test_masked_z_float_to_16_mask0
        ; exactly; only the function name differs.
    541 ; CHECK-LABEL: test_masked_z_float_to_16_mask1:
    542 ; CHECK:       # %bb.0:
    543 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    544 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
    545 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
    546 ; CHECK-NEXT:    retq
    547   %vec = insertelement <2 x float> undef, float %s, i32 0
    548   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    549   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    550   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    551   ret <16 x float> %res
    552 }
    553 define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x float> %default, <16 x float> %mask) {
        ; Merge-masked splat: lanes where %mask is ordered-equal to 0.0 (NaN lanes
        ; do not match) take %s; the remaining lanes keep %default. The assertions
        ; expect a vbroadcastss write-masked by %k1.
        ; NOTE(review): the IR body matches test_masked_float_to_16_mask0 exactly;
        ; only the function name differs.
    554 ; CHECK-LABEL: test_masked_float_to_16_mask2:
    555 ; CHECK:       # %bb.0:
    556 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    557 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    558 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
    559 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    560 ; CHECK-NEXT:    retq
    561   %vec = insertelement <2 x float> undef, float %s, i32 0
    562   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    563   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    564   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
    565   ret <16 x float> %res
    566 }
    567 
    ; Zero-masked splat of the scalar %s to 16 float lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; Expected assembly: vxorps/vcmpeqps build %k1, then a {%k1} {z} vbroadcastss
    ; from xmm0 into zmm0.
    568 define <16 x float> @test_masked_z_float_to_16_mask2(float %s, <16 x float> %mask) {
    569 ; CHECK-LABEL: test_masked_z_float_to_16_mask2:
    570 ; CHECK:       # %bb.0:
    571 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    572 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
    573 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
    574 ; CHECK-NEXT:    retq
    575   %vec = insertelement <2 x float> undef, float %s, i32 0
    576   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    577   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    578   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    579   ret <16 x float> %res
    580 }
    ; Merge-masked splat of the scalar %s to 16 float lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; Expected assembly: a {%k1}-masked vbroadcastss from xmm0 merges into zmm1,
    ; which is then copied to zmm0 with vmovaps.
    581 define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %default, <16 x float> %mask) {
    582 ; CHECK-LABEL: test_masked_float_to_16_mask3:
    583 ; CHECK:       # %bb.0:
    584 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    585 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    586 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm1 {%k1}
    587 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    588 ; CHECK-NEXT:    retq
    589   %vec = insertelement <2 x float> undef, float %s, i32 0
    590   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    591   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    592   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
    593   ret <16 x float> %res
    594 }
    595 
    ; Zero-masked splat of the scalar %s to 16 float lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; Expected assembly: vxorps/vcmpeqps build %k1, then a {%k1} {z} vbroadcastss
    ; from xmm0 into zmm0.
    596 define <16 x float> @test_masked_z_float_to_16_mask3(float %s, <16 x float> %mask) {
    597 ; CHECK-LABEL: test_masked_z_float_to_16_mask3:
    598 ; CHECK:       # %bb.0:
    599 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    600 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
    601 ; CHECK-NEXT:    vbroadcastss %xmm0, %zmm0 {%k1} {z}
    602 ; CHECK-NEXT:    retq
    603   %vec = insertelement <2 x float> undef, float %s, i32 0
    604   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    605   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    606   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    607   ret <16 x float> %res
    608 }
    ; Unmasked splat of a double loaded from %p to 4 lanes (insertelement into
    ; lane 0, then a shufflevector whose indices are all 0).
    ; Expected assembly: a single vbroadcastsd with (%rdi) as its memory operand.
    609 define <4 x double> @test_double_to_4_mem(double* %p) {
    610 ; CHECK-LABEL: test_double_to_4_mem:
    611 ; CHECK:       # %bb.0:
    612 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0
    613 ; CHECK-NEXT:    retq
    614   %s = load double, double* %p
    615   %vec = insertelement <2 x double> undef, double %s, i32 0
    616   %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    617   ret <4 x double> %res
    618 }
    ; Merge-masked splat of a double loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; Expected assembly: vxorpd/vcmpeqpd build %k1, then a {%k1}-masked
    ; vbroadcastsd from (%rdi) merges into ymm0.
    619 define <4 x double> @test_masked_double_to_4_mem_mask0(double* %p, <4 x double> %default, <4 x double> %mask) {
    620 ; CHECK-LABEL: test_masked_double_to_4_mem_mask0:
    621 ; CHECK:       # %bb.0:
    622 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    623 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
    624 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
    625 ; CHECK-NEXT:    retq
    626   %s = load double, double* %p
    627   %vec = insertelement <2 x double> undef, double %s, i32 0
    628   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    629   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    630   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
    631   ret <4 x double> %res
    632 }
    633 
    ; Zero-masked splat of a double loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; Expected assembly: vxorpd/vcmpeqpd build %k1, then a {%k1} {z}
    ; vbroadcastsd from (%rdi) into ymm0.
    634 define <4 x double> @test_masked_z_double_to_4_mem_mask0(double* %p, <4 x double> %mask) {
    635 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask0:
    636 ; CHECK:       # %bb.0:
    637 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    638 ; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
    639 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
    640 ; CHECK-NEXT:    retq
    641   %s = load double, double* %p
    642   %vec = insertelement <2 x double> undef, double %s, i32 0
    643   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    644   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    645   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
    646   ret <4 x double> %res
    647 }
    ; Merge-masked splat of a double loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; The IR body matches test_masked_double_to_4_mem_mask0; only the name differs.
    648 define <4 x double> @test_masked_double_to_4_mem_mask1(double* %p, <4 x double> %default, <4 x double> %mask) {
    649 ; CHECK-LABEL: test_masked_double_to_4_mem_mask1:
    650 ; CHECK:       # %bb.0:
    651 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    652 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
    653 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
    654 ; CHECK-NEXT:    retq
    655   %s = load double, double* %p
    656   %vec = insertelement <2 x double> undef, double %s, i32 0
    657   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    658   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    659   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
    660   ret <4 x double> %res
    661 }
    662 
    ; Zero-masked splat of a double loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; The IR body matches test_masked_z_double_to_4_mem_mask0; only the name differs.
    663 define <4 x double> @test_masked_z_double_to_4_mem_mask1(double* %p, <4 x double> %mask) {
    664 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask1:
    665 ; CHECK:       # %bb.0:
    666 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    667 ; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
    668 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
    669 ; CHECK-NEXT:    retq
    670   %s = load double, double* %p
    671   %vec = insertelement <2 x double> undef, double %s, i32 0
    672   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    673   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    674   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
    675   ret <4 x double> %res
    676 }
    ; Merge-masked splat of a double loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; The IR body matches test_masked_double_to_4_mem_mask0; only the name differs.
    677 define <4 x double> @test_masked_double_to_4_mem_mask2(double* %p, <4 x double> %default, <4 x double> %mask) {
    678 ; CHECK-LABEL: test_masked_double_to_4_mem_mask2:
    679 ; CHECK:       # %bb.0:
    680 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    681 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
    682 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
    683 ; CHECK-NEXT:    retq
    684   %s = load double, double* %p
    685   %vec = insertelement <2 x double> undef, double %s, i32 0
    686   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    687   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    688   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
    689   ret <4 x double> %res
    690 }
    691 
    ; Zero-masked splat of a double loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; The IR body matches test_masked_z_double_to_4_mem_mask0; only the name differs.
    692 define <4 x double> @test_masked_z_double_to_4_mem_mask2(double* %p, <4 x double> %mask) {
    693 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask2:
    694 ; CHECK:       # %bb.0:
    695 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    696 ; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
    697 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
    698 ; CHECK-NEXT:    retq
    699   %s = load double, double* %p
    700   %vec = insertelement <2 x double> undef, double %s, i32 0
    701   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    702   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    703   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
    704   ret <4 x double> %res
    705 }
    ; Merge-masked splat of a double loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; The IR body matches test_masked_double_to_4_mem_mask0; only the name differs.
    706 define <4 x double> @test_masked_double_to_4_mem_mask3(double* %p, <4 x double> %default, <4 x double> %mask) {
    707 ; CHECK-LABEL: test_masked_double_to_4_mem_mask3:
    708 ; CHECK:       # %bb.0:
    709 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    710 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
    711 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1}
    712 ; CHECK-NEXT:    retq
    713   %s = load double, double* %p
    714   %vec = insertelement <2 x double> undef, double %s, i32 0
    715   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    716   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    717   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
    718   ret <4 x double> %res
    719 }
    720 
    ; Zero-masked splat of a double loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; The IR body matches test_masked_z_double_to_4_mem_mask0; only the name differs.
    721 define <4 x double> @test_masked_z_double_to_4_mem_mask3(double* %p, <4 x double> %mask) {
    722 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask3:
    723 ; CHECK:       # %bb.0:
    724 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    725 ; CHECK-NEXT:    vcmpeqpd %ymm1, %ymm0, %k1
    726 ; CHECK-NEXT:    vbroadcastsd (%rdi), %ymm0 {%k1} {z}
    727 ; CHECK-NEXT:    retq
    728   %s = load double, double* %p
    729   %vec = insertelement <2 x double> undef, double %s, i32 0
    730   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    731   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    732   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
    733   ret <4 x double> %res
    734 }
    ; Unmasked splat of a double loaded from %p to 8 lanes (insertelement into
    ; lane 0, then a shufflevector whose indices are all 0).
    ; Expected assembly: a single vbroadcastsd from (%rdi) into zmm0.
    735 define <8 x double> @test_double_to_8_mem(double* %p) {
    736 ; CHECK-LABEL: test_double_to_8_mem:
    737 ; CHECK:       # %bb.0:
    738 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0
    739 ; CHECK-NEXT:    retq
    740   %s = load double, double* %p
    741   %vec = insertelement <2 x double> undef, double %s, i32 0
    742   %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    743   ret <8 x double> %res
    744 }
    ; Merge-masked splat of a double loaded from %p to 8 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; Expected assembly: vxorpd/vcmpeqpd build %k1, then a {%k1}-masked
    ; vbroadcastsd from (%rdi) merges into zmm0.
    745 define <8 x double> @test_masked_double_to_8_mem_mask0(double* %p, <8 x double> %default, <8 x double> %mask) {
    746 ; CHECK-LABEL: test_masked_double_to_8_mem_mask0:
    747 ; CHECK:       # %bb.0:
    748 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    749 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
    750 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
    751 ; CHECK-NEXT:    retq
    752   %s = load double, double* %p
    753   %vec = insertelement <2 x double> undef, double %s, i32 0
    754   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    755   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    756   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
    757   ret <8 x double> %res
    758 }
    759 
    ; Zero-masked splat of a double loaded from %p to 8 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; Expected assembly: vxorpd/vcmpeqpd build %k1, then a {%k1} {z}
    ; vbroadcastsd from (%rdi) into zmm0.
    760 define <8 x double> @test_masked_z_double_to_8_mem_mask0(double* %p, <8 x double> %mask) {
    761 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask0:
    762 ; CHECK:       # %bb.0:
    763 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    764 ; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
    765 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
    766 ; CHECK-NEXT:    retq
    767   %s = load double, double* %p
    768   %vec = insertelement <2 x double> undef, double %s, i32 0
    769   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    770   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    771   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
    772   ret <8 x double> %res
    773 }
    ; Merge-masked splat of a double loaded from %p to 8 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; The IR body matches test_masked_double_to_8_mem_mask0; only the name differs.
    774 define <8 x double> @test_masked_double_to_8_mem_mask1(double* %p, <8 x double> %default, <8 x double> %mask) {
    775 ; CHECK-LABEL: test_masked_double_to_8_mem_mask1:
    776 ; CHECK:       # %bb.0:
    777 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    778 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
    779 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
    780 ; CHECK-NEXT:    retq
    781   %s = load double, double* %p
    782   %vec = insertelement <2 x double> undef, double %s, i32 0
    783   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    784   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    785   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
    786   ret <8 x double> %res
    787 }
    788 
    ; Zero-masked splat of a double loaded from %p to 8 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; The IR body matches test_masked_z_double_to_8_mem_mask0; only the name differs.
    789 define <8 x double> @test_masked_z_double_to_8_mem_mask1(double* %p, <8 x double> %mask) {
    790 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask1:
    791 ; CHECK:       # %bb.0:
    792 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    793 ; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
    794 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
    795 ; CHECK-NEXT:    retq
    796   %s = load double, double* %p
    797   %vec = insertelement <2 x double> undef, double %s, i32 0
    798   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    799   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    800   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
    801   ret <8 x double> %res
    802 }
    ; Merge-masked splat of a double loaded from %p to 8 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; The IR body matches test_masked_double_to_8_mem_mask0; only the name differs.
    803 define <8 x double> @test_masked_double_to_8_mem_mask2(double* %p, <8 x double> %default, <8 x double> %mask) {
    804 ; CHECK-LABEL: test_masked_double_to_8_mem_mask2:
    805 ; CHECK:       # %bb.0:
    806 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    807 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
    808 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
    809 ; CHECK-NEXT:    retq
    810   %s = load double, double* %p
    811   %vec = insertelement <2 x double> undef, double %s, i32 0
    812   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    813   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    814   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
    815   ret <8 x double> %res
    816 }
    817 
    ; Zero-masked splat of a double loaded from %p to 8 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; The IR body matches test_masked_z_double_to_8_mem_mask0; only the name differs.
    818 define <8 x double> @test_masked_z_double_to_8_mem_mask2(double* %p, <8 x double> %mask) {
    819 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask2:
    820 ; CHECK:       # %bb.0:
    821 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    822 ; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
    823 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
    824 ; CHECK-NEXT:    retq
    825   %s = load double, double* %p
    826   %vec = insertelement <2 x double> undef, double %s, i32 0
    827   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    828   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    829   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
    830   ret <8 x double> %res
    831 }
    ; Merge-masked splat of a double loaded from %p to 8 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; The IR body matches test_masked_double_to_8_mem_mask0; only the name differs.
    832 define <8 x double> @test_masked_double_to_8_mem_mask3(double* %p, <8 x double> %default, <8 x double> %mask) {
    833 ; CHECK-LABEL: test_masked_double_to_8_mem_mask3:
    834 ; CHECK:       # %bb.0:
    835 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    836 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
    837 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1}
    838 ; CHECK-NEXT:    retq
    839   %s = load double, double* %p
    840   %vec = insertelement <2 x double> undef, double %s, i32 0
    841   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    842   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    843   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
    844   ret <8 x double> %res
    845 }
    846 
    ; Zero-masked splat of a double loaded from %p to 8 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; The IR body matches test_masked_z_double_to_8_mem_mask0; only the name differs.
    847 define <8 x double> @test_masked_z_double_to_8_mem_mask3(double* %p, <8 x double> %mask) {
    848 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask3:
    849 ; CHECK:       # %bb.0:
    850 ; CHECK-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
    851 ; CHECK-NEXT:    vcmpeqpd %zmm1, %zmm0, %k1
    852 ; CHECK-NEXT:    vbroadcastsd (%rdi), %zmm0 {%k1} {z}
    853 ; CHECK-NEXT:    retq
    854   %s = load double, double* %p
    855   %vec = insertelement <2 x double> undef, double %s, i32 0
    856   %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    857   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
    858   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
    859   ret <8 x double> %res
    860 }
    ; Unmasked splat of a float loaded from %p to 4 lanes (insertelement into
    ; lane 0, then a shufflevector whose indices are all 0).
    ; Expected assembly: a single vbroadcastss from (%rdi) into xmm0.
    861 define <4 x float> @test_float_to_4_mem(float* %p) {
    862 ; CHECK-LABEL: test_float_to_4_mem:
    863 ; CHECK:       # %bb.0:
    864 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0
    865 ; CHECK-NEXT:    retq
    866   %s = load float, float* %p
    867   %vec = insertelement <2 x float> undef, float %s, i32 0
    868   %res = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    869   ret <4 x float> %res
    870 }
    ; Merge-masked splat of a float loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; Expected assembly: vxorps/vcmpeqps build %k1, then a {%k1}-masked
    ; vbroadcastss from (%rdi) merges into xmm0.
    871 define <4 x float> @test_masked_float_to_4_mem_mask0(float* %p, <4 x float> %default, <4 x float> %mask) {
    872 ; CHECK-LABEL: test_masked_float_to_4_mem_mask0:
    873 ; CHECK:       # %bb.0:
    874 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    875 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    876 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1}
    877 ; CHECK-NEXT:    retq
    878   %s = load float, float* %p
    879   %vec = insertelement <2 x float> undef, float %s, i32 0
    880   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    881   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    882   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
    883   ret <4 x float> %res
    884 }
    885 
    ; Zero-masked splat of a float loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; Expected assembly: vxorps/vcmpeqps build %k1, then a {%k1} {z}
    ; vbroadcastss from (%rdi) into xmm0.
    886 define <4 x float> @test_masked_z_float_to_4_mem_mask0(float* %p, <4 x float> %mask) {
    887 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask0:
    888 ; CHECK:       # %bb.0:
    889 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    890 ; CHECK-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
    891 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1} {z}
    892 ; CHECK-NEXT:    retq
    893   %s = load float, float* %p
    894   %vec = insertelement <2 x float> undef, float %s, i32 0
    895   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    896   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    897   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    898   ret <4 x float> %res
    899 }
    ; Merge-masked splat of a float loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; The IR body matches test_masked_float_to_4_mem_mask0; only the name differs.
    900 define <4 x float> @test_masked_float_to_4_mem_mask1(float* %p, <4 x float> %default, <4 x float> %mask) {
    901 ; CHECK-LABEL: test_masked_float_to_4_mem_mask1:
    902 ; CHECK:       # %bb.0:
    903 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    904 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    905 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1}
    906 ; CHECK-NEXT:    retq
    907   %s = load float, float* %p
    908   %vec = insertelement <2 x float> undef, float %s, i32 0
    909   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    910   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    911   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
    912   ret <4 x float> %res
    913 }
    914 
    ; Zero-masked splat of a float loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; The IR body matches test_masked_z_float_to_4_mem_mask0; only the name differs.
    915 define <4 x float> @test_masked_z_float_to_4_mem_mask1(float* %p, <4 x float> %mask) {
    916 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask1:
    917 ; CHECK:       # %bb.0:
    918 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    919 ; CHECK-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
    920 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1} {z}
    921 ; CHECK-NEXT:    retq
    922   %s = load float, float* %p
    923   %vec = insertelement <2 x float> undef, float %s, i32 0
    924   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    925   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    926   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    927   ret <4 x float> %res
    928 }
    ; Merge-masked splat of a float loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; The IR body matches test_masked_float_to_4_mem_mask0; only the name differs.
    929 define <4 x float> @test_masked_float_to_4_mem_mask2(float* %p, <4 x float> %default, <4 x float> %mask) {
    930 ; CHECK-LABEL: test_masked_float_to_4_mem_mask2:
    931 ; CHECK:       # %bb.0:
    932 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    933 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    934 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1}
    935 ; CHECK-NEXT:    retq
    936   %s = load float, float* %p
    937   %vec = insertelement <2 x float> undef, float %s, i32 0
    938   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    939   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    940   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
    941   ret <4 x float> %res
    942 }
    943 
    ; Zero-masked splat of a float loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; The IR body matches test_masked_z_float_to_4_mem_mask0; only the name differs.
    944 define <4 x float> @test_masked_z_float_to_4_mem_mask2(float* %p, <4 x float> %mask) {
    945 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask2:
    946 ; CHECK:       # %bb.0:
    947 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    948 ; CHECK-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
    949 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1} {z}
    950 ; CHECK-NEXT:    retq
    951   %s = load float, float* %p
    952   %vec = insertelement <2 x float> undef, float %s, i32 0
    953   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    954   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    955   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    956   ret <4 x float> %res
    957 }
    ; Merge-masked splat of a float loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; The IR body matches test_masked_float_to_4_mem_mask0; only the name differs.
    958 define <4 x float> @test_masked_float_to_4_mem_mask3(float* %p, <4 x float> %default, <4 x float> %mask) {
    959 ; CHECK-LABEL: test_masked_float_to_4_mem_mask3:
    960 ; CHECK:       # %bb.0:
    961 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    962 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    963 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1}
    964 ; CHECK-NEXT:    retq
    965   %s = load float, float* %p
    966   %vec = insertelement <2 x float> undef, float %s, i32 0
    967   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    968   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    969   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
    970   ret <4 x float> %res
    971 }
    972 
    ; Zero-masked splat of a float loaded from %p to 4 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
    ; The IR body matches test_masked_z_float_to_4_mem_mask0; only the name differs.
    973 define <4 x float> @test_masked_z_float_to_4_mem_mask3(float* %p, <4 x float> %mask) {
    974 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask3:
    975 ; CHECK:       # %bb.0:
    976 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
    977 ; CHECK-NEXT:    vcmpeqps %xmm1, %xmm0, %k1
    978 ; CHECK-NEXT:    vbroadcastss (%rdi), %xmm0 {%k1} {z}
    979 ; CHECK-NEXT:    retq
    980   %s = load float, float* %p
    981   %vec = insertelement <2 x float> undef, float %s, i32 0
    982   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    983   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    984   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    985   ret <4 x float> %res
    986 }
    ; Unmasked splat of a float loaded from %p to 8 lanes (insertelement into
    ; lane 0, then a shufflevector whose indices are all 0).
    ; Expected assembly: a single vbroadcastss from (%rdi) into ymm0.
    987 define <8 x float> @test_float_to_8_mem(float* %p) {
    988 ; CHECK-LABEL: test_float_to_8_mem:
    989 ; CHECK:       # %bb.0:
    990 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0
    991 ; CHECK-NEXT:    retq
    992   %s = load float, float* %p
    993   %vec = insertelement <2 x float> undef, float %s, i32 0
    994   %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    995   ret <8 x float> %res
    996 }
    ; Merge-masked splat of a float loaded from %p to 8 lanes: lanes where %mask
    ; compares oeq to 0.0 take the splat, all other lanes keep %default.
    ; Expected assembly: vxorps/vcmpeqps build %k1, then a {%k1}-masked
    ; vbroadcastss from (%rdi) merges into ymm0.
    997 define <8 x float> @test_masked_float_to_8_mem_mask0(float* %p, <8 x float> %default, <8 x float> %mask) {
    998 ; CHECK-LABEL: test_masked_float_to_8_mem_mask0:
    999 ; CHECK:       # %bb.0:
   1000 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1001 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
   1002 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
   1003 ; CHECK-NEXT:    retq
   1004   %s = load float, float* %p
   1005   %vec = insertelement <2 x float> undef, float %s, i32 0
   1006   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1007   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
   1008   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
   1009   ret <8 x float> %res
   1010 }
   1011 
   ; Zero-masked splat of a float loaded from %p to 8 lanes: lanes where %mask
   ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
   ; Expected assembly: vxorps/vcmpeqps build %k1, then a {%k1} {z}
   ; vbroadcastss from (%rdi) into ymm0.
   1012 define <8 x float> @test_masked_z_float_to_8_mem_mask0(float* %p, <8 x float> %mask) {
   1013 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask0:
   1014 ; CHECK:       # %bb.0:
   1015 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1016 ; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
   1017 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1} {z}
   1018 ; CHECK-NEXT:    retq
   1019   %s = load float, float* %p
   1020   %vec = insertelement <2 x float> undef, float %s, i32 0
   1021   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1022   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
   1023   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
   1024   ret <8 x float> %res
   1025 }
   ; Merge-masked splat of a float loaded from %p to 8 lanes: lanes where %mask
   ; compares oeq to 0.0 take the splat, all other lanes keep %default.
   ; The IR body matches test_masked_float_to_8_mem_mask0; only the name differs.
   1026 define <8 x float> @test_masked_float_to_8_mem_mask1(float* %p, <8 x float> %default, <8 x float> %mask) {
   1027 ; CHECK-LABEL: test_masked_float_to_8_mem_mask1:
   1028 ; CHECK:       # %bb.0:
   1029 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1030 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
   1031 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
   1032 ; CHECK-NEXT:    retq
   1033   %s = load float, float* %p
   1034   %vec = insertelement <2 x float> undef, float %s, i32 0
   1035   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1036   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
   1037   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
   1038   ret <8 x float> %res
   1039 }
   1040 
   ; Zero-masked splat of a float loaded from %p to 8 lanes: lanes where %mask
   ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
   ; The IR body matches test_masked_z_float_to_8_mem_mask0; only the name differs.
   1041 define <8 x float> @test_masked_z_float_to_8_mem_mask1(float* %p, <8 x float> %mask) {
   1042 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask1:
   1043 ; CHECK:       # %bb.0:
   1044 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1045 ; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
   1046 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1} {z}
   1047 ; CHECK-NEXT:    retq
   1048   %s = load float, float* %p
   1049   %vec = insertelement <2 x float> undef, float %s, i32 0
   1050   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1051   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
   1052   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
   1053   ret <8 x float> %res
   1054 }
   ; Merge-masked splat of a float loaded from %p to 8 lanes: lanes where %mask
   ; compares oeq to 0.0 take the splat, all other lanes keep %default.
   ; The IR body matches test_masked_float_to_8_mem_mask0; only the name differs.
   1055 define <8 x float> @test_masked_float_to_8_mem_mask2(float* %p, <8 x float> %default, <8 x float> %mask) {
   1056 ; CHECK-LABEL: test_masked_float_to_8_mem_mask2:
   1057 ; CHECK:       # %bb.0:
   1058 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1059 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
   1060 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
   1061 ; CHECK-NEXT:    retq
   1062   %s = load float, float* %p
   1063   %vec = insertelement <2 x float> undef, float %s, i32 0
   1064   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1065   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
   1066   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
   1067   ret <8 x float> %res
   1068 }
   1069 
   ; Zero-masked splat of a float loaded from %p to 8 lanes: lanes where %mask
   ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
   ; The IR body matches test_masked_z_float_to_8_mem_mask0; only the name differs.
   1070 define <8 x float> @test_masked_z_float_to_8_mem_mask2(float* %p, <8 x float> %mask) {
   1071 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask2:
   1072 ; CHECK:       # %bb.0:
   1073 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1074 ; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
   1075 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1} {z}
   1076 ; CHECK-NEXT:    retq
   1077   %s = load float, float* %p
   1078   %vec = insertelement <2 x float> undef, float %s, i32 0
   1079   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1080   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
   1081   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
   1082   ret <8 x float> %res
   1083 }
   ; Merge-masked splat of a float loaded from %p to 8 lanes: lanes where %mask
   ; compares oeq to 0.0 take the splat, all other lanes keep %default.
   ; The IR body matches test_masked_float_to_8_mem_mask0; only the name differs.
   1084 define <8 x float> @test_masked_float_to_8_mem_mask3(float* %p, <8 x float> %default, <8 x float> %mask) {
   1085 ; CHECK-LABEL: test_masked_float_to_8_mem_mask3:
   1086 ; CHECK:       # %bb.0:
   1087 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1088 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
   1089 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1}
   1090 ; CHECK-NEXT:    retq
   1091   %s = load float, float* %p
   1092   %vec = insertelement <2 x float> undef, float %s, i32 0
   1093   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1094   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
   1095   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
   1096   ret <8 x float> %res
   1097 }
   1098 
   ; Zero-masked splat of a float loaded from %p to 8 lanes: lanes where %mask
   ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
   ; The IR body matches test_masked_z_float_to_8_mem_mask0; only the name differs.
   1099 define <8 x float> @test_masked_z_float_to_8_mem_mask3(float* %p, <8 x float> %mask) {
   1100 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask3:
   1101 ; CHECK:       # %bb.0:
   1102 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1103 ; CHECK-NEXT:    vcmpeqps %ymm1, %ymm0, %k1
   1104 ; CHECK-NEXT:    vbroadcastss (%rdi), %ymm0 {%k1} {z}
   1105 ; CHECK-NEXT:    retq
   1106   %s = load float, float* %p
   1107   %vec = insertelement <2 x float> undef, float %s, i32 0
   1108   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1109   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
   1110   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
   1111   ret <8 x float> %res
   1112 }
   ; Unmasked splat of a float loaded from %p to 16 lanes (insertelement into
   ; lane 0, then a shufflevector whose indices are all 0).
   ; Expected assembly: a single vbroadcastss from (%rdi) into zmm0.
   1113 define <16 x float> @test_float_to_16_mem(float* %p) {
   1114 ; CHECK-LABEL: test_float_to_16_mem:
   1115 ; CHECK:       # %bb.0:
   1116 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0
   1117 ; CHECK-NEXT:    retq
   1118   %s = load float, float* %p
   1119   %vec = insertelement <2 x float> undef, float %s, i32 0
   1120   %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1121   ret <16 x float> %res
   1122 }
   ; Merge-masked splat of a float loaded from %p to 16 lanes: lanes where %mask
   ; compares oeq to 0.0 take the splat, all other lanes keep %default.
   ; Expected assembly: vxorps/vcmpeqps build %k1, then a {%k1}-masked
   ; vbroadcastss from (%rdi) merges into zmm0.
   1123 define <16 x float> @test_masked_float_to_16_mem_mask0(float* %p, <16 x float> %default, <16 x float> %mask) {
   1124 ; CHECK-LABEL: test_masked_float_to_16_mem_mask0:
   1125 ; CHECK:       # %bb.0:
   1126 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1127 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
   1128 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
   1129 ; CHECK-NEXT:    retq
   1130   %s = load float, float* %p
   1131   %vec = insertelement <2 x float> undef, float %s, i32 0
   1132   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1133   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
   1134   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
   1135   ret <16 x float> %res
   1136 }
   1137 
   ; Zero-masked splat of a float loaded from %p to 16 lanes: lanes where %mask
   ; compares oeq to 0.0 take the splat, all other lanes are zeroinitializer.
   ; Expected assembly: vxorps/vcmpeqps build %k1, then a {%k1} {z}
   ; vbroadcastss from (%rdi) into zmm0.
   1138 define <16 x float> @test_masked_z_float_to_16_mem_mask0(float* %p, <16 x float> %mask) {
   1139 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask0:
   1140 ; CHECK:       # %bb.0:
   1141 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1142 ; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
   1143 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
   1144 ; CHECK-NEXT:    retq
   1145   %s = load float, float* %p
   1146   %vec = insertelement <2 x float> undef, float %s, i32 0
   1147   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1148   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
   1149   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
   1150   ret <16 x float> %res
   1151 }
   1152 define <16 x float> @test_masked_float_to_16_mem_mask1(float* %p, <16 x float> %default, <16 x float> %mask) {
        ; Broadcast with a per-lane select: lanes where %mask compares ordered-equal
        ; to 0.0 receive the float loaded from %p; all other lanes keep %default.
        ; The CHECK lines expect a vcmpeqps against a zeroed register that writes
        ; %k1, followed by a vbroadcastss from memory predicated on {%k1}.
        ; The IR body is the same as in test_masked_float_to_16_mem_mask0; only the
        ; function name differs.
   1153 ; CHECK-LABEL: test_masked_float_to_16_mem_mask1:
   1154 ; CHECK:       # %bb.0:
   1155 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1156 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
   1157 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
   1158 ; CHECK-NEXT:    retq
   1159   %s = load float, float* %p
          ; Put the scalar in lane 0 of a 2-element vector; lane 1 stays undef.
   1160   %vec = insertelement <2 x float> undef, float %s, i32 0
          ; An all-zero shuffle mask picks lane 0 for each of the 16 result lanes.
   1161   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Per-lane predicate: true where the %mask element equals 0.0.
   1162   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
          ; True lanes take the broadcast value, false lanes take %default.
   1163   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
   1164   ret <16 x float> %res
   1165 }
   1166 
   1167 define <16 x float> @test_masked_z_float_to_16_mem_mask1(float* %p, <16 x float> %mask) {
        ; Broadcast with zeroing: lanes where %mask compares ordered-equal to 0.0
        ; receive the float loaded from %p; every other lane is set to zero.
        ; The CHECK lines expect a vcmpeqps against a zeroed register that writes
        ; %k1, followed by a vbroadcastss from memory with {%k1} {z}.
        ; The IR body is the same as in test_masked_z_float_to_16_mem_mask0; only
        ; the function name differs.
   1168 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask1:
   1169 ; CHECK:       # %bb.0:
   1170 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1171 ; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
   1172 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
   1173 ; CHECK-NEXT:    retq
   1174   %s = load float, float* %p
          ; Put the scalar in lane 0 of a 2-element vector; lane 1 stays undef.
   1175   %vec = insertelement <2 x float> undef, float %s, i32 0
          ; An all-zero shuffle mask picks lane 0 for each of the 16 result lanes.
   1176   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Per-lane predicate: true where the %mask element equals 0.0.
   1177   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
          ; True lanes take the broadcast value, false lanes become 0.0.
   1178   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
   1179   ret <16 x float> %res
   1180 }
   1181 define <16 x float> @test_masked_float_to_16_mem_mask2(float* %p, <16 x float> %default, <16 x float> %mask) {
        ; Broadcast with a per-lane select: lanes where %mask compares ordered-equal
        ; to 0.0 receive the float loaded from %p; all other lanes keep %default.
        ; The CHECK lines expect a vcmpeqps against a zeroed register that writes
        ; %k1, followed by a vbroadcastss from memory predicated on {%k1}.
        ; The IR body is the same as in test_masked_float_to_16_mem_mask0; only the
        ; function name differs.
   1182 ; CHECK-LABEL: test_masked_float_to_16_mem_mask2:
   1183 ; CHECK:       # %bb.0:
   1184 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1185 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
   1186 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
   1187 ; CHECK-NEXT:    retq
   1188   %s = load float, float* %p
          ; Put the scalar in lane 0 of a 2-element vector; lane 1 stays undef.
   1189   %vec = insertelement <2 x float> undef, float %s, i32 0
          ; An all-zero shuffle mask picks lane 0 for each of the 16 result lanes.
   1190   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Per-lane predicate: true where the %mask element equals 0.0.
   1191   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
          ; True lanes take the broadcast value, false lanes take %default.
   1192   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
   1193   ret <16 x float> %res
   1194 }
   1195 
   1196 define <16 x float> @test_masked_z_float_to_16_mem_mask2(float* %p, <16 x float> %mask) {
        ; Broadcast with zeroing: lanes where %mask compares ordered-equal to 0.0
        ; receive the float loaded from %p; every other lane is set to zero.
        ; The CHECK lines expect a vcmpeqps against a zeroed register that writes
        ; %k1, followed by a vbroadcastss from memory with {%k1} {z}.
        ; The IR body is the same as in test_masked_z_float_to_16_mem_mask0; only
        ; the function name differs.
   1197 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask2:
   1198 ; CHECK:       # %bb.0:
   1199 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1200 ; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
   1201 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
   1202 ; CHECK-NEXT:    retq
   1203   %s = load float, float* %p
          ; Put the scalar in lane 0 of a 2-element vector; lane 1 stays undef.
   1204   %vec = insertelement <2 x float> undef, float %s, i32 0
          ; An all-zero shuffle mask picks lane 0 for each of the 16 result lanes.
   1205   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Per-lane predicate: true where the %mask element equals 0.0.
   1206   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
          ; True lanes take the broadcast value, false lanes become 0.0.
   1207   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
   1208   ret <16 x float> %res
   1209 }
   1210 define <16 x float> @test_masked_float_to_16_mem_mask3(float* %p, <16 x float> %default, <16 x float> %mask) {
        ; Broadcast with a per-lane select: lanes where %mask compares ordered-equal
        ; to 0.0 receive the float loaded from %p; all other lanes keep %default.
        ; The CHECK lines expect a vcmpeqps against a zeroed register that writes
        ; %k1, followed by a vbroadcastss from memory predicated on {%k1}.
        ; The IR body is the same as in test_masked_float_to_16_mem_mask0; only the
        ; function name differs.
   1211 ; CHECK-LABEL: test_masked_float_to_16_mem_mask3:
   1212 ; CHECK:       # %bb.0:
   1213 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
   1214 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
   1215 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1}
   1216 ; CHECK-NEXT:    retq
   1217   %s = load float, float* %p
          ; Put the scalar in lane 0 of a 2-element vector; lane 1 stays undef.
   1218   %vec = insertelement <2 x float> undef, float %s, i32 0
          ; An all-zero shuffle mask picks lane 0 for each of the 16 result lanes.
   1219   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Per-lane predicate: true where the %mask element equals 0.0.
   1220   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
          ; True lanes take the broadcast value, false lanes take %default.
   1221   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
   1222   ret <16 x float> %res
   1223 }
   1224 
   1225 define <16 x float> @test_masked_z_float_to_16_mem_mask3(float* %p, <16 x float> %mask) {
        ; Broadcast with zeroing: lanes where %mask compares ordered-equal to 0.0
        ; receive the float loaded from %p; every other lane is set to zero.
        ; The CHECK lines expect a vcmpeqps against a zeroed register that writes
        ; %k1, followed by a vbroadcastss from memory with {%k1} {z}.
        ; The IR body is the same as in test_masked_z_float_to_16_mem_mask0; only
        ; the function name differs.
   1226 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask3:
   1227 ; CHECK:       # %bb.0:
   1228 ; CHECK-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1229 ; CHECK-NEXT:    vcmpeqps %zmm1, %zmm0, %k1
   1230 ; CHECK-NEXT:    vbroadcastss (%rdi), %zmm0 {%k1} {z}
   1231 ; CHECK-NEXT:    retq
   1232   %s = load float, float* %p
          ; Put the scalar in lane 0 of a 2-element vector; lane 1 stays undef.
   1233   %vec = insertelement <2 x float> undef, float %s, i32 0
          ; An all-zero shuffle mask picks lane 0 for each of the 16 result lanes.
   1234   %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
          ; Per-lane predicate: true where the %mask element equals 0.0.
   1235   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
          ; True lanes take the broadcast value, false lanes become 0.0.
   1236   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
   1237   ret <16 x float> %res
   1238 }
   1239