Home | History | Annotate | Download | only in avx512-shuffles
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
      3 
      4 define <4 x float> @test_4xfloat_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2) {
      5 ; CHECK-LABEL: test_4xfloat_shuff_mask0:
      6 ; CHECK:       # %bb.0:
      7 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,1],xmm1[3,1]
      8 ; CHECK-NEXT:    retq
          ; Lanes 0-1 take %vec1[2,1]; lanes 2-3 take %vec2[3,1] (mask indices 7,5).
          ; The expected assembly is a single unmasked vshufps.
      9   %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 5>
     10   ret <4 x float> %res
     11 }
     12 define <4 x float> @test_4xfloat_masked_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
     13 ; CHECK-LABEL: test_4xfloat_masked_shuff_mask0:
     14 ; CHECK:       # %bb.0:
     15 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
     16 ; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
     17 ; CHECK-NEXT:    vshufps {{.*#+}} xmm2 {%k1} = xmm0[2,1],xmm1[3,1]
     18 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
     19 ; CHECK-NEXT:    retq
          ; Shuffle takes %vec1[2,1] then %vec2[3,1] (mask indices 7,5).
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
     20   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 5>
     21   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
     22   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
     23   ret <4 x float> %res
     24 }
     25 
     26 define <4 x float> @test_4xfloat_zero_masked_shuff_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
     27 ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask0:
     28 ; CHECK:       # %bb.0:
     29 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
     30 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
     31 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],xmm1[3,1]
     32 ; CHECK-NEXT:    retq
          ; Shuffle takes %vec1[2,1] then %vec2[3,1] (mask indices 7,5).
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
     33   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 5>
     34   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
     35   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
     36   ret <4 x float> %res
     37 }
     38 define <4 x float> @test_4xfloat_masked_shuff_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
     39 ; CHECK-LABEL: test_4xfloat_masked_shuff_mask1:
     40 ; CHECK:       # %bb.0:
     41 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
     42 ; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
     43 ; CHECK-NEXT:    vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,2],xmm1[3,2]
     44 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
     45 ; CHECK-NEXT:    retq
          ; Shuffle takes %vec1[1,2] then %vec2[3,2] (mask indices 7,6).
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
     46   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 2, i32 7, i32 6>
     47   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
     48   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
     49   ret <4 x float> %res
     50 }
     51 
     52 define <4 x float> @test_4xfloat_zero_masked_shuff_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
     53 ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask1:
     54 ; CHECK:       # %bb.0:
     55 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
     56 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
     57 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2],xmm1[3,2]
     58 ; CHECK-NEXT:    retq
          ; Shuffle takes %vec1[1,2] then %vec2[3,2] (mask indices 7,6).
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
     59   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 2, i32 7, i32 6>
     60   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
     61   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
     62   ret <4 x float> %res
     63 }
     64 define <4 x float> @test_4xfloat_masked_shuff_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
     65 ; CHECK-LABEL: test_4xfloat_masked_shuff_mask2:
     66 ; CHECK:       # %bb.0:
     67 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
     68 ; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
     69 ; CHECK-NEXT:    vshufps {{.*#+}} xmm2 {%k1} = xmm0[1,3],xmm1[2,1]
     70 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
     71 ; CHECK-NEXT:    retq
          ; Shuffle takes %vec1[1,3] then %vec2[2,1] (mask indices 6,5).
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
     72   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 5>
     73   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
     74   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
     75   ret <4 x float> %res
     76 }
     77 
     78 define <4 x float> @test_4xfloat_zero_masked_shuff_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
     79 ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask2:
     80 ; CHECK:       # %bb.0:
     81 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
     82 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
     83 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],xmm1[2,1]
     84 ; CHECK-NEXT:    retq
          ; Shuffle takes %vec1[1,3] then %vec2[2,1] (mask indices 6,5).
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
     85   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 5>
     86   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
     87   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
     88   ret <4 x float> %res
     89 }
     90 define <4 x float> @test_4xfloat_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2) {
     91 ; CHECK-LABEL: test_4xfloat_shuff_mask3:
     92 ; CHECK:       # %bb.0:
     93 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[3,3],xmm1[3,3]
     94 ; CHECK-NEXT:    retq
          ; Lanes 0-1 take %vec1[3,3]; lanes 2-3 take %vec2[3,3] (mask indices 7,7).
          ; The expected assembly is a single unmasked vshufps.
     95   %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 7, i32 7>
     96   ret <4 x float> %res
     97 }
     98 define <4 x float> @test_4xfloat_masked_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
     99 ; CHECK-LABEL: test_4xfloat_masked_shuff_mask3:
    100 ; CHECK:       # %bb.0:
    101 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
    102 ; CHECK-NEXT:    vcmpeqps %xmm4, %xmm3, %k1
    103 ; CHECK-NEXT:    vshufps {{.*#+}} xmm2 {%k1} = xmm0[3,3],xmm1[3,3]
    104 ; CHECK-NEXT:    vmovaps %xmm2, %xmm0
    105 ; CHECK-NEXT:    retq
          ; Shuffle takes %vec1[3,3] then %vec2[3,3] (mask indices 7,7).
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    106   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 7, i32 7>
    107   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    108   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
    109   ret <4 x float> %res
    110 }
    111 
    112 define <4 x float> @test_4xfloat_zero_masked_shuff_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
    113 ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mask3:
    114 ; CHECK:       # %bb.0:
    115 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    116 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
    117 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],xmm1[3,3]
    118 ; CHECK-NEXT:    retq
          ; Shuffle takes %vec1[3,3] then %vec2[3,3] (mask indices 7,7).
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    119   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 7, i32 7>
    120   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    121   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    122   ret <4 x float> %res
    123 }
    124 define <4 x float> @test_4xfloat_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p) {
    125 ; CHECK-LABEL: test_4xfloat_shuff_mem_mask0:
    126 ; CHECK:       # %bb.0:
    127 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,0],mem[1,2]
    128 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p; the expected assembly folds that load into
          ; vshufps as its `mem` operand. Lanes: %vec1[1,0] then %vec2[1,2] (indices 5,6).
    129   %vec2 = load <4 x float>, <4 x float>* %vec2p
    130   %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 0, i32 5, i32 6>
    131   ret <4 x float> %res
    132 }
    133 define <4 x float> @test_4xfloat_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
    134 ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask0:
    135 ; CHECK:       # %bb.0:
    136 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    137 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
    138 ; CHECK-NEXT:    vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,0],mem[1,2]
    139 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    140 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Shuffle takes %vec1[1,0] then %vec2[1,2]; lanes where %mask is oeq 0.0 get the
          ; shuffle, other lanes keep %vec3 (merge-masking).
    141   %vec2 = load <4 x float>, <4 x float>* %vec2p
    142   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 0, i32 5, i32 6>
    143   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    144   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
    145   ret <4 x float> %res
    146 }
    147 
    148 define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask0(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
    149 ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask0:
    150 ; CHECK:       # %bb.0:
    151 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    152 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    153 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0],mem[1,2]
    154 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Shuffle takes %vec1[1,0] then %vec2[1,2]; lanes where %mask is oeq 0.0 get the
          ; shuffle, other lanes are zeroed ({z} form).
    155   %vec2 = load <4 x float>, <4 x float>* %vec2p
    156   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 0, i32 5, i32 6>
    157   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    158   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    159   ret <4 x float> %res
    160 }
    161 
    162 define <4 x float> @test_4xfloat_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
    163 ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask1:
    164 ; CHECK:       # %bb.0:
    165 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    166 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
    167 ; CHECK-NEXT:    vshufps {{.*#+}} xmm1 {%k1} = xmm0[3,3],mem[1,3]
    168 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    169 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Shuffle takes %vec1[3,3] then %vec2[1,3]; lanes where %mask is oeq 0.0 get the
          ; shuffle, other lanes keep %vec3 (merge-masking).
    170   %vec2 = load <4 x float>, <4 x float>* %vec2p
    171   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 5, i32 7>
    172   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    173   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
    174   ret <4 x float> %res
    175 }
    176 
    177 define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask1(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
    178 ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask1:
    179 ; CHECK:       # %bb.0:
    180 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    181 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    182 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3],mem[1,3]
    183 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Shuffle takes %vec1[3,3] then %vec2[1,3]; lanes where %mask is oeq 0.0 get the
          ; shuffle, other lanes are zeroed ({z} form).
    184   %vec2 = load <4 x float>, <4 x float>* %vec2p
    185   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 3, i32 3, i32 5, i32 7>
    186   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    187   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    188   ret <4 x float> %res
    189 }
    190 
    191 define <4 x float> @test_4xfloat_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
    192 ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask2:
    193 ; CHECK:       # %bb.0:
    194 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    195 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
    196 ; CHECK-NEXT:    vshufps {{.*#+}} xmm1 {%k1} = xmm0[1,3],mem[2,0]
    197 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    198 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Shuffle takes %vec1[1,3] then %vec2[2,0]; lanes where %mask is oeq 0.0 get the
          ; shuffle, other lanes keep %vec3 (merge-masking).
    199   %vec2 = load <4 x float>, <4 x float>* %vec2p
    200   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 4>
    201   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    202   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
    203   ret <4 x float> %res
    204 }
    205 
    206 define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask2(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
    207 ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask2:
    208 ; CHECK:       # %bb.0:
    209 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    210 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    211 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3],mem[2,0]
    212 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Shuffle takes %vec1[1,3] then %vec2[2,0]; lanes where %mask is oeq 0.0 get the
          ; shuffle, other lanes are zeroed ({z} form).
    213   %vec2 = load <4 x float>, <4 x float>* %vec2p
    214   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 1, i32 3, i32 6, i32 4>
    215   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    216   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    217   ret <4 x float> %res
    218 }
    219 
    220 define <4 x float> @test_4xfloat_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p) {
    221 ; CHECK-LABEL: test_4xfloat_shuff_mem_mask3:
    222 ; CHECK:       # %bb.0:
    223 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[2,1],mem[3,2]
    224 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p; the expected assembly folds that load into
          ; vshufps as its `mem` operand. Lanes: %vec1[2,1] then %vec2[3,2] (indices 7,6).
    225   %vec2 = load <4 x float>, <4 x float>* %vec2p
    226   %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 6>
    227   ret <4 x float> %res
    228 }
    229 define <4 x float> @test_4xfloat_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %vec3, <4 x float> %mask) {
    230 ; CHECK-LABEL: test_4xfloat_masked_shuff_mem_mask3:
    231 ; CHECK:       # %bb.0:
    232 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    233 ; CHECK-NEXT:    vcmpeqps %xmm3, %xmm2, %k1
    234 ; CHECK-NEXT:    vshufps {{.*#+}} xmm1 {%k1} = xmm0[2,1],mem[3,2]
    235 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    236 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Shuffle takes %vec1[2,1] then %vec2[3,2]; lanes where %mask is oeq 0.0 get the
          ; shuffle, other lanes keep %vec3 (merge-masking).
    237   %vec2 = load <4 x float>, <4 x float>* %vec2p
    238   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 6>
    239   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    240   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
    241   ret <4 x float> %res
    242 }
    243 
    244 define <4 x float> @test_4xfloat_zero_masked_shuff_mem_mask3(<4 x float> %vec1, <4 x float>* %vec2p, <4 x float> %mask) {
    245 ; CHECK-LABEL: test_4xfloat_zero_masked_shuff_mem_mask3:
    246 ; CHECK:       # %bb.0:
    247 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    248 ; CHECK-NEXT:    vcmpeqps %xmm2, %xmm1, %k1
    249 ; CHECK-NEXT:    vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1],mem[3,2]
    250 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Shuffle takes %vec1[2,1] then %vec2[3,2]; lanes where %mask is oeq 0.0 get the
          ; shuffle, other lanes are zeroed ({z} form).
    251   %vec2 = load <4 x float>, <4 x float>* %vec2p
    252   %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 1, i32 7, i32 6>
    253   %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
    254   %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
    255   ret <4 x float> %res
    256 }
    257 
    258 define <8 x float> @test_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) {
    259 ; CHECK-LABEL: test_8xfloat_shuff_mask0:
    260 ; CHECK:       # %bb.0:
    261 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
    262 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[1,3] then %vec2[0,2]; the upper group repeats the
          ; pattern at +4. The expected assembly is a single unmasked 256-bit vshufps.
    263   %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 1, i32 3, i32 8, i32 10, i32 5, i32 7, i32 12, i32 14>
    264   ret <8 x float> %res
    265 }
    266 define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
    267 ; CHECK-LABEL: test_8xfloat_masked_shuff_mask0:
    268 ; CHECK:       # %bb.0:
    269 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
    270 ; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
    271 ; CHECK-NEXT:    vshufps {{.*#+}} ymm2 {%k1} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
    272 ; CHECK-NEXT:    vmovaps %ymm2, %ymm0
    273 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[1,3] then %vec2[0,2], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    274   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 1, i32 3, i32 8, i32 10, i32 5, i32 7, i32 12, i32 14>
    275   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    276   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
    277   ret <8 x float> %res
    278 }
    279 
    280 define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
    281 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0:
    282 ; CHECK:       # %bb.0:
    283 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    284 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    285 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3],ymm1[0,2],ymm0[5,7],ymm1[4,6]
    286 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[1,3] then %vec2[0,2], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    287   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 1, i32 3, i32 8, i32 10, i32 5, i32 7, i32 12, i32 14>
    288   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    289   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    290   ret <8 x float> %res
    291 }
    292 define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
    293 ; CHECK-LABEL: test_8xfloat_masked_shuff_mask1:
    294 ; CHECK:       # %bb.0:
    295 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
    296 ; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
    297 ; CHECK-NEXT:    vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5]
    298 ; CHECK-NEXT:    vmovaps %ymm2, %ymm0
    299 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[0,3] then %vec2[3,1], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    300   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 3, i32 11, i32 9, i32 4, i32 7, i32 15, i32 13>
    301   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    302   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
    303   ret <8 x float> %res
    304 }
    305 
    306 define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
    307 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1:
    308 ; CHECK:       # %bb.0:
    309 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    310 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    311 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3],ymm1[3,1],ymm0[4,7],ymm1[7,5]
    312 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[0,3] then %vec2[3,1], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    313   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 3, i32 11, i32 9, i32 4, i32 7, i32 15, i32 13>
    314   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    315   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    316   ret <8 x float> %res
    317 }
    318 define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
    319 ; CHECK-LABEL: test_8xfloat_masked_shuff_mask2:
    320 ; CHECK:       # %bb.0:
    321 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
    322 ; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
    323 ; CHECK-NEXT:    vshufps {{.*#+}} ymm2 {%k1} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6]
    324 ; CHECK-NEXT:    vmovaps %ymm2, %ymm0
    325 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[0,2] then %vec2[2,2], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    326   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 2, i32 10, i32 10, i32 4, i32 6, i32 14, i32 14>
    327   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    328   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
    329   ret <8 x float> %res
    330 }
    331 
    332 define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
    333 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2:
    334 ; CHECK:       # %bb.0:
    335 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    336 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    337 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2],ymm1[2,2],ymm0[4,6],ymm1[6,6]
    338 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[0,2] then %vec2[2,2], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    339   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 2, i32 10, i32 10, i32 4, i32 6, i32 14, i32 14>
    340   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    341   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    342   ret <8 x float> %res
    343 }
    344 define <8 x float> @test_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) {
    345 ; CHECK-LABEL: test_8xfloat_shuff_mask3:
    346 ; CHECK:       # %bb.0:
    347 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
    348 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[3,2] then %vec2[3,2]; the upper group repeats the
          ; pattern at +4. The expected assembly is a single unmasked 256-bit vshufps.
    349   %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 2, i32 11, i32 10, i32 7, i32 6, i32 15, i32 14>
    350   ret <8 x float> %res
    351 }
    352 define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
    353 ; CHECK-LABEL: test_8xfloat_masked_shuff_mask3:
    354 ; CHECK:       # %bb.0:
    355 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
    356 ; CHECK-NEXT:    vcmpeqps %ymm4, %ymm3, %k1
    357 ; CHECK-NEXT:    vshufps {{.*#+}} ymm2 {%k1} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
    358 ; CHECK-NEXT:    vmovaps %ymm2, %ymm0
    359 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[3,2] then %vec2[3,2], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    360   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 2, i32 11, i32 10, i32 7, i32 6, i32 15, i32 14>
    361   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    362   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
    363   ret <8 x float> %res
    364 }
    365 
    366 define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
    367 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3:
    368 ; CHECK:       # %bb.0:
    369 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    370 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    371 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2],ymm1[3,2],ymm0[7,6],ymm1[7,6]
    372 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[3,2] then %vec2[3,2], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    373   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 2, i32 11, i32 10, i32 7, i32 6, i32 15, i32 14>
    374   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    375   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    376   ret <8 x float> %res
    377 }
    378 define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
    379 ; CHECK-LABEL: test_8xfloat_shuff_mem_mask0:
    380 ; CHECK:       # %bb.0:
    381 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
    382 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p; the expected assembly folds that load into vshufps (`mem`).
          ; Per group of four lanes: %vec1[2,1] then %vec2[0,0], repeated at +4 for the upper group.
    383   %vec2 = load <8 x float>, <8 x float>* %vec2p
    384   %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 1, i32 8, i32 8, i32 6, i32 5, i32 12, i32 12>
    385   ret <8 x float> %res
    386 }
    387 define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
    388 ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0:
    389 ; CHECK:       # %bb.0:
    390 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    391 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    392 ; CHECK-NEXT:    vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
    393 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    394 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Per group of four lanes: %vec1[2,1] then %vec2[0,0], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    395   %vec2 = load <8 x float>, <8 x float>* %vec2p
    396   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 1, i32 8, i32 8, i32 6, i32 5, i32 12, i32 12>
    397   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    398   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
    399   ret <8 x float> %res
    400 }
    401 
    402 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
    403 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
    404 ; CHECK:       # %bb.0:
    405 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    406 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
    407 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1],mem[0,0],ymm0[6,5],mem[4,4]
    408 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Per group of four lanes: %vec1[2,1] then %vec2[0,0], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    409   %vec2 = load <8 x float>, <8 x float>* %vec2p
    410   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 1, i32 8, i32 8, i32 6, i32 5, i32 12, i32 12>
    411   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    412   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    413   ret <8 x float> %res
    414 }
    415 
    416 define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
    417 ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1:
    418 ; CHECK:       # %bb.0:
    419 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    420 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    421 ; CHECK-NEXT:    vshufps {{.*#+}} ymm1 {%k1} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4]
    422 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    423 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Per group of four lanes: %vec1[2,2] then %vec2[1,0], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    424   %vec2 = load <8 x float>, <8 x float>* %vec2p
    425   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 2, i32 9, i32 8, i32 6, i32 6, i32 13, i32 12>
    426   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    427   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
    428   ret <8 x float> %res
    429 }
    430 
    431 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
    432 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
    433 ; CHECK:       # %bb.0:
    434 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    435 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
    436 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2],mem[1,0],ymm0[6,6],mem[5,4]
    437 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Per group of four lanes: %vec1[2,2] then %vec2[1,0], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    438   %vec2 = load <8 x float>, <8 x float>* %vec2p
    439   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 2, i32 9, i32 8, i32 6, i32 6, i32 13, i32 12>
    440   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    441   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    442   ret <8 x float> %res
    443 }
    444 
    445 define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
    446 ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2:
    447 ; CHECK:       # %bb.0:
    448 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    449 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    450 ; CHECK-NEXT:    vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7]
    451 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    452 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Per group of four lanes: %vec1[3,3] then %vec2[3,3], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    453   %vec2 = load <8 x float>, <8 x float>* %vec2p
    454   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 11, i32 11, i32 7, i32 7, i32 15, i32 15>
    455   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    456   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
    457   ret <8 x float> %res
    458 }
    459 
    460 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
    461 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
    462 ; CHECK:       # %bb.0:
    463 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    464 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
    465 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[3,3],ymm0[7,7],mem[7,7]
    466 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Per group of four lanes: %vec1[3,3] then %vec2[3,3], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    467   %vec2 = load <8 x float>, <8 x float>* %vec2p
    468   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 11, i32 11, i32 7, i32 7, i32 15, i32 15>
    469   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    470   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    471   ret <8 x float> %res
    472 }
    473 
    474 define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
    475 ; CHECK-LABEL: test_8xfloat_shuff_mem_mask3:
    476 ; CHECK:       # %bb.0:
    477 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
    478 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p; the expected assembly folds that load into vshufps (`mem`).
          ; Per group of four lanes: %vec1[3,3] then %vec2[2,1], repeated at +4 for the upper group.
    479   %vec2 = load <8 x float>, <8 x float>* %vec2p
    480   %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 10, i32 9, i32 7, i32 7, i32 14, i32 13>
    481   ret <8 x float> %res
    482 }
    483 define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
    484 ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3:
    485 ; CHECK:       # %bb.0:
    486 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    487 ; CHECK-NEXT:    vcmpeqps %ymm3, %ymm2, %k1
    488 ; CHECK-NEXT:    vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
    489 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    490 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Per group of four lanes: %vec1[3,3] then %vec2[2,1], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    491   %vec2 = load <8 x float>, <8 x float>* %vec2p
    492   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 10, i32 9, i32 7, i32 7, i32 14, i32 13>
    493   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    494   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
    495   ret <8 x float> %res
    496 }
    497 
    498 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
    499 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
    500 ; CHECK:       # %bb.0:
    501 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    502 ; CHECK-NEXT:    vcmpeqps %ymm2, %ymm1, %k1
    503 ; CHECK-NEXT:    vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3],mem[2,1],ymm0[7,7],mem[6,5]
    504 ; CHECK-NEXT:    retq
          ; %vec2 is loaded from %vec2p (folded as `mem` in the expected assembly).
          ; Per group of four lanes: %vec1[3,3] then %vec2[2,1], repeated at +4 for the upper group.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    505   %vec2 = load <8 x float>, <8 x float>* %vec2p
    506   %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 3, i32 3, i32 10, i32 9, i32 7, i32 7, i32 14, i32 13>
    507   %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
    508   %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
    509   ret <8 x float> %res
    510 }
    511 
    512 define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2) {
    513 ; CHECK-LABEL: test_16xfloat_shuff_mask0:
    514 ; CHECK:       # %bb.0:
    515 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
    516 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[3,2] then %vec2[3,2]; the same pattern repeats for the
          ; remaining groups at +4, +8 and +12. Expected as a single unmasked 512-bit vshufps.
    517   %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 2, i32 19, i32 18, i32 7, i32 6, i32 23, i32 22, i32 11, i32 10, i32 27, i32 26, i32 15, i32 14, i32 31, i32 30>
    518   ret <16 x float> %res
    519 }
    520 define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
    521 ; CHECK-LABEL: test_16xfloat_masked_shuff_mask0:
    522 ; CHECK:       # %bb.0:
    523 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
    524 ; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
    525 ; CHECK-NEXT:    vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
    526 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
    527 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[3,2] then %vec2[3,2], repeated at +4, +8 and +12.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes keep %vec3 (merge-masking).
    528   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 2, i32 19, i32 18, i32 7, i32 6, i32 23, i32 22, i32 11, i32 10, i32 27, i32 26, i32 15, i32 14, i32 31, i32 30>
    529   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    530   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
    531   ret <16 x float> %res
    532 }
    533 
    534 define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
    535 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0:
    536 ; CHECK:       # %bb.0:
    537 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    538 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    539 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2],zmm1[3,2],zmm0[7,6],zmm1[7,6],zmm0[11,10],zmm1[11,10],zmm0[15,14],zmm1[15,14]
    540 ; CHECK-NEXT:    retq
          ; Per group of four lanes: %vec1[3,2] then %vec2[3,2], repeated at +4, +8 and +12.
          ; Lanes where %mask is oeq 0.0 get the shuffle; other lanes are zeroed ({z} form).
    541   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 2, i32 19, i32 18, i32 7, i32 6, i32 23, i32 22, i32 11, i32 10, i32 27, i32 26, i32 15, i32 14, i32 31, i32 30>
    542   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    543   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    544   ret <16 x float> %res
    545 }
    546 define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
        ; Merge-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result (per group: %vec1[1,2], %vec2[3,3]); other lanes keep %vec3.
        ; The assertions expect vshufps with a {%k1} write mask, then a vmovaps into zmm0.
    547 ; CHECK-LABEL: test_16xfloat_masked_shuff_mask1:
    548 ; CHECK:       # %bb.0:
    549 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
    550 ; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
    551 ; CHECK-NEXT:    vshufps {{.*#+}} zmm2 {%k1} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15]
    552 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
    553 ; CHECK-NEXT:    retq
    554   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 1, i32 2, i32 19, i32 19, i32 5, i32 6, i32 23, i32 23, i32 9, i32 10, i32 27, i32 27, i32 13, i32 14, i32 31, i32 31>
    555   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    556   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
    557   ret <16 x float> %res
    558 }
    559 
    560 define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
        ; Zero-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result (per group: %vec1[1,2], %vec2[3,3]); other lanes become zero.
        ; The assertions expect the select to fold into vshufps with {%k1} {z}.
    561 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1:
    562 ; CHECK:       # %bb.0:
    563 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    564 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    565 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2],zmm1[3,3],zmm0[5,6],zmm1[7,7],zmm0[9,10],zmm1[11,11],zmm0[13,14],zmm1[15,15]
    566 ; CHECK-NEXT:    retq
    567   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 1, i32 2, i32 19, i32 19, i32 5, i32 6, i32 23, i32 23, i32 9, i32 10, i32 27, i32 27, i32 13, i32 14, i32 31, i32 31>
    568   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    569   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    570   ret <16 x float> %res
    571 }
    572 define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
        ; Merge-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result (per group: %vec1[3,0], %vec2[2,1]); other lanes keep %vec3.
        ; The assertions expect vshufps with a {%k1} write mask, then a vmovaps into zmm0.
    573 ; CHECK-LABEL: test_16xfloat_masked_shuff_mask2:
    574 ; CHECK:       # %bb.0:
    575 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
    576 ; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
    577 ; CHECK-NEXT:    vshufps {{.*#+}} zmm2 {%k1} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13]
    578 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
    579 ; CHECK-NEXT:    retq
    580   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 18, i32 17, i32 7, i32 4, i32 22, i32 21, i32 11, i32 8, i32 26, i32 25, i32 15, i32 12, i32 30, i32 29>
    581   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    582   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
    583   ret <16 x float> %res
    584 }
    585 
    586 define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
        ; Zero-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result (per group: %vec1[3,0], %vec2[2,1]); other lanes become zero.
        ; The assertions expect the select to fold into vshufps with {%k1} {z}.
    587 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2:
    588 ; CHECK:       # %bb.0:
    589 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    590 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    591 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],zmm1[2,1],zmm0[7,4],zmm1[6,5],zmm0[11,8],zmm1[10,9],zmm0[15,12],zmm1[14,13]
    592 ; CHECK-NEXT:    retq
    593   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 18, i32 17, i32 7, i32 4, i32 22, i32 21, i32 11, i32 8, i32 26, i32 25, i32 15, i32 12, i32 30, i32 29>
    594   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    595   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    596   ret <16 x float> %res
    597 }
    598 define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) {
        ; Unmasked register-register shuffle of two <16 x float> values.
        ; Every 4-element group takes elements 2,3 of %vec1 followed by elements 0,2
        ; of %vec2; the assertions expect a single 512-bit vshufps.
    599 ; CHECK-LABEL: test_16xfloat_shuff_mask3:
    600 ; CHECK:       # %bb.0:
    601 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14]
    602 ; CHECK-NEXT:    retq
    603   %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 3, i32 16, i32 18, i32 6, i32 7, i32 20, i32 22, i32 10, i32 11, i32 24, i32 26, i32 14, i32 15, i32 28, i32 30>
    604   ret <16 x float> %res
    605 }
    606 define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
        ; Merge-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result (per group: %vec1[2,3], %vec2[0,2]); other lanes keep %vec3.
        ; The assertions expect vshufps with a {%k1} write mask, then a vmovaps into zmm0.
    607 ; CHECK-LABEL: test_16xfloat_masked_shuff_mask3:
    608 ; CHECK:       # %bb.0:
    609 ; CHECK-NEXT:    vxorps %xmm4, %xmm4, %xmm4
    610 ; CHECK-NEXT:    vcmpeqps %zmm4, %zmm3, %k1
    611 ; CHECK-NEXT:    vshufps {{.*#+}} zmm2 {%k1} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14]
    612 ; CHECK-NEXT:    vmovaps %zmm2, %zmm0
    613 ; CHECK-NEXT:    retq
    614   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 3, i32 16, i32 18, i32 6, i32 7, i32 20, i32 22, i32 10, i32 11, i32 24, i32 26, i32 14, i32 15, i32 28, i32 30>
    615   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    616   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
    617   ret <16 x float> %res
    618 }
    619 
    620 define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
        ; Zero-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result (per group: %vec1[2,3], %vec2[0,2]); other lanes become zero.
        ; The assertions expect the select to fold into vshufps with {%k1} {z}.
    621 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3:
    622 ; CHECK:       # %bb.0:
    623 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    624 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    625 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3],zmm1[0,2],zmm0[6,7],zmm1[4,6],zmm0[10,11],zmm1[8,10],zmm0[14,15],zmm1[12,14]
    626 ; CHECK-NEXT:    retq
    627   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 3, i32 16, i32 18, i32 6, i32 7, i32 20, i32 22, i32 10, i32 11, i32 24, i32 26, i32 14, i32 15, i32 28, i32 30>
    628   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    629   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    630   ret <16 x float> %res
    631 }
    632 define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
        ; Unmasked shuffle whose second operand is loaded from %vec2p.
        ; Every 4-element group takes elements 3,0 of %vec1 followed by elements 0,2
        ; of the loaded vector; the assertions expect the load to be folded into the
        ; vshufps memory operand (printed as mem[...]).
    633 ; CHECK-LABEL: test_16xfloat_shuff_mem_mask0:
    634 ; CHECK:       # %bb.0:
    635 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14]
    636 ; CHECK-NEXT:    retq
    637   %vec2 = load <16 x float>, <16 x float>* %vec2p
    638   %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 16, i32 18, i32 7, i32 4, i32 20, i32 22, i32 11, i32 8, i32 24, i32 26, i32 15, i32 12, i32 28, i32 30>
    639   ret <16 x float> %res
    640 }
    641 define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
        ; Merge-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take the shuffle result (per group: %vec1[3,0],
        ; loaded[0,2]); other lanes keep %vec3. The assertions expect a load-folded
        ; vshufps with a {%k1} write mask, then a vmovaps into zmm0.
    642 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0:
    643 ; CHECK:       # %bb.0:
    644 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    645 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    646 ; CHECK-NEXT:    vshufps {{.*#+}} zmm1 {%k1} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14]
    647 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    648 ; CHECK-NEXT:    retq
    649   %vec2 = load <16 x float>, <16 x float>* %vec2p
    650   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 16, i32 18, i32 7, i32 4, i32 20, i32 22, i32 11, i32 8, i32 24, i32 26, i32 15, i32 12, i32 28, i32 30>
    651   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    652   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
    653   ret <16 x float> %res
    654 }
    655 
    656 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
        ; Zero-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take the shuffle result (per group: %vec1[3,0],
        ; loaded[0,2]); other lanes become zero. The assertions expect a load-folded
        ; vshufps with {%k1} {z}.
    657 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
    658 ; CHECK:       # %bb.0:
    659 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    660 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
    661 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0],mem[0,2],zmm0[7,4],mem[4,6],zmm0[11,8],mem[8,10],zmm0[15,12],mem[12,14]
    662 ; CHECK-NEXT:    retq
    663   %vec2 = load <16 x float>, <16 x float>* %vec2p
    664   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 3, i32 0, i32 16, i32 18, i32 7, i32 4, i32 20, i32 22, i32 11, i32 8, i32 24, i32 26, i32 15, i32 12, i32 28, i32 30>
    665   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    666   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    667   ret <16 x float> %res
    668 }
    669 
    670 define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
        ; Merge-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take the shuffle result (per group: %vec1[0,2],
        ; loaded[3,2]); other lanes keep %vec3. The assertions expect a load-folded
        ; vshufps with a {%k1} write mask, then a vmovaps into zmm0.
    671 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1:
    672 ; CHECK:       # %bb.0:
    673 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    674 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    675 ; CHECK-NEXT:    vshufps {{.*#+}} zmm1 {%k1} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14]
    676 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    677 ; CHECK-NEXT:    retq
    678   %vec2 = load <16 x float>, <16 x float>* %vec2p
    679   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 2, i32 19, i32 18, i32 4, i32 6, i32 23, i32 22, i32 8, i32 10, i32 27, i32 26, i32 12, i32 14, i32 31, i32 30>
    680   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    681   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
    682   ret <16 x float> %res
    683 }
    684 
    685 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
        ; Zero-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take the shuffle result (per group: %vec1[0,2],
        ; loaded[3,2]); other lanes become zero. The assertions expect a load-folded
        ; vshufps with {%k1} {z}.
    686 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
    687 ; CHECK:       # %bb.0:
    688 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    689 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
    690 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[0,2],mem[3,2],zmm0[4,6],mem[7,6],zmm0[8,10],mem[11,10],zmm0[12,14],mem[15,14]
    691 ; CHECK-NEXT:    retq
    692   %vec2 = load <16 x float>, <16 x float>* %vec2p
    693   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 2, i32 19, i32 18, i32 4, i32 6, i32 23, i32 22, i32 8, i32 10, i32 27, i32 26, i32 12, i32 14, i32 31, i32 30>
    694   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    695   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    696   ret <16 x float> %res
    697 }
    698 
    699 define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
        ; Merge-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take the shuffle result (per group: %vec1[2,0],
        ; loaded[2,2]); other lanes keep %vec3. The assertions expect a load-folded
        ; vshufps with a {%k1} write mask, then a vmovaps into zmm0.
    700 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2:
    701 ; CHECK:       # %bb.0:
    702 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    703 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    704 ; CHECK-NEXT:    vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14]
    705 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    706 ; CHECK-NEXT:    retq
    707   %vec2 = load <16 x float>, <16 x float>* %vec2p
    708   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 0, i32 18, i32 18, i32 6, i32 4, i32 22, i32 22, i32 10, i32 8, i32 26, i32 26, i32 14, i32 12, i32 30, i32 30>
    709   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    710   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
    711   ret <16 x float> %res
    712 }
    713 
    714 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
        ; Zero-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take the shuffle result (per group: %vec1[2,0],
        ; loaded[2,2]); other lanes become zero. The assertions expect a load-folded
        ; vshufps with {%k1} {z}.
    715 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
    716 ; CHECK:       # %bb.0:
    717 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    718 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
    719 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0],mem[2,2],zmm0[6,4],mem[6,6],zmm0[10,8],mem[10,10],zmm0[14,12],mem[14,14]
    720 ; CHECK-NEXT:    retq
    721   %vec2 = load <16 x float>, <16 x float>* %vec2p
    722   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 0, i32 18, i32 18, i32 6, i32 4, i32 22, i32 22, i32 10, i32 8, i32 26, i32 26, i32 14, i32 12, i32 30, i32 30>
    723   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    724   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    725   ret <16 x float> %res
    726 }
    727 
    728 define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
        ; Unmasked shuffle whose second operand is loaded from %vec2p.
        ; Every 4-element group takes elements 2,1 of %vec1 followed by elements 1,3
        ; of the loaded vector; the assertions expect the load to be folded into the
        ; vshufps memory operand (printed as mem[...]).
    729 ; CHECK-LABEL: test_16xfloat_shuff_mem_mask3:
    730 ; CHECK:       # %bb.0:
    731 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15]
    732 ; CHECK-NEXT:    retq
    733   %vec2 = load <16 x float>, <16 x float>* %vec2p
    734   %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 1, i32 17, i32 19, i32 6, i32 5, i32 21, i32 23, i32 10, i32 9, i32 25, i32 27, i32 14, i32 13, i32 29, i32 31>
    735   ret <16 x float> %res
    736 }
    737 define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
        ; Merge-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take the shuffle result (per group: %vec1[2,1],
        ; loaded[1,3]); other lanes keep %vec3. The assertions expect a load-folded
        ; vshufps with a {%k1} write mask, then a vmovaps into zmm0.
    738 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3:
    739 ; CHECK:       # %bb.0:
    740 ; CHECK-NEXT:    vxorps %xmm3, %xmm3, %xmm3
    741 ; CHECK-NEXT:    vcmpeqps %zmm3, %zmm2, %k1
    742 ; CHECK-NEXT:    vshufps {{.*#+}} zmm1 {%k1} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15]
    743 ; CHECK-NEXT:    vmovaps %zmm1, %zmm0
    744 ; CHECK-NEXT:    retq
    745   %vec2 = load <16 x float>, <16 x float>* %vec2p
    746   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 1, i32 17, i32 19, i32 6, i32 5, i32 21, i32 23, i32 10, i32 9, i32 25, i32 27, i32 14, i32 13, i32 29, i32 31>
    747   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    748   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
    749   ret <16 x float> %res
    750 }
    751 
    752 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
        ; Zero-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take the shuffle result (per group: %vec1[2,1],
        ; loaded[1,3]); other lanes become zero. The assertions expect a load-folded
        ; vshufps with {%k1} {z}.
    753 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
    754 ; CHECK:       # %bb.0:
    755 ; CHECK-NEXT:    vxorps %xmm2, %xmm2, %xmm2
    756 ; CHECK-NEXT:    vcmpeqps %zmm2, %zmm1, %k1
    757 ; CHECK-NEXT:    vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1],mem[1,3],zmm0[6,5],mem[5,7],zmm0[10,9],mem[9,11],zmm0[14,13],mem[13,15]
    758 ; CHECK-NEXT:    retq
    759   %vec2 = load <16 x float>, <16 x float>* %vec2p
    760   %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 1, i32 17, i32 19, i32 6, i32 5, i32 21, i32 23, i32 10, i32 9, i32 25, i32 27, i32 14, i32 13, i32 29, i32 31>
    761   %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
    762   %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
    763   ret <16 x float> %res
    764 }
    765 
    766 define <2 x double> @test_2xdouble_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2) {
        ; Unmasked register-register shuffle of two <2 x double> values: the result is
        ; { %vec1[1], %vec2[0] }. The assertions expect a single 128-bit vshufpd.
    767 ; CHECK-LABEL: test_2xdouble_shuff_mask0:
    768 ; CHECK:       # %bb.0:
    769 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],xmm1[0]
    770 ; CHECK-NEXT:    retq
    771   %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    772   ret <2 x double> %res
    773 }
    774 define <2 x double> @test_2xdouble_masked_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
        ; Merge-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[1], %vec2[0] }; other lanes keep %vec3.
        ; The assertions expect vshufpd with a {%k1} write mask, then a vmovapd into xmm0.
    775 ; CHECK-LABEL: test_2xdouble_masked_shuff_mask0:
    776 ; CHECK:       # %bb.0:
    777 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
    778 ; CHECK-NEXT:    vcmpeqpd %xmm4, %xmm3, %k1
    779 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
    780 ; CHECK-NEXT:    vmovapd %xmm2, %xmm0
    781 ; CHECK-NEXT:    retq
    782   %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    783   %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
    784   %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
    785   ret <2 x double> %res
    786 }
    787 
    788 define <2 x double> @test_2xdouble_zero_masked_shuff_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
        ; Zero-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[1], %vec2[0] }; other lanes become zero.
        ; The assertions expect the select to fold into vshufpd with {%k1} {z}.
    789 ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask0:
    790 ; CHECK:       # %bb.0:
    791 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    792 ; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
    793 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
    794 ; CHECK-NEXT:    retq
    795   %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    796   %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
    797   %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
    798   ret <2 x double> %res
    799 }
    800 define <2 x double> @test_2xdouble_masked_shuff_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
        ; Merge-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[1], %vec2[0] }; other lanes keep %vec3.
        ; NOTE(review): the index vector <1, 2> is the same one used by
        ; test_2xdouble_masked_shuff_mask0, so both tests exercise the same shuffle.
    801 ; CHECK-LABEL: test_2xdouble_masked_shuff_mask1:
    802 ; CHECK:       # %bb.0:
    803 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
    804 ; CHECK-NEXT:    vcmpeqpd %xmm4, %xmm3, %k1
    805 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
    806 ; CHECK-NEXT:    vmovapd %xmm2, %xmm0
    807 ; CHECK-NEXT:    retq
    808   %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    809   %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
    810   %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
    811   ret <2 x double> %res
    812 }
    813 
    814 define <2 x double> @test_2xdouble_zero_masked_shuff_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
        ; Zero-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[1], %vec2[0] }; other lanes become zero.
        ; NOTE(review): the index vector <1, 2> is the same one used by
        ; test_2xdouble_zero_masked_shuff_mask0, so both tests exercise the same shuffle.
    815 ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mask1:
    816 ; CHECK:       # %bb.0:
    817 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    818 ; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
    819 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
    820 ; CHECK-NEXT:    retq
    821   %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    822   %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
    823   %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
    824   ret <2 x double> %res
    825 }
    826 define <2 x double> @test_2xdouble_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p) {
        ; Unmasked shuffle whose second operand is loaded from %vec2p: the result is
        ; { %vec1[1], loaded[0] }. The assertions expect the load to be folded into the
        ; vshufpd memory operand (printed as mem[...]).
    827 ; CHECK-LABEL: test_2xdouble_shuff_mem_mask0:
    828 ; CHECK:       # %bb.0:
    829 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm0 = xmm0[1],mem[0]
    830 ; CHECK-NEXT:    retq
    831   %vec2 = load <2 x double>, <2 x double>* %vec2p
    832   %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    833   ret <2 x double> %res
    834 }
    835 define <2 x double> @test_2xdouble_masked_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
        ; Merge-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take { %vec1[1], loaded[0] }; other lanes keep
        ; %vec3. The assertions expect a load-folded vshufpd with a {%k1} write mask,
        ; then a vmovapd into xmm0.
    836 ; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask0:
    837 ; CHECK:       # %bb.0:
    838 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    839 ; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
    840 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0]
    841 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    842 ; CHECK-NEXT:    retq
    843   %vec2 = load <2 x double>, <2 x double>* %vec2p
    844   %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    845   %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
    846   %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
    847   ret <2 x double> %res
    848 }
    849 
    850 define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask0(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
        ; Zero-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take { %vec1[1], loaded[0] }; other lanes
        ; become zero. The assertions expect a load-folded vshufpd with {%k1} {z}.
    851 ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask0:
    852 ; CHECK:       # %bb.0:
    853 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    854 ; CHECK-NEXT:    vcmpeqpd %xmm2, %xmm1, %k1
    855 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0]
    856 ; CHECK-NEXT:    retq
    857   %vec2 = load <2 x double>, <2 x double>* %vec2p
    858   %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    859   %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
    860   %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
    861   ret <2 x double> %res
    862 }
    863 
    864 define <2 x double> @test_2xdouble_masked_shuff_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %vec3, <2 x double> %mask) {
        ; Merge-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take { %vec1[1], loaded[0] }; other lanes keep
        ; %vec3.
        ; NOTE(review): the index vector <1, 2> is the same one used by
        ; test_2xdouble_masked_shuff_mem_mask0, so both tests exercise the same shuffle.
    865 ; CHECK-LABEL: test_2xdouble_masked_shuff_mem_mask1:
    866 ; CHECK:       # %bb.0:
    867 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    868 ; CHECK-NEXT:    vcmpeqpd %xmm3, %xmm2, %k1
    869 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[0]
    870 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    871 ; CHECK-NEXT:    retq
    872   %vec2 = load <2 x double>, <2 x double>* %vec2p
    873   %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    874   %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
    875   %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
    876   ret <2 x double> %res
    877 }
    878 
    879 define <2 x double> @test_2xdouble_zero_masked_shuff_mem_mask1(<2 x double> %vec1, <2 x double>* %vec2p, <2 x double> %mask) {
        ; Zero-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take { %vec1[1], loaded[0] }; other lanes
        ; become zero.
        ; NOTE(review): the index vector <1, 2> is the same one used by
        ; test_2xdouble_zero_masked_shuff_mem_mask0, so both tests exercise the same shuffle.
    880 ; CHECK-LABEL: test_2xdouble_zero_masked_shuff_mem_mask1:
    881 ; CHECK:       # %bb.0:
    882 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
    883 ; CHECK-NEXT:    vcmpeqpd %xmm2, %xmm1, %k1
    884 ; CHECK-NEXT:    vshufpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[0]
    885 ; CHECK-NEXT:    retq
    886   %vec2 = load <2 x double>, <2 x double>* %vec2p
    887   %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 2>
    888   %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
    889   %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
    890   ret <2 x double> %res
    891 }
    892 
    893 define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) {
        ; Unmasked register-register shuffle of two <4 x double> values: the result is
        ; { %vec1[0], %vec2[0], %vec1[3], %vec2[3] }. The assertions expect a single
        ; 256-bit vshufpd.
    894 ; CHECK-LABEL: test_4xdouble_shuff_mask0:
    895 ; CHECK:       # %bb.0:
    896 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
    897 ; CHECK-NEXT:    retq
    898   %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 7>
    899   ret <4 x double> %res
    900 }
    901 define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
        ; Merge-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[0], %vec2[0], %vec1[3], %vec2[3] }; other lanes keep
        ; %vec3. The assertions expect vshufpd with a {%k1} write mask, then a vmovapd
        ; into ymm0.
    902 ; CHECK-LABEL: test_4xdouble_masked_shuff_mask0:
    903 ; CHECK:       # %bb.0:
    904 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
    905 ; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
    906 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
    907 ; CHECK-NEXT:    vmovapd %ymm2, %ymm0
    908 ; CHECK-NEXT:    retq
    909   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 7>
    910   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    911   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
    912   ret <4 x double> %res
    913 }
    914 
    915 define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
        ; Zero-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[0], %vec2[0], %vec1[3], %vec2[3] }; other lanes become
        ; zero. The assertions expect the select to fold into vshufpd with {%k1} {z}.
    916 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0:
    917 ; CHECK:       # %bb.0:
    918 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    919 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
    920 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[3]
    921 ; CHECK-NEXT:    retq
    922   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 7>
    923   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    924   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
    925   ret <4 x double> %res
    926 }
    927 define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
        ; Merge-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[0], %vec2[0], %vec1[3], %vec2[2] }; other lanes keep
        ; %vec3. The assertions expect vshufpd with a {%k1} write mask, then a vmovapd
        ; into ymm0.
    928 ; CHECK-LABEL: test_4xdouble_masked_shuff_mask1:
    929 ; CHECK:       # %bb.0:
    930 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
    931 ; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
    932 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
    933 ; CHECK-NEXT:    vmovapd %ymm2, %ymm0
    934 ; CHECK-NEXT:    retq
    935   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 6>
    936   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    937   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
    938   ret <4 x double> %res
    939 }
    940 
    941 define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
        ; Zero-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[0], %vec2[0], %vec1[3], %vec2[2] }; other lanes become
        ; zero. The assertions expect the select to fold into vshufpd with {%k1} {z}.
    942 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1:
    943 ; CHECK:       # %bb.0:
    944 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    945 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
    946 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[3],ymm1[2]
    947 ; CHECK-NEXT:    retq
    948   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 6>
    949   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    950   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
    951   ret <4 x double> %res
    952 }
    953 define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
        ; Merge-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[1], %vec2[0], %vec1[3], %vec2[2] }; other lanes keep
        ; %vec3. The assertions expect vshufpd with a {%k1} write mask, then a vmovapd
        ; into ymm0.
    954 ; CHECK-LABEL: test_4xdouble_masked_shuff_mask2:
    955 ; CHECK:       # %bb.0:
    956 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
    957 ; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
    958 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
    959 ; CHECK-NEXT:    vmovapd %ymm2, %ymm0
    960 ; CHECK-NEXT:    retq
    961   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
    962   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    963   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
    964   ret <4 x double> %res
    965 }
    966 
    967 define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
        ; Zero-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[1], %vec2[0], %vec1[3], %vec2[2] }; other lanes become
        ; zero. The assertions expect the select to fold into vshufpd with {%k1} {z}.
    968 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2:
    969 ; CHECK:       # %bb.0:
    970 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
    971 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
    972 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[0],ymm0[3],ymm1[2]
    973 ; CHECK-NEXT:    retq
    974   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 4, i32 3, i32 6>
    975   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    976   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
    977   ret <4 x double> %res
    978 }
    979 define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) {
        ; Unmasked register-register shuffle of two <4 x double> values: the result is
        ; { %vec1[0], %vec2[0], %vec1[2], %vec2[3] }. The assertions expect a single
        ; 256-bit vshufpd.
    980 ; CHECK-LABEL: test_4xdouble_shuff_mask3:
    981 ; CHECK:       # %bb.0:
    982 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
    983 ; CHECK-NEXT:    retq
    984   %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
    985   ret <4 x double> %res
    986 }
    987 define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
        ; Merge-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[0], %vec2[0], %vec1[2], %vec2[3] }; other lanes keep
        ; %vec3. The assertions expect vshufpd with a {%k1} write mask, then a vmovapd
        ; into ymm0.
    988 ; CHECK-LABEL: test_4xdouble_masked_shuff_mask3:
    989 ; CHECK:       # %bb.0:
    990 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
    991 ; CHECK-NEXT:    vcmpeqpd %ymm4, %ymm3, %k1
    992 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
    993 ; CHECK-NEXT:    vmovapd %ymm2, %ymm0
    994 ; CHECK-NEXT:    retq
    995   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
    996   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
    997   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
    998   ret <4 x double> %res
    999 }
   1000 
   1001 define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
        ; Zero-masked variant: lanes where %mask is ordered-equal to 0.0 take the
        ; shuffle result { %vec1[0], %vec2[0], %vec1[2], %vec2[3] }; other lanes become
        ; zero. The assertions expect the select to fold into vshufpd with {%k1} {z}.
   1002 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3:
   1003 ; CHECK:       # %bb.0:
   1004 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1005 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
   1006 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[3]
   1007 ; CHECK-NEXT:    retq
   1008   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 7>
   1009   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
   1010   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
   1011   ret <4 x double> %res
   1012 }
   1013 define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
        ; Unmasked shuffle whose second operand is loaded from %vec2p: the result is
        ; { %vec1[1], loaded[1], %vec1[3], loaded[2] }. The assertions expect the load
        ; to be folded into the vshufpd memory operand (printed as mem[...]).
   1014 ; CHECK-LABEL: test_4xdouble_shuff_mem_mask0:
   1015 ; CHECK:       # %bb.0:
   1016 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[2]
   1017 ; CHECK-NEXT:    retq
   1018   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1019   %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 6>
   1020   ret <4 x double> %res
   1021 }
   1022 define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
        ; Merge-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take { %vec1[1], loaded[1], %vec1[3], loaded[2] };
        ; other lanes keep %vec3. The assertions expect a load-folded vshufpd with a
        ; {%k1} write mask, then a vmovapd into ymm0.
   1023 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0:
   1024 ; CHECK:       # %bb.0:
   1025 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1026 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
   1027 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[2]
   1028 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
   1029 ; CHECK-NEXT:    retq
   1030   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1031   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 6>
   1032   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
   1033   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
   1034   ret <4 x double> %res
   1035 }
   1036 
   1037 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
        ; Zero-masked variant with the second operand loaded from %vec2p: lanes where
        ; %mask is ordered-equal to 0.0 take { %vec1[1], loaded[1], %vec1[3], loaded[2] };
        ; other lanes become zero. The assertions expect a load-folded vshufpd with
        ; {%k1} {z}.
   1038 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
   1039 ; CHECK:       # %bb.0:
   1040 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1041 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
   1042 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[2]
   1043 ; CHECK-NEXT:    retq
   1044   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1045   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 6>
   1046   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
   1047   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
   1048   ret <4 x double> %res
   1049 }
   1050 
   1051 define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
   1052 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1:
   1053 ; CHECK:       # %bb.0:
   1054 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1055 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
   1056 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[1],ymm0[2],mem[2]
   1057 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
   1058 ; CHECK-NEXT:    retq
          ; Merge-masked memory shuffle with index pattern <0,5,2,6>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; keep %vec3. Expected as a {%k1}-masked vshufpd with the load folded in.
   1059   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1060   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 5, i32 2, i32 6>
   1061   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
   1062   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
   1063   ret <4 x double> %res
   1064 }
   1065 
   1066 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
   1067 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
   1068 ; CHECK:       # %bb.0:
   1069 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1070 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
   1071 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[1],ymm0[2],mem[2]
   1072 ; CHECK-NEXT:    retq
          ; Zero-masked memory shuffle with index pattern <0,5,2,6>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; are zeroed. Expected as a {%k1} {z} vshufpd with the load folded in.
   1073   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1074   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 5, i32 2, i32 6>
   1075   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
   1076   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
   1077   ret <4 x double> %res
   1078 }
   1079 
   1080 define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
   1081 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2:
   1082 ; CHECK:       # %bb.0:
   1083 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1084 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
   1085 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[3],mem[2]
   1086 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
   1087 ; CHECK-NEXT:    retq
          ; Merge-masked memory shuffle with index pattern <0,4,3,6>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; keep %vec3. Expected as a {%k1}-masked vshufpd with the load folded in.
   1088   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1089   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 6>
   1090   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
   1091   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
   1092   ret <4 x double> %res
   1093 }
   1094 
   1095 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
   1096 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
   1097 ; CHECK:       # %bb.0:
   1098 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1099 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
   1100 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[3],mem[2]
   1101 ; CHECK-NEXT:    retq
          ; Zero-masked memory shuffle with index pattern <0,4,3,6>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; are zeroed. Expected as a {%k1} {z} vshufpd with the load folded in.
   1102   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1103   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 3, i32 6>
   1104   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
   1105   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
   1106   ret <4 x double> %res
   1107 }
   1108 
   1109 define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
   1110 ; CHECK-LABEL: test_4xdouble_shuff_mem_mask3:
   1111 ; CHECK:       # %bb.0:
   1112 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[2],mem[2]
   1113 ; CHECK-NEXT:    retq
          ; Unmasked memory shuffle with index pattern <1,5,2,6> (0-3 from %vec1,
          ; 4-7 from the loaded %vec2). The assertions above expect the load folded
          ; into one ymm vshufpd.
   1114   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1115   %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 2, i32 6>
   1116   ret <4 x double> %res
   1117 }
   1118 define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
   1119 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3:
   1120 ; CHECK:       # %bb.0:
   1121 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1122 ; CHECK-NEXT:    vcmpeqpd %ymm3, %ymm2, %k1
   1123 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[2],mem[2]
   1124 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
   1125 ; CHECK-NEXT:    retq
          ; Merge-masked memory shuffle with index pattern <1,5,2,6>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; keep %vec3. Expected as a {%k1}-masked vshufpd with the load folded in.
   1126   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1127   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 2, i32 6>
   1128   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
   1129   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
   1130   ret <4 x double> %res
   1131 }
   1132 
   1133 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
   1134 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
   1135 ; CHECK:       # %bb.0:
   1136 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1137 ; CHECK-NEXT:    vcmpeqpd %ymm2, %ymm1, %k1
   1138 ; CHECK-NEXT:    vshufpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[2],mem[2]
   1139 ; CHECK-NEXT:    retq
          ; Zero-masked memory shuffle with index pattern <1,5,2,6>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; are zeroed. Expected as a {%k1} {z} vshufpd with the load folded in.
   1140   %vec2 = load <4 x double>, <4 x double>* %vec2p
   1141   %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 2, i32 6>
   1142   %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
   1143   %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
   1144   ret <4 x double> %res
   1145 }
   1146 
   1147 define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) {
   1148 ; CHECK-LABEL: test_8xdouble_shuff_mask0:
   1149 ; CHECK:       # %bb.0:
   1150 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
   1151 ; CHECK-NEXT:    retq
          ; Unmasked register-register shuffle of two <8 x double> vectors: mask
          ; indices 0-7 pick from %vec1 and 8-15 from %vec2. The assertions above
          ; expect a single zmm vshufpd.
   1152   %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 4, i32 13, i32 7, i32 15>
   1153   ret <8 x double> %res
   1154 }
   1155 define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
   1156 ; CHECK-LABEL: test_8xdouble_masked_shuff_mask0:
   1157 ; CHECK:       # %bb.0:
   1158 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
   1159 ; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
   1160 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
   1161 ; CHECK-NEXT:    vmovapd %zmm2, %zmm0
   1162 ; CHECK-NEXT:    retq
          ; Merge-masked register-register shuffle <0,8,2,11,4,13,7,15>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; keep %vec3. Expected as a {%k1}-masked zmm vshufpd, then a copy to zmm0.
   1163   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 4, i32 13, i32 7, i32 15>
   1164   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1165   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
   1166   ret <8 x double> %res
   1167 }
   1168 
   1169 define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
   1170 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0:
   1171 ; CHECK:       # %bb.0:
   1172 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1173 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
   1174 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[4],zmm1[5],zmm0[7],zmm1[7]
   1175 ; CHECK-NEXT:    retq
          ; Zero-masked register-register shuffle <0,8,2,11,4,13,7,15>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; are zeroed. Expected as a single {%k1} {z} zmm vshufpd.
   1176   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 4, i32 13, i32 7, i32 15>
   1177   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1178   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
   1179   ret <8 x double> %res
   1180 }
   1181 define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
   1182 ; CHECK-LABEL: test_8xdouble_masked_shuff_mask1:
   1183 ; CHECK:       # %bb.0:
   1184 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
   1185 ; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
   1186 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
   1187 ; CHECK-NEXT:    vmovapd %zmm2, %zmm0
   1188 ; CHECK-NEXT:    retq
          ; Merge-masked register-register shuffle <0,8,2,11,5,13,6,15>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; keep %vec3. Expected as a {%k1}-masked zmm vshufpd, then a copy to zmm0.
   1189   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 5, i32 13, i32 6, i32 15>
   1190   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1191   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
   1192   ret <8 x double> %res
   1193 }
   1194 
   1195 define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
   1196 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1:
   1197 ; CHECK:       # %bb.0:
   1198 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1199 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
   1200 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[3],zmm0[5],zmm1[5],zmm0[6],zmm1[7]
   1201 ; CHECK-NEXT:    retq
          ; Zero-masked register-register shuffle <0,8,2,11,5,13,6,15>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; are zeroed. Expected as a single {%k1} {z} zmm vshufpd.
   1202   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 11, i32 5, i32 13, i32 6, i32 15>
   1203   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1204   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
   1205   ret <8 x double> %res
   1206 }
   1207 define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
   1208 ; CHECK-LABEL: test_8xdouble_masked_shuff_mask2:
   1209 ; CHECK:       # %bb.0:
   1210 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
   1211 ; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
   1212 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6]
   1213 ; CHECK-NEXT:    vmovapd %zmm2, %zmm0
   1214 ; CHECK-NEXT:    retq
          ; Merge-masked register-register shuffle <1,8,3,11,4,13,6,14>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; keep %vec3. Expected as a {%k1}-masked zmm vshufpd, then a copy to zmm0.
   1215   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 13, i32 6, i32 14>
   1216   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1217   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
   1218   ret <8 x double> %res
   1219 }
   1220 
   1221 define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
   1222 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2:
   1223 ; CHECK:       # %bb.0:
   1224 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1225 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
   1226 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[5],zmm0[6],zmm1[6]
   1227 ; CHECK-NEXT:    retq
          ; Zero-masked register-register shuffle <1,8,3,11,4,13,6,14>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; are zeroed. Expected as a single {%k1} {z} zmm vshufpd.
   1228   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 13, i32 6, i32 14>
   1229   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1230   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
   1231   ret <8 x double> %res
   1232 }
   1233 define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) {
   1234 ; CHECK-LABEL: test_8xdouble_shuff_mask3:
   1235 ; CHECK:       # %bb.0:
   1236 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
   1237 ; CHECK-NEXT:    retq
          ; Unmasked register-register shuffle <1,8,3,11,4,12,7,15> (0-7 from %vec1,
          ; 8-15 from %vec2). The assertions above expect a single zmm vshufpd.
   1238   %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 12, i32 7, i32 15>
   1239   ret <8 x double> %res
   1240 }
   1241 define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
   1242 ; CHECK-LABEL: test_8xdouble_masked_shuff_mask3:
   1243 ; CHECK:       # %bb.0:
   1244 ; CHECK-NEXT:    vxorpd %xmm4, %xmm4, %xmm4
   1245 ; CHECK-NEXT:    vcmpeqpd %zmm4, %zmm3, %k1
   1246 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
   1247 ; CHECK-NEXT:    vmovapd %zmm2, %zmm0
   1248 ; CHECK-NEXT:    retq
          ; Merge-masked register-register shuffle <1,8,3,11,4,12,7,15>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; keep %vec3. Expected as a {%k1}-masked zmm vshufpd, then a copy to zmm0.
   1249   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 12, i32 7, i32 15>
   1250   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1251   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
   1252   ret <8 x double> %res
   1253 }
   1254 
   1255 define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
   1256 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3:
   1257 ; CHECK:       # %bb.0:
   1258 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1259 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
   1260 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[0],zmm0[3],zmm1[3],zmm0[4],zmm1[4],zmm0[7],zmm1[7]
   1261 ; CHECK-NEXT:    retq
          ; Zero-masked register-register shuffle <1,8,3,11,4,12,7,15>: lanes where
          ; %mask compares ordered-equal to zero take the shuffle result, the rest
          ; are zeroed. Expected as a single {%k1} {z} zmm vshufpd.
   1262   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 11, i32 4, i32 12, i32 7, i32 15>
   1263   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1264   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
   1265   ret <8 x double> %res
   1266 }
   1267 define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
   1268 ; CHECK-LABEL: test_8xdouble_shuff_mem_mask0:
   1269 ; CHECK:       # %bb.0:
   1270 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
   1271 ; CHECK-NEXT:    retq
          ; Unmasked shuffle whose second operand is loaded from %vec2p: mask
          ; indices 0-7 pick from %vec1 and 8-15 from the loaded %vec2. The
          ; assertions above expect the load folded into one zmm vshufpd (mem[...]).
   1272   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1273   %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 5, i32 13, i32 6, i32 15>
   1274   ret <8 x double> %res
   1275 }
   1276 define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
   1277 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0:
   1278 ; CHECK:       # %bb.0:
   1279 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1280 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
   1281 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
   1282 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
   1283 ; CHECK-NEXT:    retq
          ; Merge-masked memory shuffle <0,8,2,10,5,13,6,15>: lanes where %mask
          ; compares ordered-equal to zero take the shuffle result, the rest keep
          ; %vec3. Expected as a {%k1}-masked zmm vshufpd with the load folded in,
          ; followed by a copy of the merged result into zmm0.
   1284   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1285   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 5, i32 13, i32 6, i32 15>
   1286   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1287   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
   1288   ret <8 x double> %res
   1289 }
   1290 
   1291 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
   1292 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
   1293 ; CHECK:       # %bb.0:
   1294 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1295 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
   1296 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[5],mem[5],zmm0[6],mem[7]
   1297 ; CHECK-NEXT:    retq
          ; Zero-masked memory shuffle <0,8,2,10,5,13,6,15>: lanes where %mask
          ; compares ordered-equal to zero take the shuffle result, the rest are
          ; zeroed. Expected as a {%k1} {z} zmm vshufpd with the load folded in.
   1298   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1299   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 5, i32 13, i32 6, i32 15>
   1300   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1301   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
   1302   ret <8 x double> %res
   1303 }
   1304 
   1305 define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
   1306 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1:
   1307 ; CHECK:       # %bb.0:
   1308 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1309 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
   1310 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7]
   1311 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
   1312 ; CHECK-NEXT:    retq
          ; Merge-masked memory shuffle <1,8,3,10,4,12,7,15>: lanes where %mask
          ; compares ordered-equal to zero take the shuffle result, the rest keep
          ; %vec3. Expected as a {%k1}-masked zmm vshufpd with the load folded in.
   1313   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1314   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 4, i32 12, i32 7, i32 15>
   1315   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1316   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
   1317   ret <8 x double> %res
   1318 }
   1319 
   1320 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
   1321 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
   1322 ; CHECK:       # %bb.0:
   1323 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1324 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
   1325 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[0],zmm0[3],mem[2],zmm0[4],mem[4],zmm0[7],mem[7]
   1326 ; CHECK-NEXT:    retq
          ; Zero-masked memory shuffle <1,8,3,10,4,12,7,15>: lanes where %mask
          ; compares ordered-equal to zero take the shuffle result, the rest are
          ; zeroed. Expected as a {%k1} {z} zmm vshufpd with the load folded in.
   1327   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1328   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 8, i32 3, i32 10, i32 4, i32 12, i32 7, i32 15>
   1329   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1330   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
   1331   ret <8 x double> %res
   1332 }
   1333 
   1334 define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
   1335 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2:
   1336 ; CHECK:       # %bb.0:
   1337 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1338 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
   1339 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7]
   1340 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
   1341 ; CHECK-NEXT:    retq
          ; Merge-masked memory shuffle <1,9,3,10,5,13,7,15>: lanes where %mask
          ; compares ordered-equal to zero take the shuffle result, the rest keep
          ; %vec3. Expected as a {%k1}-masked zmm vshufpd with the load folded in.
   1342   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1343   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 10, i32 5, i32 13, i32 7, i32 15>
   1344   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1345   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
   1346   ret <8 x double> %res
   1347 }
   1348 
   1349 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
   1350 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
   1351 ; CHECK:       # %bb.0:
   1352 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1353 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
   1354 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[2],zmm0[5],mem[5],zmm0[7],mem[7]
   1355 ; CHECK-NEXT:    retq
          ; Zero-masked memory shuffle <1,9,3,10,5,13,7,15>: lanes where %mask
          ; compares ordered-equal to zero take the shuffle result, the rest are
          ; zeroed. Expected as a {%k1} {z} zmm vshufpd with the load folded in.
   1356   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1357   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 10, i32 5, i32 13, i32 7, i32 15>
   1358   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1359   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
   1360   ret <8 x double> %res
   1361 }
   1362 
   1363 define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
   1364 ; CHECK-LABEL: test_8xdouble_shuff_mem_mask3:
   1365 ; CHECK:       # %bb.0:
   1366 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
   1367 ; CHECK-NEXT:    retq
          ; Unmasked memory shuffle <1,9,2,11,4,13,6,14> (0-7 from %vec1, 8-15 from
          ; the loaded %vec2). The assertions above expect the load folded into one
          ; zmm vshufpd.
   1368   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1369   %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 14>
   1370   ret <8 x double> %res
   1371 }
   1372 define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
   1373 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3:
   1374 ; CHECK:       # %bb.0:
   1375 ; CHECK-NEXT:    vxorpd %xmm3, %xmm3, %xmm3
   1376 ; CHECK-NEXT:    vcmpeqpd %zmm3, %zmm2, %k1
   1377 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
   1378 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
   1379 ; CHECK-NEXT:    retq
          ; Merge-masked memory shuffle <1,9,2,11,4,13,6,14>: lanes where %mask
          ; compares ordered-equal to zero take the shuffle result, the rest keep
          ; %vec3. Expected as a {%k1}-masked zmm vshufpd with the load folded in.
   1380   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1381   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 14>
   1382   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1383   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
   1384   ret <8 x double> %res
   1385 }
   1386 
   1387 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
   1388 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
   1389 ; CHECK:       # %bb.0:
   1390 ; CHECK-NEXT:    vxorpd %xmm2, %xmm2, %xmm2
   1391 ; CHECK-NEXT:    vcmpeqpd %zmm2, %zmm1, %k1
   1392 ; CHECK-NEXT:    vshufpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[2],mem[3],zmm0[4],mem[5],zmm0[6],mem[6]
   1393 ; CHECK-NEXT:    retq
          ; Zero-masked memory shuffle <1,9,2,11,4,13,6,14>: lanes where %mask
          ; compares ordered-equal to zero take the shuffle result, the rest are
          ; zeroed. Expected as a {%k1} {z} zmm vshufpd with the load folded in.
   1394   %vec2 = load <8 x double>, <8 x double>* %vec2p
   1395   %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 2, i32 11, i32 4, i32 13, i32 6, i32 14>
   1396   %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
   1397   %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
   1398   ret <8 x double> %res
   1399 }
   1400 
   1401