Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+avx512bw | FileCheck %s --check-prefix=CHECK
      3 
      ; Merge-masked shuffle: picks a[1..3],b[0]; mask elements 0-3 select the shuffle,
      ; otherwise %passthru. CHECK expects one masked valignd into xmm2 plus a vmovdqa to xmm0.
      4 define <4 x i32> @mask_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) {
      5 ; CHECK-LABEL: mask_shuffle_v4i32_1234:
      6 ; CHECK:       # %bb.0:
      7 ; CHECK-NEXT:    kmovd %edi, %k1
      8 ; CHECK-NEXT:    valignd {{.*#+}} xmm2 {%k1} = xmm0[1,2,3],xmm1[0]
      9 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
     10 ; CHECK-NEXT:    retq
     11   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
     12   %mask.cast = bitcast i8 %mask to <8 x i1>
     13   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     14   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
     15   ret <4 x i32> %res
     16 }
     17 
     ; Zero-masked shuffle: picks a[1..3],b[0]; lanes not selected by mask elements 0-3
     ; become zero. CHECK expects a single valignd with {%k1} {z}.
     18 define <4 x i32> @maskz_shuffle_v4i32_1234(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
     19 ; CHECK-LABEL: maskz_shuffle_v4i32_1234:
     20 ; CHECK:       # %bb.0:
     21 ; CHECK-NEXT:    kmovd %edi, %k1
     22 ; CHECK-NEXT:    valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3],xmm1[0]
     23 ; CHECK-NEXT:    retq
     24   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
     25   %mask.cast = bitcast i8 %mask to <8 x i1>
     26   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     27   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
     28   ret <4 x i32> %res
     29 }
     30 
     ; Merge-masked shuffle: picks a[2,3],b[0,1]; mask elements 0-3 select the shuffle,
     ; otherwise %passthru. CHECK expects one masked valignd into xmm2 plus a vmovdqa to xmm0.
     31 define <4 x i32> @mask_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, <4 x i32> %passthru, i8 %mask) {
     32 ; CHECK-LABEL: mask_shuffle_v4i32_2345:
     33 ; CHECK:       # %bb.0:
     34 ; CHECK-NEXT:    kmovd %edi, %k1
     35 ; CHECK-NEXT:    valignd {{.*#+}} xmm2 {%k1} = xmm0[2,3],xmm1[0,1]
     36 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
     37 ; CHECK-NEXT:    retq
     38   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
     39   %mask.cast = bitcast i8 %mask to <8 x i1>
     40   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     41   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
     42   ret <4 x i32> %res
     43 }
     44 
     ; Zero-masked shuffle: picks a[2,3],b[0,1]; lanes not selected by mask elements 0-3
     ; become zero. CHECK expects a single valignd with {%k1} {z}.
     45 define <4 x i32> @maskz_shuffle_v4i32_2345(<4 x i32> %a, <4 x i32> %b, i8 %mask) {
     46 ; CHECK-LABEL: maskz_shuffle_v4i32_2345:
     47 ; CHECK:       # %bb.0:
     48 ; CHECK-NEXT:    kmovd %edi, %k1
     49 ; CHECK-NEXT:    valignd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3],xmm1[0,1]
     50 ; CHECK-NEXT:    retq
     51   %shuffle = shufflevector <4 x i32> %a, <4 x i32> %b, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
     52   %mask.cast = bitcast i8 %mask to <8 x i1>
     53   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     54   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
     55   ret <4 x i32> %res
     56 }
     57 
     ; Merge-masked 64-bit shuffle: picks a[1],b[0]; mask elements 0-1 select the shuffle,
     ; otherwise %passthru. CHECK expects one masked valignq into xmm2 plus a vmovdqa to xmm0.
     58 define <2 x i64> @mask_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, <2 x i64> %passthru, i8 %mask) {
     59 ; CHECK-LABEL: mask_shuffle_v2i64_12:
     60 ; CHECK:       # %bb.0:
     61 ; CHECK-NEXT:    kmovd %edi, %k1
     62 ; CHECK-NEXT:    valignq {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[0]
     63 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
     64 ; CHECK-NEXT:    retq
     65   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
     66   %mask.cast = bitcast i8 %mask to <8 x i1>
     67   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
     68   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
     69   ret <2 x i64> %res
     70 }
     71 
     ; Zero-masked 64-bit shuffle: picks a[1],b[0]; lanes not selected by mask elements 0-1
     ; become zero. CHECK expects a single valignq with {%k1} {z}.
     72 define <2 x i64> @maskz_shuffle_v2i64_12(<2 x i64> %a, <2 x i64> %b, i8 %mask) {
     73 ; CHECK-LABEL: maskz_shuffle_v2i64_12:
     74 ; CHECK:       # %bb.0:
     75 ; CHECK-NEXT:    kmovd %edi, %k1
     76 ; CHECK-NEXT:    valignq {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[0]
     77 ; CHECK-NEXT:    retq
     78   %shuffle = shufflevector <2 x i64> %a, <2 x i64> %b, <2 x i32> <i32 1, i32 2>
     79   %mask.cast = bitcast i8 %mask to <8 x i1>
     80   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
     81   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
     82   ret <2 x i64> %res
     83 }
     84 
     ; Merge-masked 256-bit shuffle: picks a[1..3],b[0]; mask elements 0-3 select the shuffle,
     ; otherwise %passthru. CHECK expects one masked valignq into ymm2 plus a vmovdqa to ymm0.
     85 define <4 x i64> @mask_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, <4 x i64> %passthru, i8 %mask) {
     86 ; CHECK-LABEL: mask_shuffle_v4i64_1234:
     87 ; CHECK:       # %bb.0:
     88 ; CHECK-NEXT:    kmovd %edi, %k1
     89 ; CHECK-NEXT:    valignq {{.*#+}} ymm2 {%k1} = ymm0[1,2,3],ymm1[0]
     90 ; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
     91 ; CHECK-NEXT:    retq
     92   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
     93   %mask.cast = bitcast i8 %mask to <8 x i1>
     94   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
     95   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
     96   ret <4 x i64> %res
     97 }
     98 
     ; Zero-masked 256-bit shuffle: picks a[1..3],b[0]; lanes not selected by mask elements 0-3
     ; become zero. CHECK expects a single valignq with {%k1} {z}.
     99 define <4 x i64> @maskz_shuffle_v4i64_1234(<4 x i64> %a, <4 x i64> %b, i8 %mask) {
    100 ; CHECK-LABEL: maskz_shuffle_v4i64_1234:
    101 ; CHECK:       # %bb.0:
    102 ; CHECK-NEXT:    kmovd %edi, %k1
    103 ; CHECK-NEXT:    valignq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3],ymm1[0]
    104 ; CHECK-NEXT:    retq
    105   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
    106   %mask.cast = bitcast i8 %mask to <8 x i1>
    107   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    108   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
    109   ret <4 x i64> %res
    110 }
    111 
    ; Merge-masked single-input rotation a[1,2,3,0]; mask elements 0-3 select the shuffle,
    ; otherwise %passthru. CHECK expects a masked vpermq into ymm1 plus a vmovdqa to ymm0.
    112 define <4 x i64> @mask_shuffle_v4i64_1230(<4 x i64> %a, <4 x i64> %passthru, i8 %mask) {
    113 ; CHECK-LABEL: mask_shuffle_v4i64_1230:
    114 ; CHECK:       # %bb.0:
    115 ; CHECK-NEXT:    kmovd %edi, %k1
    116 ; CHECK-NEXT:    vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,0]
    117 ; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
    118 ; CHECK-NEXT:    retq
    119   %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
    120   %mask.cast = bitcast i8 %mask to <8 x i1>
    121   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    122   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru
    123   ret <4 x i64> %res
    124 }
    125 
    ; Zero-masked single-input rotation a[1,2,3,0]; lanes not selected by mask elements 0-3
    ; become zero. CHECK expects a single vpermq with {%k1} {z}.
    126 define <4 x i64> @maskz_shuffle_v4i64_1230(<4 x i64> %a, i8 %mask) {
    127 ; CHECK-LABEL: maskz_shuffle_v4i64_1230:
    128 ; CHECK:       # %bb.0:
    129 ; CHECK-NEXT:    kmovd %edi, %k1
    130 ; CHECK-NEXT:    vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,0]
    131 ; CHECK-NEXT:    retq
    132   %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 0>
    133   %mask.cast = bitcast i8 %mask to <8 x i1>
    134   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    135   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer
    136   ret <4 x i64> %res
    137 }
    138 
    ; Merge-masked 8-lane shuffle: picks a[1..7],b[0]; all 8 mask bits are used directly,
    ; unselected lanes keep %passthru. CHECK expects a masked valignd into ymm2 plus a vmovdqa.
    139 define <8 x i32> @mask_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) {
    140 ; CHECK-LABEL: mask_shuffle_v8i32_12345678:
    141 ; CHECK:       # %bb.0:
    142 ; CHECK-NEXT:    kmovd %edi, %k1
    143 ; CHECK-NEXT:    valignd {{.*#+}} ymm2 {%k1} = ymm0[1,2,3,4,5,6,7],ymm1[0]
    144 ; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
    145 ; CHECK-NEXT:    retq
    146   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
    147   %mask.cast = bitcast i8 %mask to <8 x i1>
    148   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
    149   ret <8 x i32> %res
    150 }
    151 
    ; Zero-masked 8-lane shuffle: picks a[1..7],b[0]; all 8 mask bits are used directly,
    ; unselected lanes become zero. CHECK expects a single valignd with {%k1} {z}.
    152 define <8 x i32> @maskz_shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
    153 ; CHECK-LABEL: maskz_shuffle_v8i32_12345678:
    154 ; CHECK:       # %bb.0:
    155 ; CHECK-NEXT:    kmovd %edi, %k1
    156 ; CHECK-NEXT:    valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7],ymm1[0]
    157 ; CHECK-NEXT:    retq
    158   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
    159   %mask.cast = bitcast i8 %mask to <8 x i1>
    160   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
    161   ret <8 x i32> %res
    162 }
    163 
    ; Merge-masked 8-lane shuffle: picks a[2..7],b[0,1]; all 8 mask bits are used directly,
    ; unselected lanes keep %passthru. CHECK expects a masked valignd into ymm2 plus a vmovdqa.
    164 define <8 x i32> @mask_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, <8 x i32> %passthru, i8 %mask) {
    165 ; CHECK-LABEL: mask_shuffle_v8i32_23456789:
    166 ; CHECK:       # %bb.0:
    167 ; CHECK-NEXT:    kmovd %edi, %k1
    168 ; CHECK-NEXT:    valignd {{.*#+}} ymm2 {%k1} = ymm0[2,3,4,5,6,7],ymm1[0,1]
    169 ; CHECK-NEXT:    vmovdqa %ymm2, %ymm0
    170 ; CHECK-NEXT:    retq
    171   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
    172   %mask.cast = bitcast i8 %mask to <8 x i1>
    173   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
    174   ret <8 x i32> %res
    175 }
    176 
    ; Zero-masked 8-lane shuffle: picks a[2..7],b[0,1]; all 8 mask bits are used directly,
    ; unselected lanes become zero. CHECK expects a single valignd with {%k1} {z}.
    177 define <8 x i32> @maskz_shuffle_v8i32_23456789(<8 x i32> %a, <8 x i32> %b, i8 %mask) {
    178 ; CHECK-LABEL: maskz_shuffle_v8i32_23456789:
    179 ; CHECK:       # %bb.0:
    180 ; CHECK-NEXT:    kmovd %edi, %k1
    181 ; CHECK-NEXT:    valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,4,5,6,7],ymm1[0,1]
    182 ; CHECK-NEXT:    retq
    183   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9>
    184   %mask.cast = bitcast i8 %mask to <8 x i1>
    185   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
    186   ret <8 x i32> %res
    187 }
    188 
    ; Merge-masked single-input rotation a[1..7,0]; unselected lanes keep %passthru.
    ; CHECK expects a masked valignd (same source twice) into ymm1 plus a vmovdqa to ymm0.
    189 define <8 x i32> @mask_shuffle_v8i32_12345670(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) {
    190 ; CHECK-LABEL: mask_shuffle_v8i32_12345670:
    191 ; CHECK:       # %bb.0:
    192 ; CHECK-NEXT:    kmovd %edi, %k1
    193 ; CHECK-NEXT:    valignd {{.*#+}} ymm1 {%k1} = ymm0[1,2,3,4,5,6,7,0]
    194 ; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
    195 ; CHECK-NEXT:    retq
    196   %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
    197   %mask.cast = bitcast i8 %mask to <8 x i1>
    198   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
    199   ret <8 x i32> %res
    200 }
    201 
    ; Zero-masked single-input rotation a[1..7,0]; unselected lanes become zero.
    ; CHECK expects a single valignd with {%k1} {z}.
    202 define <8 x i32> @maskz_shuffle_v8i32_12345670(<8 x i32> %a, i8 %mask) {
    203 ; CHECK-LABEL: maskz_shuffle_v8i32_12345670:
    204 ; CHECK:       # %bb.0:
    205 ; CHECK-NEXT:    kmovd %edi, %k1
    206 ; CHECK-NEXT:    valignd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,3,4,5,6,7,0]
    207 ; CHECK-NEXT:    retq
    208   %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
    209   %mask.cast = bitcast i8 %mask to <8 x i1>
    210   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
    211   ret <8 x i32> %res
    212 }
    213 
    ; Merge-masked rotation by two i32 lanes, a[2..7,0,1]. The recorded output is NOT a single
    ; masked op: CHECK expects an unmasked vpermq followed by a masked vpblendmd with ymm1.
    214 define <8 x i32> @mask_shuffle_v8i32_23456701(<8 x i32> %a, <8 x i32> %passthru, i8 %mask) {
    215 ; CHECK-LABEL: mask_shuffle_v8i32_23456701:
    216 ; CHECK:       # %bb.0:
    217 ; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
    218 ; CHECK-NEXT:    kmovd %edi, %k1
    219 ; CHECK-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
    220 ; CHECK-NEXT:    retq
    221   %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
    222   %mask.cast = bitcast i8 %mask to <8 x i1>
    223   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru
    224   ret <8 x i32> %res
    225 }
    226 
    ; Zero-masked rotation by two i32 lanes, a[2..7,0,1]. The recorded output is NOT a single
    ; masked op: CHECK expects an unmasked vpermq followed by a zero-masked vmovdqa32.
    227 define <8 x i32> @maskz_shuffle_v8i32_23456701(<8 x i32> %a, i8 %mask) {
    228 ; CHECK-LABEL: maskz_shuffle_v8i32_23456701:
    229 ; CHECK:       # %bb.0:
    230 ; CHECK-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,3,0]
    231 ; CHECK-NEXT:    kmovd %edi, %k1
    232 ; CHECK-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
    233 ; CHECK-NEXT:    retq
    234   %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1>
    235   %mask.cast = bitcast i8 %mask to <8 x i1>
    236   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer
    237   ret <8 x i32> %res
    238 }
    239 
    ; Merge-masked extract of elements 0-3 of an <8 x i32>; unselected lanes keep %passthru.
    ; CHECK expects no extract instruction, only a masked vpblendmd on xmm registers.
    240 define <4 x i32> @mask_extract_v8i32_v4i32_0(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) {
    241 ; CHECK-LABEL: mask_extract_v8i32_v4i32_0:
    242 ; CHECK:       # %bb.0:
    243 ; CHECK-NEXT:    kmovd %edi, %k1
    244 ; CHECK-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
    245 ; CHECK-NEXT:    vzeroupper
    246 ; CHECK-NEXT:    retq
    247   %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    248   %mask.cast = bitcast i8 %mask to <8 x i1>
    249   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    250   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
    251   ret <4 x i32> %res
    252 }
    253 
    ; Zero-masked extract of elements 0-3 of an <8 x i32>; unselected lanes become zero.
    ; CHECK expects no extract instruction, only a zero-masked vmovdqa32 on xmm0.
    254 define <4 x i32> @mask_extract_v8i32_v4i32_0_z(<8 x i32> %a, i8 %mask) {
    255 ; CHECK-LABEL: mask_extract_v8i32_v4i32_0_z:
    256 ; CHECK:       # %bb.0:
    257 ; CHECK-NEXT:    kmovd %edi, %k1
    258 ; CHECK-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
    259 ; CHECK-NEXT:    vzeroupper
    260 ; CHECK-NEXT:    retq
    261   %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    262   %mask.cast = bitcast i8 %mask to <8 x i1>
    263   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    264   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
    265   ret <4 x i32> %res
    266 }
    267 
    ; Merge-masked extract of elements 4-7 of an <8 x i32>; unselected lanes keep %passthru.
    ; CHECK expects a masked vextracti32x4 $1 into xmm1 plus a vmovdqa to xmm0.
    268 define <4 x i32> @mask_extract_v8i32_v4i32_1(<8 x i32> %a, <4 x i32> %passthru, i8 %mask) {
    269 ; CHECK-LABEL: mask_extract_v8i32_v4i32_1:
    270 ; CHECK:       # %bb.0:
    271 ; CHECK-NEXT:    kmovd %edi, %k1
    272 ; CHECK-NEXT:    vextracti32x4 $1, %ymm0, %xmm1 {%k1}
    273 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
    274 ; CHECK-NEXT:    vzeroupper
    275 ; CHECK-NEXT:    retq
    276   %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    277   %mask.cast = bitcast i8 %mask to <8 x i1>
    278   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    279   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
    280   ret <4 x i32> %res
    281 }
    282 
    ; Zero-masked extract of elements 4-7 of an <8 x i32>; unselected lanes become zero.
    ; CHECK expects a single vextracti32x4 $1 with {%k1} {z}.
    283 define <4 x i32> @mask_extract_v8i32_v4i32_1_z(<8 x i32> %a, i8 %mask) {
    284 ; CHECK-LABEL: mask_extract_v8i32_v4i32_1_z:
    285 ; CHECK:       # %bb.0:
    286 ; CHECK-NEXT:    kmovd %edi, %k1
    287 ; CHECK-NEXT:    vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
    288 ; CHECK-NEXT:    vzeroupper
    289 ; CHECK-NEXT:    retq
    290   %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    291   %mask.cast = bitcast i8 %mask to <8 x i1>
    292   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    293   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
    294   ret <4 x i32> %res
    295 }
    296 
    ; Merge-masked extract of elements 0-3 of an <8 x float>; unselected lanes keep %passthru.
    ; CHECK expects no extract instruction, only a masked vblendmps on xmm registers.
    297 define <4 x float> @mask_extract_v8f32_v4f32_0(<8 x float> %a, <4 x float> %passthru, i8 %mask) {
    298 ; CHECK-LABEL: mask_extract_v8f32_v4f32_0:
    299 ; CHECK:       # %bb.0:
    300 ; CHECK-NEXT:    kmovd %edi, %k1
    301 ; CHECK-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
    302 ; CHECK-NEXT:    vzeroupper
    303 ; CHECK-NEXT:    retq
    304   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    305   %mask.cast = bitcast i8 %mask to <8 x i1>
    306   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    307   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
    308   ret <4 x float> %res
    309 }
    310 
    ; Zero-masked extract of elements 0-3 of an <8 x float>; unselected lanes become zero.
    ; CHECK expects no extract instruction, only a zero-masked vmovaps on xmm0.
    311 define <4 x float> @mask_extract_v8f32_v4f32_0_z(<8 x float> %a, i8 %mask) {
    312 ; CHECK-LABEL: mask_extract_v8f32_v4f32_0_z:
    313 ; CHECK:       # %bb.0:
    314 ; CHECK-NEXT:    kmovd %edi, %k1
    315 ; CHECK-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
    316 ; CHECK-NEXT:    vzeroupper
    317 ; CHECK-NEXT:    retq
    318   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    319   %mask.cast = bitcast i8 %mask to <8 x i1>
    320   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    321   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
    322   ret <4 x float> %res
    323 }
    324 
    ; Merge-masked extract of elements 4-7 of an <8 x float>; unselected lanes keep %passthru.
    ; CHECK expects a masked vextractf32x4 $1 into xmm1 plus a vmovaps to xmm0.
    325 define <4 x float> @mask_extract_v8f32_v4f32_1(<8 x float> %a, <4 x float> %passthru, i8 %mask) {
    326 ; CHECK-LABEL: mask_extract_v8f32_v4f32_1:
    327 ; CHECK:       # %bb.0:
    328 ; CHECK-NEXT:    kmovd %edi, %k1
    329 ; CHECK-NEXT:    vextractf32x4 $1, %ymm0, %xmm1 {%k1}
    330 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    331 ; CHECK-NEXT:    vzeroupper
    332 ; CHECK-NEXT:    retq
    333   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    334   %mask.cast = bitcast i8 %mask to <8 x i1>
    335   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    336   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
    337   ret <4 x float> %res
    338 }
    339 
    ; Zero-masked extract of elements 4-7 of an <8 x float>; unselected lanes become zero.
    ; CHECK expects a single vextractf32x4 $1 with {%k1} {z}.
    340 define <4 x float> @mask_extract_v8f32_v4f32_1_z(<8 x float> %a, i8 %mask) {
    341 ; CHECK-LABEL: mask_extract_v8f32_v4f32_1_z:
    342 ; CHECK:       # %bb.0:
    343 ; CHECK-NEXT:    kmovd %edi, %k1
    344 ; CHECK-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
    345 ; CHECK-NEXT:    vzeroupper
    346 ; CHECK-NEXT:    retq
    347   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    348   %mask.cast = bitcast i8 %mask to <8 x i1>
    349   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    350   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
    351   ret <4 x float> %res
    352 }
    353 
    ; Merge-masked extract of elements 0-1 of a <4 x i64>; mask elements 0-1 select the
    ; extract, otherwise %passthru. CHECK expects only a masked vpblendmq on xmm registers.
    354 define <2 x i64> @mask_extract_v4i64_v2i64_0(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) {
    355 ; CHECK-LABEL: mask_extract_v4i64_v2i64_0:
    356 ; CHECK:       # %bb.0:
    357 ; CHECK-NEXT:    kmovd %edi, %k1
    358 ; CHECK-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
    359 ; CHECK-NEXT:    vzeroupper
    360 ; CHECK-NEXT:    retq
    361   %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
    362   %mask.cast = bitcast i8 %mask to <8 x i1>
    363   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    364   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
    365   ret <2 x i64> %res
    366 }
    367 
    ; Zero-masked extract of elements 0-1 of a <4 x i64>; unselected lanes become zero.
    ; CHECK expects no extract instruction, only a zero-masked vmovdqa64 on xmm0.
    368 define <2 x i64> @mask_extract_v4i64_v2i64_0_z(<4 x i64> %a, i8 %mask) {
    369 ; CHECK-LABEL: mask_extract_v4i64_v2i64_0_z:
    370 ; CHECK:       # %bb.0:
    371 ; CHECK-NEXT:    kmovd %edi, %k1
    372 ; CHECK-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
    373 ; CHECK-NEXT:    vzeroupper
    374 ; CHECK-NEXT:    retq
    375   %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
    376   %mask.cast = bitcast i8 %mask to <8 x i1>
    377   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    378   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
    379   ret <2 x i64> %res
    380 }
    381 
    ; Merge-masked extract of elements 2-3 of a <4 x i64>; unselected lanes keep %passthru.
    ; CHECK expects a masked vextracti64x2 $1 into xmm1 plus a vmovdqa to xmm0.
    382 define <2 x i64> @mask_extract_v4i64_v2i64_1(<4 x i64> %a, <2 x i64> %passthru, i8 %mask) {
    383 ; CHECK-LABEL: mask_extract_v4i64_v2i64_1:
    384 ; CHECK:       # %bb.0:
    385 ; CHECK-NEXT:    kmovd %edi, %k1
    386 ; CHECK-NEXT:    vextracti64x2 $1, %ymm0, %xmm1 {%k1}
    387 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
    388 ; CHECK-NEXT:    vzeroupper
    389 ; CHECK-NEXT:    retq
    390   %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
    391   %mask.cast = bitcast i8 %mask to <8 x i1>
    392   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    393   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru
    394   ret <2 x i64> %res
    395 }
    396 
    ; Zero-masked extract of elements 2-3 of a <4 x i64>; unselected lanes become zero.
    ; CHECK expects a single vextracti64x2 $1 with {%k1} {z}.
    397 define <2 x i64> @mask_extract_v4i64_v2i64_1_z(<4 x i64> %a, i8 %mask) {
    398 ; CHECK-LABEL: mask_extract_v4i64_v2i64_1_z:
    399 ; CHECK:       # %bb.0:
    400 ; CHECK-NEXT:    kmovd %edi, %k1
    401 ; CHECK-NEXT:    vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
    402 ; CHECK-NEXT:    vzeroupper
    403 ; CHECK-NEXT:    retq
    404   %shuffle = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
    405   %mask.cast = bitcast i8 %mask to <8 x i1>
    406   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    407   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
    408   ret <2 x i64> %res
    409 }
    410 
    ; Merge-masked extract of elements 0-1 of a <4 x double>; unselected lanes keep %passthru.
    ; CHECK expects no extract instruction, only a masked vblendmpd on xmm registers.
    411 define <2 x double> @mask_extract_v4f64_v2f64_0(<4 x double> %a, <2 x double> %passthru, i8 %mask) {
    412 ; CHECK-LABEL: mask_extract_v4f64_v2f64_0:
    413 ; CHECK:       # %bb.0:
    414 ; CHECK-NEXT:    kmovd %edi, %k1
    415 ; CHECK-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
    416 ; CHECK-NEXT:    vzeroupper
    417 ; CHECK-NEXT:    retq
    418   %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
    419   %mask.cast = bitcast i8 %mask to <8 x i1>
    420   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    421   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
    422   ret <2 x double> %res
    423 }
    424 
    ; Zero-masked extract of elements 0-1 of a <4 x double>; unselected lanes become zero.
    ; CHECK expects no extract instruction, only a zero-masked vmovapd on xmm0.
    425 define <2 x double> @mask_extract_v4f64_v2f64_0_z(<4 x double> %a, i8 %mask) {
    426 ; CHECK-LABEL: mask_extract_v4f64_v2f64_0_z:
    427 ; CHECK:       # %bb.0:
    428 ; CHECK-NEXT:    kmovd %edi, %k1
    429 ; CHECK-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
    430 ; CHECK-NEXT:    vzeroupper
    431 ; CHECK-NEXT:    retq
    432   %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
    433   %mask.cast = bitcast i8 %mask to <8 x i1>
    434   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    435   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
    436   ret <2 x double> %res
    437 }
    438 
    ; Merge-masked extract of elements 2-3 of a <4 x double>; unselected lanes keep %passthru.
    ; CHECK expects a masked vextractf64x2 $1 into xmm1 plus a vmovapd to xmm0.
    439 define <2 x double> @mask_extract_v4f64_v2f64_1(<4 x double> %a, <2 x double> %passthru, i8 %mask) {
    440 ; CHECK-LABEL: mask_extract_v4f64_v2f64_1:
    441 ; CHECK:       # %bb.0:
    442 ; CHECK-NEXT:    kmovd %edi, %k1
    443 ; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm1 {%k1}
    444 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    445 ; CHECK-NEXT:    vzeroupper
    446 ; CHECK-NEXT:    retq
    447   %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
    448   %mask.cast = bitcast i8 %mask to <8 x i1>
    449   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    450   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru
    451   ret <2 x double> %res
    452 }
    453 
    ; Zero-masked extract of elements 2-3 of a <4 x double>; unselected lanes become zero.
    ; CHECK expects a single vextractf64x2 $1 with {%k1} {z}.
    454 define <2 x double> @mask_extract_v4f64_v2f64_1_z(<4 x double> %a, i8 %mask) {
    455 ; CHECK-LABEL: mask_extract_v4f64_v2f64_1_z:
    456 ; CHECK:       # %bb.0:
    457 ; CHECK-NEXT:    kmovd %edi, %k1
    458 ; CHECK-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
    459 ; CHECK-NEXT:    vzeroupper
    460 ; CHECK-NEXT:    retq
    461   %shuffle = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
    462   %mask.cast = bitcast i8 %mask to <8 x i1>
    463   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    464   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer
    465   ret <2 x double> %res
    466 }
    467 
    ; Merge-masked extract of elements 0-3 of a <16 x i32>; unselected lanes keep %passthru.
    ; CHECK expects no extract instruction, only a masked vpblendmd on xmm registers.
    468 define <4 x i32> @mask_extract_v16i32_v4i32_0(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
    469 ; CHECK-LABEL: mask_extract_v16i32_v4i32_0:
    470 ; CHECK:       # %bb.0:
    471 ; CHECK-NEXT:    kmovd %edi, %k1
    472 ; CHECK-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
    473 ; CHECK-NEXT:    vzeroupper
    474 ; CHECK-NEXT:    retq
    475   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    476   %mask.cast = bitcast i8 %mask to <8 x i1>
    477   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    478   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
    479   ret <4 x i32> %res
    480 }
    481 
    ; Zero-masked extract of elements 0-3 of a <16 x i32>; unselected lanes become zero.
    ; CHECK expects no extract instruction, only a zero-masked vmovdqa32 on xmm0.
    482 define <4 x i32> @mask_extract_v16i32_v4i32_0_z(<16 x i32> %a, i8 %mask) {
    483 ; CHECK-LABEL: mask_extract_v16i32_v4i32_0_z:
    484 ; CHECK:       # %bb.0:
    485 ; CHECK-NEXT:    kmovd %edi, %k1
    486 ; CHECK-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
    487 ; CHECK-NEXT:    vzeroupper
    488 ; CHECK-NEXT:    retq
    489   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    490   %mask.cast = bitcast i8 %mask to <8 x i1>
    491   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    492   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
    493   ret <4 x i32> %res
    494 }
    495 
    ; Merge-masked extract of elements 4-7 of a <16 x i32>; unselected lanes keep %passthru.
    ; CHECK expects a masked vextracti32x4 $1 from zmm0 into xmm1 plus a vmovdqa to xmm0.
    496 define <4 x i32> @mask_extract_v16i32_v4i32_1(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
    497 ; CHECK-LABEL: mask_extract_v16i32_v4i32_1:
    498 ; CHECK:       # %bb.0:
    499 ; CHECK-NEXT:    kmovd %edi, %k1
    500 ; CHECK-NEXT:    vextracti32x4 $1, %zmm0, %xmm1 {%k1}
    501 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
    502 ; CHECK-NEXT:    vzeroupper
    503 ; CHECK-NEXT:    retq
    504   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    505   %mask.cast = bitcast i8 %mask to <8 x i1>
    506   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    507   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
    508   ret <4 x i32> %res
    509 }
    510 
    ; Zero-masked extract of elements 4-7 of a <16 x i32>; unselected lanes become zero.
    ; CHECK expects a single vextracti32x4 $1 from zmm0 with {%k1} {z}.
    511 define <4 x i32> @mask_extract_v16i32_v4i32_1_z(<16 x i32> %a, i8 %mask) {
    512 ; CHECK-LABEL: mask_extract_v16i32_v4i32_1_z:
    513 ; CHECK:       # %bb.0:
    514 ; CHECK-NEXT:    kmovd %edi, %k1
    515 ; CHECK-NEXT:    vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z}
    516 ; CHECK-NEXT:    vzeroupper
    517 ; CHECK-NEXT:    retq
    518   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    519   %mask.cast = bitcast i8 %mask to <8 x i1>
    520   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    521   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
    522   ret <4 x i32> %res
    523 }
    524 
    ; Merge-masked extract of elements 8-11 of a <16 x i32>; unselected lanes keep %passthru.
    ; CHECK expects a masked vextracti32x4 $2 from zmm0 into xmm1 plus a vmovdqa to xmm0.
    525 define <4 x i32> @mask_extract_v16i32_v4i32_2(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
    526 ; CHECK-LABEL: mask_extract_v16i32_v4i32_2:
    527 ; CHECK:       # %bb.0:
    528 ; CHECK-NEXT:    kmovd %edi, %k1
    529 ; CHECK-NEXT:    vextracti32x4 $2, %zmm0, %xmm1 {%k1}
    530 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
    531 ; CHECK-NEXT:    vzeroupper
    532 ; CHECK-NEXT:    retq
    533   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
    534   %mask.cast = bitcast i8 %mask to <8 x i1>
    535   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    536   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
    537   ret <4 x i32> %res
    538 }
    539 
    ; Merge-masked extract of elements 12-15 of a <16 x i32>; unselected lanes keep %passthru.
    ; CHECK expects a masked vextracti32x4 $3 from zmm0 into xmm1 plus a vmovdqa to xmm0.
    540 define <4 x i32> @mask_extract_v16i32_v4i32_3(<16 x i32> %a, <4 x i32> %passthru, i8 %mask) {
    541 ; CHECK-LABEL: mask_extract_v16i32_v4i32_3:
    542 ; CHECK:       # %bb.0:
    543 ; CHECK-NEXT:    kmovd %edi, %k1
    544 ; CHECK-NEXT:    vextracti32x4 $3, %zmm0, %xmm1 {%k1}
    545 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
    546 ; CHECK-NEXT:    vzeroupper
    547 ; CHECK-NEXT:    retq
    548   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
    549   %mask.cast = bitcast i8 %mask to <8 x i1>
    550   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    551   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle, <4 x i32> %passthru
    552   ret <4 x i32> %res
    553 }
    554 
    ; Merge-masked extract of elements 0-3 of a <16 x float>; unselected lanes keep %passthru.
    ; CHECK expects no extract instruction, only a masked vblendmps on xmm registers.
    555 define <4 x float> @mask_extract_v16f32_v4f32_0(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
    556 ; CHECK-LABEL: mask_extract_v16f32_v4f32_0:
    557 ; CHECK:       # %bb.0:
    558 ; CHECK-NEXT:    kmovd %edi, %k1
    559 ; CHECK-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
    560 ; CHECK-NEXT:    vzeroupper
    561 ; CHECK-NEXT:    retq
    562   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    563   %mask.cast = bitcast i8 %mask to <8 x i1>
    564   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    565   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
    566   ret <4 x float> %res
    567 }
    568 
    ; Zero-masked extract of elements 0-3 of a <16 x float>; unselected lanes become zero.
    ; CHECK expects no extract instruction, only a zero-masked vmovaps on xmm0.
    569 define <4 x float> @mask_extract_v16f32_v4f32_0_z(<16 x float> %a, i8 %mask) {
    570 ; CHECK-LABEL: mask_extract_v16f32_v4f32_0_z:
    571 ; CHECK:       # %bb.0:
    572 ; CHECK-NEXT:    kmovd %edi, %k1
    573 ; CHECK-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
    574 ; CHECK-NEXT:    vzeroupper
    575 ; CHECK-NEXT:    retq
    576   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    577   %mask.cast = bitcast i8 %mask to <8 x i1>
    578   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    579   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer
    580   ret <4 x float> %res
    581 }
    582 
    ; Merge-masked extract of elements 4-7 of a <16 x float>; unselected lanes keep %passthru.
    ; CHECK expects a masked vextractf32x4 $1 from zmm0 into xmm1 plus a vmovaps to xmm0.
    583 define <4 x float> @mask_extract_v16f32_v4f32_1(<16 x float> %a, <4 x float> %passthru, i8 %mask) {
    584 ; CHECK-LABEL: mask_extract_v16f32_v4f32_1:
    585 ; CHECK:       # %bb.0:
    586 ; CHECK-NEXT:    kmovd %edi, %k1
    587 ; CHECK-NEXT:    vextractf32x4 $1, %zmm0, %xmm1 {%k1}
    588 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    589 ; CHECK-NEXT:    vzeroupper
    590 ; CHECK-NEXT:    retq
    591   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    592   %mask.cast = bitcast i8 %mask to <8 x i1>
    593   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    594   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru
    595   ret <4 x float> %res
    596 }
    597 
    598 define <4 x float> @mask_extract_v16f32_v4f32_1_z(<16 x float> %a, i8 %mask) { ; Zero-masked extract of 128-bit subvector 1 (elements 4-7) of %a.
    599 ; CHECK-LABEL: mask_extract_v16f32_v4f32_1_z:
    600 ; CHECK:       # %bb.0:
    601 ; CHECK-NEXT:    kmovd %edi, %k1
    602 ; CHECK-NEXT:    vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
    603 ; CHECK-NEXT:    vzeroupper
    604 ; CHECK-NEXT:    retq
    605   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    606   %mask.cast = bitcast i8 %mask to <8 x i1>
    607   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
    608   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> zeroinitializer ; false lanes are zeroed
    609   ret <4 x float> %res
    610 }
    611 
    612 define <4 x float> @mask_extract_v16f32_v4f32_2(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 2 (elements 8-11) of %a.
    613 ; CHECK-LABEL: mask_extract_v16f32_v4f32_2:
    614 ; CHECK:       # %bb.0:
    615 ; CHECK-NEXT:    kmovd %edi, %k1
    616 ; CHECK-NEXT:    vextractf32x4 $2, %zmm0, %xmm1 {%k1}
    617 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    618 ; CHECK-NEXT:    vzeroupper
    619 ; CHECK-NEXT:    retq
    620   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 8, i32 9, i32 10, i32 11>
    621   %mask.cast = bitcast i8 %mask to <8 x i1>
    622   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
    623   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru ; false lanes keep %passthru
    624   ret <4 x float> %res
    625 }
    626 
    627 define <4 x float> @mask_extract_v16f32_v4f32_3(<16 x float> %a, <4 x float> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 3 (elements 12-15) of %a.
    628 ; CHECK-LABEL: mask_extract_v16f32_v4f32_3:
    629 ; CHECK:       # %bb.0:
    630 ; CHECK-NEXT:    kmovd %edi, %k1
    631 ; CHECK-NEXT:    vextractf32x4 $3, %zmm0, %xmm1 {%k1}
    632 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
    633 ; CHECK-NEXT:    vzeroupper
    634 ; CHECK-NEXT:    retq
    635   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
    636   %mask.cast = bitcast i8 %mask to <8 x i1>
    637   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
    638   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle, <4 x float> %passthru ; false lanes keep %passthru
    639   ret <4 x float> %res
    640 }
    641 
    642 define <8 x i32> @mask_extract_v16i32_v8i32_0(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) { ; Merge-masked extract of 256-bit subvector 0 (elements 0-7) of %a.
    643 ; CHECK-LABEL: mask_extract_v16i32_v8i32_0:
    644 ; CHECK:       # %bb.0:
    645 ; CHECK-NEXT:    kmovd %edi, %k1
    646 ; CHECK-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
    647 ; CHECK-NEXT:    retq
    648   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; subvector 0: the expected output has no extract instruction, only a masked blend
    649   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
    650   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru ; false lanes keep %passthru
    651   ret <8 x i32> %res
    652 }
    653 
    654 define <8 x i32> @mask_extract_v16i32_v8i32_0_z(<16 x i32> %a, i8 %mask) { ; Zero-masked extract of 256-bit subvector 0 (elements 0-7) of %a.
    655 ; CHECK-LABEL: mask_extract_v16i32_v8i32_0_z:
    656 ; CHECK:       # %bb.0:
    657 ; CHECK-NEXT:    kmovd %edi, %k1
    658 ; CHECK-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
    659 ; CHECK-NEXT:    retq
    660   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; subvector 0: the expected output has no extract instruction, only a zero-masked move
    661   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
    662   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer ; false lanes are zeroed
    663   ret <8 x i32> %res
    664 }
    665 
    666 define <8 x i32> @mask_extract_v16i32_v8i32_1(<16 x i32> %a, <8 x i32> %passthru, i8 %mask) { ; Merge-masked extract of 256-bit subvector 1 (elements 8-15) of %a.
    667 ; CHECK-LABEL: mask_extract_v16i32_v8i32_1:
    668 ; CHECK:       # %bb.0:
    669 ; CHECK-NEXT:    kmovd %edi, %k1
    670 ; CHECK-NEXT:    vextracti32x8 $1, %zmm0, %ymm1 {%k1}
    671 ; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
    672 ; CHECK-NEXT:    retq
    673   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    674   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
    675   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> %passthru ; false lanes keep %passthru
    676   ret <8 x i32> %res
    677 }
    678 
    679 define <8 x i32> @mask_extract_v16i32_v8i32_1_z(<16 x i32> %a, i8 %mask) { ; Zero-masked extract of 256-bit subvector 1 (elements 8-15) of %a.
    680 ; CHECK-LABEL: mask_extract_v16i32_v8i32_1_z:
    681 ; CHECK:       # %bb.0:
    682 ; CHECK-NEXT:    kmovd %edi, %k1
    683 ; CHECK-NEXT:    vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z}
    684 ; CHECK-NEXT:    retq
    685   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    686   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
    687   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle, <8 x i32> zeroinitializer ; false lanes are zeroed
    688   ret <8 x i32> %res
    689 }
    690 
    691 define <8 x float> @mask_extract_v16f32_v8f32_0(<16 x float> %a, <8 x float> %passthru, i8 %mask) { ; Merge-masked extract of 256-bit subvector 0 (elements 0-7) of %a.
    692 ; CHECK-LABEL: mask_extract_v16f32_v8f32_0:
    693 ; CHECK:       # %bb.0:
    694 ; CHECK-NEXT:    kmovd %edi, %k1
    695 ; CHECK-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
    696 ; CHECK-NEXT:    retq
    697   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; subvector 0: the expected output has no extract instruction, only a masked blend
    698   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
    699   %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> %passthru ; false lanes keep %passthru
    700   ret <8 x float> %res
    701 }
    702 
    703 define <8 x float> @mask_extract_v16f32_v8f32_0_z(<16 x float> %a, i8 %mask) { ; Zero-masked extract of 256-bit subvector 0 (elements 0-7) of %a.
    704 ; CHECK-LABEL: mask_extract_v16f32_v8f32_0_z:
    705 ; CHECK:       # %bb.0:
    706 ; CHECK-NEXT:    kmovd %edi, %k1
    707 ; CHECK-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
    708 ; CHECK-NEXT:    retq
    709   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> ; subvector 0: the expected output has no extract instruction, only a zero-masked move
    710   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
    711   %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> zeroinitializer ; false lanes are zeroed
    712   ret <8 x float> %res
    713 }
    714 
    715 define <8 x float> @mask_extract_v16f32_v8f32_1(<16 x float> %a, <8 x float> %passthru, i8 %mask) { ; Merge-masked extract of 256-bit subvector 1 (elements 8-15) of %a.
    716 ; CHECK-LABEL: mask_extract_v16f32_v8f32_1:
    717 ; CHECK:       # %bb.0:
    718 ; CHECK-NEXT:    kmovd %edi, %k1
    719 ; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
    720 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
    721 ; CHECK-NEXT:    retq
    722   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    723   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
    724   %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> %passthru ; false lanes keep %passthru
    725   ret <8 x float> %res
    726 }
    727 
    728 define <8 x float> @mask_extract_v16f32_v8f32_1_z(<16 x float> %a, i8 %mask) { ; Zero-masked extract of 256-bit subvector 1 (elements 8-15) of %a.
    729 ; CHECK-LABEL: mask_extract_v16f32_v8f32_1_z:
    730 ; CHECK:       # %bb.0:
    731 ; CHECK-NEXT:    kmovd %edi, %k1
    732 ; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
    733 ; CHECK-NEXT:    retq
    734   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    735   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
    736   %res = select <8 x i1> %mask.cast, <8 x float> %shuffle, <8 x float> zeroinitializer ; false lanes are zeroed
    737   ret <8 x float> %res
    738 }
    739 
    740 define <2 x i64> @mask_extract_v8i64_v2i64_0(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 0 (elements 0-1) of %a.
    741 ; CHECK-LABEL: mask_extract_v8i64_v2i64_0:
    742 ; CHECK:       # %bb.0:
    743 ; CHECK-NEXT:    kmovd %edi, %k1
    744 ; CHECK-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
    745 ; CHECK-NEXT:    vzeroupper
    746 ; CHECK-NEXT:    retq
    747   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> ; subvector 0: the expected output has no extract instruction, only a masked blend
    748   %mask.cast = bitcast i8 %mask to <8 x i1>
    749   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    750   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru ; false lanes keep %passthru
    751   ret <2 x i64> %res
    752 }
    753 
    754 define <2 x i64> @mask_extract_v8i64_v2i64_0_z(<8 x i64> %a, i8 %mask) { ; Zero-masked extract of 128-bit subvector 0 (elements 0-1) of %a.
    755 ; CHECK-LABEL: mask_extract_v8i64_v2i64_0_z:
    756 ; CHECK:       # %bb.0:
    757 ; CHECK-NEXT:    kmovd %edi, %k1
    758 ; CHECK-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
    759 ; CHECK-NEXT:    vzeroupper
    760 ; CHECK-NEXT:    retq
    761   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> ; subvector 0: the expected output has no extract instruction, only a zero-masked move
    762   %mask.cast = bitcast i8 %mask to <8 x i1>
    763   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    764   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer ; false lanes are zeroed
    765   ret <2 x i64> %res
    766 }
    767 
    768 define <2 x i64> @mask_extract_v8i64_v2i64_1(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 1 (elements 2-3) of %a.
    769 ; CHECK-LABEL: mask_extract_v8i64_v2i64_1:
    770 ; CHECK:       # %bb.0:
    771 ; CHECK-NEXT:    kmovd %edi, %k1
    772 ; CHECK-NEXT:    vextracti64x2 $1, %zmm0, %xmm1 {%k1}
    773 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
    774 ; CHECK-NEXT:    vzeroupper
    775 ; CHECK-NEXT:    retq
    776   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
    777   %mask.cast = bitcast i8 %mask to <8 x i1>
    778   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    779   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru ; false lanes keep %passthru
    780   ret <2 x i64> %res
    781 }
    782 
    783 define <2 x i64> @mask_extract_v8i64_v2i64_1_z(<8 x i64> %a, i8 %mask) { ; Zero-masked extract of 128-bit subvector 1 (elements 2-3) of %a.
    784 ; CHECK-LABEL: mask_extract_v8i64_v2i64_1_z:
    785 ; CHECK:       # %bb.0:
    786 ; CHECK-NEXT:    kmovd %edi, %k1
    787 ; CHECK-NEXT:    vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z}
    788 ; CHECK-NEXT:    vzeroupper
    789 ; CHECK-NEXT:    retq
    790   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
    791   %mask.cast = bitcast i8 %mask to <8 x i1>
    792   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    793   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer ; false lanes are zeroed
    794   ret <2 x i64> %res
    795 }
    796 
    797 define <2 x i64> @mask_extract_v8i64_v2i64_2(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 2 (elements 4-5) of %a.
    798 ; CHECK-LABEL: mask_extract_v8i64_v2i64_2:
    799 ; CHECK:       # %bb.0:
    800 ; CHECK-NEXT:    kmovd %edi, %k1
    801 ; CHECK-NEXT:    vextracti64x2 $2, %zmm0, %xmm1 {%k1}
    802 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
    803 ; CHECK-NEXT:    vzeroupper
    804 ; CHECK-NEXT:    retq
    805   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
    806   %mask.cast = bitcast i8 %mask to <8 x i1>
    807   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    808   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru ; false lanes keep %passthru
    809   ret <2 x i64> %res
    810 }
    811 
    812 define <2 x i64> @mask_extract_v8i64_v2i64_3(<8 x i64> %a, <2 x i64> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 3 (elements 6-7) of %a.
    813 ; CHECK-LABEL: mask_extract_v8i64_v2i64_3:
    814 ; CHECK:       # %bb.0:
    815 ; CHECK-NEXT:    kmovd %edi, %k1
    816 ; CHECK-NEXT:    vextracti64x2 $3, %zmm0, %xmm1 {%k1}
    817 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
    818 ; CHECK-NEXT:    vzeroupper
    819 ; CHECK-NEXT:    retq
    820   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
    821   %mask.cast = bitcast i8 %mask to <8 x i1>
    822   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    823   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle, <2 x i64> %passthru ; false lanes keep %passthru
    824   ret <2 x i64> %res
    825 }
    826 
    827 define <2 x double> @mask_extract_v8f64_v2f64_0(<8 x double> %a, <2 x double> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 0 (elements 0-1) of %a.
    828 ; CHECK-LABEL: mask_extract_v8f64_v2f64_0:
    829 ; CHECK:       # %bb.0:
    830 ; CHECK-NEXT:    kmovd %edi, %k1
    831 ; CHECK-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
    832 ; CHECK-NEXT:    vzeroupper
    833 ; CHECK-NEXT:    retq
    834   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1> ; subvector 0: the expected output has no extract instruction, only a masked blend
    835   %mask.cast = bitcast i8 %mask to <8 x i1>
    836   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    837   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru ; false lanes keep %passthru
    838   ret <2 x double> %res
    839 }
    840 
    841 define <2 x double> @mask_extract_v8f64_v2f64_0_z(<8 x double> %a, i8 %mask) { ; Zero-masked extract of 128-bit subvector 0 (elements 0-1) of %a.
    842 ; CHECK-LABEL: mask_extract_v8f64_v2f64_0_z:
    843 ; CHECK:       # %bb.0:
    844 ; CHECK-NEXT:    kmovd %edi, %k1
    845 ; CHECK-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
    846 ; CHECK-NEXT:    vzeroupper
    847 ; CHECK-NEXT:    retq
    848   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1> ; subvector 0: the expected output has no extract instruction, only a zero-masked move
    849   %mask.cast = bitcast i8 %mask to <8 x i1>
    850   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    851   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer ; false lanes are zeroed
    852   ret <2 x double> %res
    853 }
    854 
    855 define <2 x double> @mask_extract_v8f64_v2f64_1(<8 x double> %a, <2 x double> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 1 (elements 2-3) of %a.
    856 ; CHECK-LABEL: mask_extract_v8f64_v2f64_1:
    857 ; CHECK:       # %bb.0:
    858 ; CHECK-NEXT:    kmovd %edi, %k1
    859 ; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
    860 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    861 ; CHECK-NEXT:    vzeroupper
    862 ; CHECK-NEXT:    retq
    863   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
    864   %mask.cast = bitcast i8 %mask to <8 x i1>
    865   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    866   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru ; false lanes keep %passthru
    867   ret <2 x double> %res
    868 }
    869 
    870 define <2 x double> @mask_extract_v8f64_v2f64_1_z(<8 x double> %a, i8 %mask) { ; Zero-masked extract of 128-bit subvector 1 (elements 2-3) of %a.
    871 ; CHECK-LABEL: mask_extract_v8f64_v2f64_1_z:
    872 ; CHECK:       # %bb.0:
    873 ; CHECK-NEXT:    kmovd %edi, %k1
    874 ; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z}
    875 ; CHECK-NEXT:    vzeroupper
    876 ; CHECK-NEXT:    retq
    877   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
    878   %mask.cast = bitcast i8 %mask to <8 x i1>
    879   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    880   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> zeroinitializer ; false lanes are zeroed
    881   ret <2 x double> %res
    882 }
    883 
    884 define <2 x double> @mask_extract_v8f64_v2f64_2(<8 x double> %a, <2 x double> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 2 (elements 4-5) of %a.
    885 ; CHECK-LABEL: mask_extract_v8f64_v2f64_2:
    886 ; CHECK:       # %bb.0:
    887 ; CHECK-NEXT:    kmovd %edi, %k1
    888 ; CHECK-NEXT:    vextractf64x2 $2, %zmm0, %xmm1 {%k1}
    889 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    890 ; CHECK-NEXT:    vzeroupper
    891 ; CHECK-NEXT:    retq
    892   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 4, i32 5>
    893   %mask.cast = bitcast i8 %mask to <8 x i1>
    894   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    895   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru ; false lanes keep %passthru
    896   ret <2 x double> %res
    897 }
    898 
    899 define <2 x double> @mask_extract_v8f64_v2f64_3(<8 x double> %a, <2 x double> %passthru, i8 %mask) { ; Merge-masked extract of 128-bit subvector 3 (elements 6-7) of %a.
    900 ; CHECK-LABEL: mask_extract_v8f64_v2f64_3:
    901 ; CHECK:       # %bb.0:
    902 ; CHECK-NEXT:    kmovd %edi, %k1
    903 ; CHECK-NEXT:    vextractf64x2 $3, %zmm0, %xmm1 {%k1}
    904 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
    905 ; CHECK-NEXT:    vzeroupper
    906 ; CHECK-NEXT:    retq
    907   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 6, i32 7>
    908   %mask.cast = bitcast i8 %mask to <8 x i1>
    909   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1> ; only mask elements 0-1 are used for the 2-lane result
    910   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle, <2 x double> %passthru ; false lanes keep %passthru
    911   ret <2 x double> %res
    912 }
    913 
    914 define <4 x i64> @mask_extract_v8i64_v4i64_0(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) { ; Merge-masked extract of 256-bit subvector 0 (elements 0-3) of %a.
    915 ; CHECK-LABEL: mask_extract_v8i64_v4i64_0:
    916 ; CHECK:       # %bb.0:
    917 ; CHECK-NEXT:    kmovd %edi, %k1
    918 ; CHECK-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
    919 ; CHECK-NEXT:    retq
    920   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; subvector 0: the expected output has no extract instruction, only a masked blend
    921   %mask.cast = bitcast i8 %mask to <8 x i1>
    922   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
    923   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru ; false lanes keep %passthru
    924   ret <4 x i64> %res
    925 }
    926 
    927 define <4 x i64> @mask_extract_v8i64_v4i64_0_z(<8 x i64> %a, i8 %mask) { ; Zero-masked extract of 256-bit subvector 0 (elements 0-3) of %a.
    928 ; CHECK-LABEL: mask_extract_v8i64_v4i64_0_z:
    929 ; CHECK:       # %bb.0:
    930 ; CHECK-NEXT:    kmovd %edi, %k1
    931 ; CHECK-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
    932 ; CHECK-NEXT:    retq
    933   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; subvector 0: the expected output has no extract instruction, only a zero-masked move
    934   %mask.cast = bitcast i8 %mask to <8 x i1>
    935   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
    936   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer ; false lanes are zeroed
    937   ret <4 x i64> %res
    938 }
    939 
    940 define <4 x i64> @mask_extract_v8i64_v4i64_1(<8 x i64> %a, <4 x i64> %passthru, i8 %mask) { ; Merge-masked extract of 256-bit subvector 1 (elements 4-7) of %a.
    941 ; CHECK-LABEL: mask_extract_v8i64_v4i64_1:
    942 ; CHECK:       # %bb.0:
    943 ; CHECK-NEXT:    kmovd %edi, %k1
    944 ; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 {%k1}
    945 ; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
    946 ; CHECK-NEXT:    retq
    947   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    948   %mask.cast = bitcast i8 %mask to <8 x i1>
    949   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
    950   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> %passthru ; false lanes keep %passthru
    951   ret <4 x i64> %res
    952 }
    953 
    954 define <4 x i64> @mask_extract_v8i64_v4i64_1_z(<8 x i64> %a, i8 %mask) { ; Zero-masked extract of 256-bit subvector 1 (elements 4-7) of %a.
    955 ; CHECK-LABEL: mask_extract_v8i64_v4i64_1_z:
    956 ; CHECK:       # %bb.0:
    957 ; CHECK-NEXT:    kmovd %edi, %k1
    958 ; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z}
    959 ; CHECK-NEXT:    retq
    960   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    961   %mask.cast = bitcast i8 %mask to <8 x i1>
    962   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
    963   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle, <4 x i64> zeroinitializer ; false lanes are zeroed
    964   ret <4 x i64> %res
    965 }
    966 
    967 define <4 x double> @mask_extract_v8f64_v4f64_0(<8 x double> %a, <4 x double> %passthru, i8 %mask) { ; Merge-masked extract of 256-bit subvector 0 (elements 0-3) of %a.
    968 ; CHECK-LABEL: mask_extract_v8f64_v4f64_0:
    969 ; CHECK:       # %bb.0:
    970 ; CHECK-NEXT:    kmovd %edi, %k1
    971 ; CHECK-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
    972 ; CHECK-NEXT:    retq
    973   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; subvector 0: the expected output has no extract instruction, only a masked blend
    974   %mask.cast = bitcast i8 %mask to <8 x i1>
    975   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
    976   %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> %passthru ; false lanes keep %passthru
    977   ret <4 x double> %res
    978 }
    979 
    980 define <4 x double> @mask_extract_v8f64_v4f64_0_z(<8 x double> %a, i8 %mask) { ; Zero-masked extract of 256-bit subvector 0 (elements 0-3) of %a.
    981 ; CHECK-LABEL: mask_extract_v8f64_v4f64_0_z:
    982 ; CHECK:       # %bb.0:
    983 ; CHECK-NEXT:    kmovd %edi, %k1
    984 ; CHECK-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
    985 ; CHECK-NEXT:    retq
    986   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; subvector 0: the expected output has no extract instruction, only a zero-masked move
    987   %mask.cast = bitcast i8 %mask to <8 x i1>
    988   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
    989   %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> zeroinitializer ; false lanes are zeroed
    990   ret <4 x double> %res
    991 }
    992 
    993 define <4 x double> @mask_extract_v8f64_v4f64_1(<8 x double> %a, <4 x double> %passthru, i8 %mask) { ; Merge-masked extract of 256-bit subvector 1 (elements 4-7) of %a.
    994 ; CHECK-LABEL: mask_extract_v8f64_v4f64_1:
    995 ; CHECK:       # %bb.0:
    996 ; CHECK-NEXT:    kmovd %edi, %k1
    997 ; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 {%k1}
    998 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
    999 ; CHECK-NEXT:    retq
   1000   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1001   %mask.cast = bitcast i8 %mask to <8 x i1>
   1002   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
   1003   %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> %passthru ; false lanes keep %passthru
   1004   ret <4 x double> %res
   1005 }
   1006 
   1007 define <4 x double> @mask_extract_v8f64_v4f64_1_z(<8 x double> %a, i8 %mask) { ; Zero-masked extract of 256-bit subvector 1 (elements 4-7) of %a.
   1008 ; CHECK-LABEL: mask_extract_v8f64_v4f64_1_z:
   1009 ; CHECK:       # %bb.0:
   1010 ; CHECK-NEXT:    kmovd %edi, %k1
   1011 ; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
   1012 ; CHECK-NEXT:    retq
   1013   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1014   %mask.cast = bitcast i8 %mask to <8 x i1>
   1015   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
   1016   %res = select <4 x i1> %mask.extract, <4 x double> %shuffle, <4 x double> zeroinitializer ; false lanes are zeroed
   1017   ret <4 x double> %res
   1018 }
   1019 
   1020 define <8 x i32> @mask_cast_extract_v8i64_v8i32_0(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) { ; Merge-masked extract of i64 elements 0-3 of %a, with the mask applied per i32 lane after a bitcast.
   1021 ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0:
   1022 ; CHECK:       # %bb.0:
   1023 ; CHECK-NEXT:    kmovd %edi, %k1
   1024 ; CHECK-NEXT:    vpblendmd %ymm0, %ymm1, %ymm0 {%k1}
   1025 ; CHECK-NEXT:    retq
   1026   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; subvector 0: the expected output has no extract instruction, only a masked dword blend
   1027   %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32> ; view the 4 x i64 extract as 8 x i32 so each mask element covers one 32-bit lane
   1028   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
   1029   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> %passthru ; false lanes keep %passthru
   1030   ret <8 x i32> %res
   1031 }
   1032 
   1033 define <8 x i32> @mask_cast_extract_v8i64_v8i32_0_z(<8 x i64> %a, i8 %mask) { ; Zero-masked extract of i64 elements 0-3 of %a, with the mask applied per i32 lane after a bitcast.
   1034 ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_0_z:
   1035 ; CHECK:       # %bb.0:
   1036 ; CHECK-NEXT:    kmovd %edi, %k1
   1037 ; CHECK-NEXT:    vmovdqa32 %ymm0, %ymm0 {%k1} {z}
   1038 ; CHECK-NEXT:    retq
   1039   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; subvector 0: the expected output has no extract instruction, only a zero-masked dword move
   1040   %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32> ; view the 4 x i64 extract as 8 x i32 so each mask element covers one 32-bit lane
   1041   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
   1042   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> zeroinitializer ; false lanes are zeroed
   1043   ret <8 x i32> %res
   1044 }
   1045 
   1046 define <8 x i32> @mask_cast_extract_v8i64_v8i32_1(<8 x i64> %a, <8 x i32> %passthru, i8 %mask) { ; Merge-masked extract of i64 elements 4-7 of %a, with the mask applied per i32 lane after a bitcast.
   1047 ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1:
   1048 ; CHECK:       # %bb.0:
   1049 ; CHECK-NEXT:    kmovd %edi, %k1
   1050 ; CHECK-NEXT:    vextracti32x8 $1, %zmm0, %ymm1 {%k1}
   1051 ; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
   1052 ; CHECK-NEXT:    retq
   1053   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1054   %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32> ; view the 4 x i64 extract as 8 x i32; the expected extract instruction is the 32x8 form, not 64x4
   1055   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
   1056   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> %passthru ; false lanes keep %passthru
   1057   ret <8 x i32> %res
   1058 }
   1059 
   1060 define <8 x i32> @mask_cast_extract_v8i64_v8i32_1_z(<8 x i64> %a, i8 %mask) { ; Zero-masked extract of i64 elements 4-7 of %a, with the mask applied per i32 lane after a bitcast.
   1061 ; CHECK-LABEL: mask_cast_extract_v8i64_v8i32_1_z:
   1062 ; CHECK:       # %bb.0:
   1063 ; CHECK-NEXT:    kmovd %edi, %k1
   1064 ; CHECK-NEXT:    vextracti32x8 $1, %zmm0, %ymm0 {%k1} {z}
   1065 ; CHECK-NEXT:    retq
   1066   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1067   %shuffle.cast = bitcast <4 x i64> %shuffle to <8 x i32> ; view the 4 x i64 extract as 8 x i32; the expected extract instruction is the 32x8 form, not 64x4
   1068   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
   1069   %res = select <8 x i1> %mask.cast, <8 x i32> %shuffle.cast, <8 x i32> zeroinitializer ; false lanes are zeroed
   1070   ret <8 x i32> %res
   1071 }
   1072 
   1073 define <8 x float> @mask_cast_extract_v8f64_v8f32_0(<8 x double> %a, <8 x float> %passthru, i8 %mask) { ; Merge-masked extract of double elements 0-3 of %a, with the mask applied per float lane after a bitcast.
   1074 ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0:
   1075 ; CHECK:       # %bb.0:
   1076 ; CHECK-NEXT:    kmovd %edi, %k1
   1077 ; CHECK-NEXT:    vblendmps %ymm0, %ymm1, %ymm0 {%k1}
   1078 ; CHECK-NEXT:    retq
   1079   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; subvector 0: the expected output has no extract instruction, only a masked single-precision blend
   1080   %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float> ; view the 4 x double extract as 8 x float so each mask element covers one 32-bit lane
   1081   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
   1082   %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> %passthru ; false lanes keep %passthru
   1083   ret <8 x float> %res
   1084 }
   1085 
   1086 define <8 x float> @mask_cast_extract_v8f64_v8f32_0_z(<8 x double> %a, i8 %mask) { ; Zero-masked extract of double elements 0-3 of %a, with the mask applied per float lane after a bitcast.
   1087 ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_0_z:
   1088 ; CHECK:       # %bb.0:
   1089 ; CHECK-NEXT:    kmovd %edi, %k1
   1090 ; CHECK-NEXT:    vmovaps %ymm0, %ymm0 {%k1} {z}
   1091 ; CHECK-NEXT:    retq
   1092   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; subvector 0: the expected output has no extract instruction, only a zero-masked single-precision move
   1093   %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float> ; view the 4 x double extract as 8 x float so each mask element covers one 32-bit lane
   1094   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
   1095   %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> zeroinitializer ; false lanes are zeroed
   1096   ret <8 x float> %res
   1097 }
   1098 
   1099 define <8 x float> @mask_cast_extract_v8f64_v8f32_1(<8 x double> %a, <8 x float> %passthru, i8 %mask) { ; Merge-masked extract of double elements 4-7 of %a, with the mask applied per float lane after a bitcast.
   1100 ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1:
   1101 ; CHECK:       # %bb.0:
   1102 ; CHECK-NEXT:    kmovd %edi, %k1
   1103 ; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm1 {%k1}
   1104 ; CHECK-NEXT:    vmovaps %ymm1, %ymm0
   1105 ; CHECK-NEXT:    retq
   1106   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1107   %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float> ; view the 4 x double extract as 8 x float; the expected extract instruction is the 32x8 form, not 64x4
   1108   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
   1109   %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> %passthru ; false lanes keep %passthru
   1110   ret <8 x float> %res
   1111 }
   1112 
   1113 define <8 x float> @mask_cast_extract_v8f64_v8f32_1_z(<8 x double> %a, i8 %mask) { ; Zero-masked extract of double elements 4-7 of %a, with the mask applied per float lane after a bitcast.
   1114 ; CHECK-LABEL: mask_cast_extract_v8f64_v8f32_1_z:
   1115 ; CHECK:       # %bb.0:
   1116 ; CHECK-NEXT:    kmovd %edi, %k1
   1117 ; CHECK-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
   1118 ; CHECK-NEXT:    retq
   1119   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1120   %shuffle.cast = bitcast <4 x double> %shuffle to <8 x float> ; view the 4 x double extract as 8 x float; the expected extract instruction is the 32x8 form, not 64x4
   1121   %mask.cast = bitcast i8 %mask to <8 x i1> ; all 8 mask elements are used, one per result lane
   1122   %res = select <8 x i1> %mask.cast, <8 x float> %shuffle.cast, <8 x float> zeroinitializer ; false lanes are zeroed
   1123   ret <8 x float> %res
   1124 }
   1125 
   1126 define <4 x i32> @mask_cast_extract_v8i64_v4i32_0(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) { ; Merge-masked extract of i64 elements 0-1 of %a, with the mask applied per i32 lane after a bitcast.
   1127 ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0:
   1128 ; CHECK:       # %bb.0:
   1129 ; CHECK-NEXT:    kmovd %edi, %k1
   1130 ; CHECK-NEXT:    vpblendmd %xmm0, %xmm1, %xmm0 {%k1}
   1131 ; CHECK-NEXT:    vzeroupper
   1132 ; CHECK-NEXT:    retq
   1133   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> ; subvector 0: the expected output has no extract instruction, only a masked dword blend
   1134   %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32> ; view the 2 x i64 extract as 4 x i32 so each mask element covers one 32-bit lane
   1135   %mask.cast = bitcast i8 %mask to <8 x i1>
   1136   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
   1137   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> %passthru ; false lanes keep %passthru
   1138   ret <4 x i32> %res
   1139 }
   1140 
   1141 define <4 x i32> @mask_cast_extract_v8i64_v4i32_0_z(<8 x i64> %a, i8 %mask) { ; Zero-masked extract of i64 elements 0-1 of %a, with the mask applied per i32 lane after a bitcast.
   1142 ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_0_z:
   1143 ; CHECK:       # %bb.0:
   1144 ; CHECK-NEXT:    kmovd %edi, %k1
   1145 ; CHECK-NEXT:    vmovdqa32 %xmm0, %xmm0 {%k1} {z}
   1146 ; CHECK-NEXT:    vzeroupper
   1147 ; CHECK-NEXT:    retq
   1148   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1> ; subvector 0: the expected output has no extract instruction, only a zero-masked dword move
   1149   %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32> ; view the 2 x i64 extract as 4 x i32 so each mask element covers one 32-bit lane
   1150   %mask.cast = bitcast i8 %mask to <8 x i1>
   1151   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
   1152   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> zeroinitializer ; false lanes are zeroed
   1153   ret <4 x i32> %res
   1154 }
   1155 
   1156 define <4 x i32> @mask_cast_extract_v8i64_v4i32_1(<8 x i64> %a, <4 x i32> %passthru, i8 %mask) { ; Merge-masked extract of i64 elements 2-3 of %a, with the mask applied per i32 lane after a bitcast.
   1157 ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1:
   1158 ; CHECK:       # %bb.0:
   1159 ; CHECK-NEXT:    kmovd %edi, %k1
   1160 ; CHECK-NEXT:    vextracti32x4 $1, %zmm0, %xmm1 {%k1}
   1161 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
   1162 ; CHECK-NEXT:    vzeroupper
   1163 ; CHECK-NEXT:    retq
   1164   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
   1165   %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32> ; view the 2 x i64 extract as 4 x i32; the expected extract instruction is the 32x4 form, not 64x2
   1166   %mask.cast = bitcast i8 %mask to <8 x i1>
   1167   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
   1168   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> %passthru ; false lanes keep %passthru
   1169   ret <4 x i32> %res
   1170 }
   1171 
   1172 define <4 x i32> @mask_cast_extract_v8i64_v4i32_1_z(<8 x i64> %a, i8 %mask) { ; Zero-masked extract of i64 elements 2-3 of %a, with the mask applied per i32 lane after a bitcast.
   1173 ; CHECK-LABEL: mask_cast_extract_v8i64_v4i32_1_z:
   1174 ; CHECK:       # %bb.0:
   1175 ; CHECK-NEXT:    kmovd %edi, %k1
   1176 ; CHECK-NEXT:    vextracti32x4 $1, %zmm0, %xmm0 {%k1} {z}
   1177 ; CHECK-NEXT:    vzeroupper
   1178 ; CHECK-NEXT:    retq
   1179   %shuffle = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
   1180   %shuffle.cast = bitcast <2 x i64> %shuffle to <4 x i32> ; view the 2 x i64 extract as 4 x i32; the expected extract instruction is the 32x4 form, not 64x2
   1181   %mask.cast = bitcast i8 %mask to <8 x i1>
   1182   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3> ; only mask elements 0-3 are used for the 4-lane result
   1183   %res = select <4 x i1> %mask.extract, <4 x i32> %shuffle.cast, <4 x i32> zeroinitializer ; false lanes are zeroed
   1184   ret <4 x i32> %res
   1185 }
   1186 
   1187 define <4 x float> @mask_cast_extract_v8f64_v4f32_0(<8 x double> %a, <4 x float> %passthru, i8 %mask) {
   1188 ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0:
   1189 ; CHECK:       # %bb.0:
   1190 ; CHECK-NEXT:    kmovd %edi, %k1
   1191 ; CHECK-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
   1192 ; CHECK-NEXT:    vzeroupper
   1193 ; CHECK-NEXT:    retq
          ; Elements 0-1 of %a (<8 x double>) are reinterpreted as <4 x float> and selected against
          ; %passthru using mask elements 0-3. Expected asm: a masked vblendmps, with no extract
          ; instruction.
   1194   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
   1195   %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
   1196   %mask.cast = bitcast i8 %mask to <8 x i1>
   1197   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1198   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> %passthru
   1199   ret <4 x float> %res
   1200 }
   1201 
   1202 define <4 x float> @mask_cast_extract_v8f64_v4f32_0_z(<8 x double> %a, i8 %mask) {
   1203 ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_0_z:
   1204 ; CHECK:       # %bb.0:
   1205 ; CHECK-NEXT:    kmovd %edi, %k1
   1206 ; CHECK-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
   1207 ; CHECK-NEXT:    vzeroupper
   1208 ; CHECK-NEXT:    retq
          ; Elements 0-1 of %a (<8 x double>) are reinterpreted as <4 x float> and selected against
          ; zero using mask elements 0-3. Expected asm: a zero-masked vmovaps, with no extract
          ; instruction.
   1209   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
   1210   %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
   1211   %mask.cast = bitcast i8 %mask to <8 x i1>
   1212   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1213   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> zeroinitializer
   1214   ret <4 x float> %res
   1215 }
   1216 
   1217 define <4 x float> @mask_cast_extract_v8f64_v4f32_1(<8 x double> %a, <4 x float> %passthru, i8 %mask) {
   1218 ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1:
   1219 ; CHECK:       # %bb.0:
   1220 ; CHECK-NEXT:    kmovd %edi, %k1
   1221 ; CHECK-NEXT:    vextractf32x4 $1, %zmm0, %xmm1 {%k1}
   1222 ; CHECK-NEXT:    vmovaps %xmm1, %xmm0
   1223 ; CHECK-NEXT:    vzeroupper
   1224 ; CHECK-NEXT:    retq
          ; Elements 2-3 of %a (<8 x double>) are reinterpreted as <4 x float> and selected against
          ; %passthru using mask elements 0-3. Expected asm: a merge-masked vextractf32x4 $1
          ; followed by a register copy.
   1225   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
   1226   %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
   1227   %mask.cast = bitcast i8 %mask to <8 x i1>
   1228   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1229   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> %passthru
   1230   ret <4 x float> %res
   1231 }
   1232 
   1233 define <4 x float> @mask_cast_extract_v8f64_v4f32_1_z(<8 x double> %a, i8 %mask) {
   1234 ; CHECK-LABEL: mask_cast_extract_v8f64_v4f32_1_z:
   1235 ; CHECK:       # %bb.0:
   1236 ; CHECK-NEXT:    kmovd %edi, %k1
   1237 ; CHECK-NEXT:    vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
   1238 ; CHECK-NEXT:    vzeroupper
   1239 ; CHECK-NEXT:    retq
          ; Elements 2-3 of %a (<8 x double>) are reinterpreted as <4 x float> and selected against
          ; zero using mask elements 0-3. Expected asm: a single zero-masked vextractf32x4 $1.
   1240   %shuffle = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 2, i32 3>
   1241   %shuffle.cast = bitcast <2 x double> %shuffle to <4 x float>
   1242   %mask.cast = bitcast i8 %mask to <8 x i1>
   1243   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1244   %res = select <4 x i1> %mask.extract, <4 x float> %shuffle.cast, <4 x float> zeroinitializer
   1245   ret <4 x float> %res
   1246 }
   1247 
   1248 define <4 x i64> @mask_cast_extract_v16i32_v4i64_0(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) {
   1249 ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_0:
   1250 ; CHECK:       # %bb.0:
   1251 ; CHECK-NEXT:    kmovd %edi, %k1
   1252 ; CHECK-NEXT:    vpblendmq %ymm0, %ymm1, %ymm0 {%k1}
   1253 ; CHECK-NEXT:    retq
          ; Elements 0-7 of %a (<16 x i32>) are reinterpreted as <4 x i64> and selected against
          ; %passthru using mask elements 0-3. Expected asm: a masked vpblendmq, with no extract
          ; instruction.
   1254   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1255   %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
   1256   %mask.cast = bitcast i8 %mask to <8 x i1>
   1257   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1258   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> %passthru
   1259   ret <4 x i64> %res
   1260 }
   1261 
   1262 define <4 x i64> @mask_cast_extract_v16i32_v4i64_0_z(<16 x i32> %a, i8 %mask) {
   1263 ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_0_z:
   1264 ; CHECK:       # %bb.0:
   1265 ; CHECK-NEXT:    kmovd %edi, %k1
   1266 ; CHECK-NEXT:    vmovdqa64 %ymm0, %ymm0 {%k1} {z}
   1267 ; CHECK-NEXT:    retq
          ; Elements 0-7 of %a (<16 x i32>) are reinterpreted as <4 x i64> and selected against
          ; zero using mask elements 0-3. Expected asm: a zero-masked vmovdqa64, with no extract
          ; instruction.
   1268   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1269   %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
   1270   %mask.cast = bitcast i8 %mask to <8 x i1>
   1271   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1272   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> zeroinitializer
   1273   ret <4 x i64> %res
   1274 }
   1275 
   1276 define <4 x i64> @mask_cast_extract_v16i32_v4i64_1(<16 x i32> %a, <4 x i64> %passthru, i8 %mask) {
   1277 ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1:
   1278 ; CHECK:       # %bb.0:
   1279 ; CHECK-NEXT:    kmovd %edi, %k1
   1280 ; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm1 {%k1}
   1281 ; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
   1282 ; CHECK-NEXT:    retq
          ; Elements 8-15 of %a (<16 x i32>) are reinterpreted as <4 x i64> and selected against
          ; %passthru using mask elements 0-3. Expected asm: a merge-masked vextracti64x4 $1
          ; followed by a register copy.
   1283   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   1284   %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
   1285   %mask.cast = bitcast i8 %mask to <8 x i1>
   1286   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1287   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> %passthru
   1288   ret <4 x i64> %res
   1289 }
   1290 
   1291 define <4 x i64> @mask_cast_extract_v16i32_v4i64_1_z(<16 x i32> %a, i8 %mask) {
   1292 ; CHECK-LABEL: mask_cast_extract_v16i32_v4i64_1_z:
   1293 ; CHECK:       # %bb.0:
   1294 ; CHECK-NEXT:    kmovd %edi, %k1
   1295 ; CHECK-NEXT:    vextracti64x4 $1, %zmm0, %ymm0 {%k1} {z}
   1296 ; CHECK-NEXT:    retq
          ; Elements 8-15 of %a (<16 x i32>) are reinterpreted as <4 x i64> and selected against
          ; zero using mask elements 0-3. Expected asm: a single zero-masked vextracti64x4 $1.
   1297   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   1298   %shuffle.cast = bitcast <8 x i32> %shuffle to <4 x i64>
   1299   %mask.cast = bitcast i8 %mask to <8 x i1>
   1300   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1301   %res = select <4 x i1> %mask.extract, <4 x i64> %shuffle.cast, <4 x i64> zeroinitializer
   1302   ret <4 x i64> %res
   1303 }
   1304 
   1305 define <4 x double> @mask_cast_extract_v16f32_v4f64_0(<16 x float> %a, <4 x double> %passthru, i8 %mask) {
   1306 ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0:
   1307 ; CHECK:       # %bb.0:
   1308 ; CHECK-NEXT:    kmovd %edi, %k1
   1309 ; CHECK-NEXT:    vblendmpd %ymm0, %ymm1, %ymm0 {%k1}
   1310 ; CHECK-NEXT:    retq
          ; Elements 0-7 of %a (<16 x float>) are reinterpreted as <4 x double> and selected against
          ; %passthru using mask elements 0-3. Expected asm: a masked vblendmpd, with no extract
          ; instruction.
   1311   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1312   %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
   1313   %mask.cast = bitcast i8 %mask to <8 x i1>
   1314   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1315   %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> %passthru
   1316   ret <4 x double> %res
   1317 }
   1318 
   1319 define <4 x double> @mask_cast_extract_v16f32_v4f64_0_z(<16 x float> %a, i8 %mask) {
   1320 ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_0_z:
   1321 ; CHECK:       # %bb.0:
   1322 ; CHECK-NEXT:    kmovd %edi, %k1
   1323 ; CHECK-NEXT:    vmovapd %ymm0, %ymm0 {%k1} {z}
   1324 ; CHECK-NEXT:    retq
          ; Elements 0-7 of %a (<16 x float>) are reinterpreted as <4 x double> and selected against
          ; zero using mask elements 0-3. Expected asm: a zero-masked vmovapd, with no extract
          ; instruction.
   1325   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1326   %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
   1327   %mask.cast = bitcast i8 %mask to <8 x i1>
   1328   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1329   %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> zeroinitializer
   1330   ret <4 x double> %res
   1331 }
   1332 
   1333 define <4 x double> @mask_cast_extract_v16f32_v4f64_1(<16 x float> %a, <4 x double> %passthru, i8 %mask) {
   1334 ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1:
   1335 ; CHECK:       # %bb.0:
   1336 ; CHECK-NEXT:    kmovd %edi, %k1
   1337 ; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm1 {%k1}
   1338 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
   1339 ; CHECK-NEXT:    retq
          ; Elements 8-15 of %a (<16 x float>) are reinterpreted as <4 x double> and selected
          ; against %passthru using mask elements 0-3. Expected asm: a merge-masked
          ; vextractf64x4 $1 followed by a register copy.
   1340   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   1341   %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
   1342   %mask.cast = bitcast i8 %mask to <8 x i1>
   1343   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1344   %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> %passthru
   1345   ret <4 x double> %res
   1346 }
   1347 
   1348 define <4 x double> @mask_cast_extract_v16f32_v4f64_1_z(<16 x float> %a, i8 %mask) {
   1349 ; CHECK-LABEL: mask_cast_extract_v16f32_v4f64_1_z:
   1350 ; CHECK:       # %bb.0:
   1351 ; CHECK-NEXT:    kmovd %edi, %k1
   1352 ; CHECK-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
   1353 ; CHECK-NEXT:    retq
          ; Elements 8-15 of %a (<16 x float>) are reinterpreted as <4 x double> and selected
          ; against zero using mask elements 0-3. Expected asm: a single zero-masked
          ; vextractf64x4 $1.
   1354   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   1355   %shuffle.cast = bitcast <8 x float> %shuffle to <4 x double>
   1356   %mask.cast = bitcast i8 %mask to <8 x i1>
   1357   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1358   %res = select <4 x i1> %mask.extract, <4 x double> %shuffle.cast, <4 x double> zeroinitializer
   1359   ret <4 x double> %res
   1360 }
   1361 
   1362 define <2 x i64> @mask_cast_extract_v16i32_v2i64_0(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) {
   1363 ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_0:
   1364 ; CHECK:       # %bb.0:
   1365 ; CHECK-NEXT:    kmovd %edi, %k1
   1366 ; CHECK-NEXT:    vpblendmq %xmm0, %xmm1, %xmm0 {%k1}
   1367 ; CHECK-NEXT:    vzeroupper
   1368 ; CHECK-NEXT:    retq
          ; Elements 0-3 of %a (<16 x i32>) are reinterpreted as <2 x i64> and selected against
          ; %passthru using mask elements 0-1. Expected asm: a masked vpblendmq, with no extract
          ; instruction.
   1369   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1370   %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
   1371   %mask.cast = bitcast i8 %mask to <8 x i1>
   1372   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1373   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> %passthru
   1374   ret <2 x i64> %res
   1375 }
   1376 
   1377 define <2 x i64> @mask_cast_extract_v16i32_v2i64_0_z(<16 x i32> %a, i8 %mask) {
   1378 ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_0_z:
   1379 ; CHECK:       # %bb.0:
   1380 ; CHECK-NEXT:    kmovd %edi, %k1
   1381 ; CHECK-NEXT:    vmovdqa64 %xmm0, %xmm0 {%k1} {z}
   1382 ; CHECK-NEXT:    vzeroupper
   1383 ; CHECK-NEXT:    retq
          ; Elements 0-3 of %a (<16 x i32>) are reinterpreted as <2 x i64> and selected against
          ; zero using mask elements 0-1. Expected asm: a zero-masked vmovdqa64, with no extract
          ; instruction.
   1384   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1385   %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
   1386   %mask.cast = bitcast i8 %mask to <8 x i1>
   1387   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1388   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> zeroinitializer
   1389   ret <2 x i64> %res
   1390 }
   1391 
   1392 define <2 x i64> @mask_cast_extract_v16i32_v2i64_1(<16 x i32> %a, <2 x i64> %passthru, i8 %mask) {
   1393 ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1:
   1394 ; CHECK:       # %bb.0:
   1395 ; CHECK-NEXT:    kmovd %edi, %k1
   1396 ; CHECK-NEXT:    vextracti64x2 $1, %zmm0, %xmm1 {%k1}
   1397 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
   1398 ; CHECK-NEXT:    vzeroupper
   1399 ; CHECK-NEXT:    retq
          ; Elements 4-7 of %a (<16 x i32>) are reinterpreted as <2 x i64> and selected against
          ; %passthru using mask elements 0-1. Expected asm: a merge-masked vextracti64x2 $1
          ; followed by a register copy.
   1400   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1401   %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
   1402   %mask.cast = bitcast i8 %mask to <8 x i1>
   1403   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1404   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> %passthru
   1405   ret <2 x i64> %res
   1406 }
   1407 
   1408 define <2 x i64> @mask_cast_extract_v16i32_v2i64_1_z(<16 x i32> %a, i8 %mask) {
   1409 ; CHECK-LABEL: mask_cast_extract_v16i32_v2i64_1_z:
   1410 ; CHECK:       # %bb.0:
   1411 ; CHECK-NEXT:    kmovd %edi, %k1
   1412 ; CHECK-NEXT:    vextracti64x2 $1, %zmm0, %xmm0 {%k1} {z}
   1413 ; CHECK-NEXT:    vzeroupper
   1414 ; CHECK-NEXT:    retq
          ; Elements 4-7 of %a (<16 x i32>) are reinterpreted as <2 x i64> and selected against
          ; zero using mask elements 0-1. Expected asm: a single zero-masked vextracti64x2 $1.
   1415   %shuffle = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1416   %shuffle.cast = bitcast <4 x i32> %shuffle to <2 x i64>
   1417   %mask.cast = bitcast i8 %mask to <8 x i1>
   1418   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1419   %res = select <2 x i1> %mask.extract, <2 x i64> %shuffle.cast, <2 x i64> zeroinitializer
   1420   ret <2 x i64> %res
   1421 }
   1422 
   1423 define <2 x double> @mask_cast_extract_v16f32_v2f64_0(<16 x float> %a, <2 x double> %passthru, i8 %mask) {
   1424 ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0:
   1425 ; CHECK:       # %bb.0:
   1426 ; CHECK-NEXT:    kmovd %edi, %k1
   1427 ; CHECK-NEXT:    vblendmpd %xmm0, %xmm1, %xmm0 {%k1}
   1428 ; CHECK-NEXT:    vzeroupper
   1429 ; CHECK-NEXT:    retq
          ; Elements 0-3 of %a (<16 x float>) are reinterpreted as <2 x double> and selected against
          ; %passthru using mask elements 0-1. Expected asm: a masked vblendmpd, with no extract
          ; instruction.
   1430   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1431   %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
   1432   %mask.cast = bitcast i8 %mask to <8 x i1>
   1433   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1434   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru
   1435   ret <2 x double> %res
   1436 }
   1437 
   1438 define <2 x double> @mask_cast_extract_v16f32_v2f64_0_z(<16 x float> %a, i8 %mask) {
   1439 ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_0_z:
   1440 ; CHECK:       # %bb.0:
   1441 ; CHECK-NEXT:    kmovd %edi, %k1
   1442 ; CHECK-NEXT:    vmovapd %xmm0, %xmm0 {%k1} {z}
   1443 ; CHECK-NEXT:    vzeroupper
   1444 ; CHECK-NEXT:    retq
          ; Elements 0-3 of %a (<16 x float>) are reinterpreted as <2 x double> and selected against
          ; zero using mask elements 0-1. Expected asm: a zero-masked vmovapd, with no extract
          ; instruction.
   1445   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1446   %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
   1447   %mask.cast = bitcast i8 %mask to <8 x i1>
   1448   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1449   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> zeroinitializer
   1450   ret <2 x double> %res
   1451 }
   1452 
   1453 define <2 x double> @mask_cast_extract_v16f32_v2f64_1(<16 x float> %a, <2 x double> %passthru, i8 %mask) {
   1454 ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1:
   1455 ; CHECK:       # %bb.0:
   1456 ; CHECK-NEXT:    kmovd %edi, %k1
   1457 ; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm1 {%k1}
   1458 ; CHECK-NEXT:    vmovapd %xmm1, %xmm0
   1459 ; CHECK-NEXT:    vzeroupper
   1460 ; CHECK-NEXT:    retq
          ; Elements 4-7 of %a (<16 x float>) are reinterpreted as <2 x double> and selected against
          ; %passthru using mask elements 0-1. Expected asm: a merge-masked vextractf64x2 $1
          ; followed by a register copy.
   1461   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1462   %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
   1463   %mask.cast = bitcast i8 %mask to <8 x i1>
   1464   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1465   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> %passthru
   1466   ret <2 x double> %res
   1467 }
   1468 
   1469 define <2 x double> @mask_cast_extract_v16f32_v2f64_1_z(<16 x float> %a, i8 %mask) {
   1470 ; CHECK-LABEL: mask_cast_extract_v16f32_v2f64_1_z:
   1471 ; CHECK:       # %bb.0:
   1472 ; CHECK-NEXT:    kmovd %edi, %k1
   1473 ; CHECK-NEXT:    vextractf64x2 $1, %zmm0, %xmm0 {%k1} {z}
   1474 ; CHECK-NEXT:    vzeroupper
   1475 ; CHECK-NEXT:    retq
          ; Elements 4-7 of %a (<16 x float>) are reinterpreted as <2 x double> and selected against
          ; zero using mask elements 0-1. Expected asm: a single zero-masked vextractf64x2 $1.
   1476   %shuffle = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   1477   %shuffle.cast = bitcast <4 x float> %shuffle to <2 x double>
   1478   %mask.cast = bitcast i8 %mask to <8 x i1>
   1479   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1480   %res = select <2 x i1> %mask.extract, <2 x double> %shuffle.cast, <2 x double> zeroinitializer
   1481   ret <2 x double> %res
   1482 }
   1483 
   1484 define <2 x double> @broadcast_v4f32_0101_from_v2f32_mask(double* %x, <2 x double> %passthru, i8 %mask) {
   1485 ; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_mask:
   1486 ; CHECK:       # %bb.0:
   1487 ; CHECK-NEXT:    kmovd %esi, %k1
   1488 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
   1489 ; CHECK-NEXT:    retq
          ; Loads one double (align 1), places it in both elements of a <2 x double>, and selects
          ; against %passthru using mask elements 0-1. Expected asm: a merge-masked vmovddup
          ; straight from memory. Note: the function name mentions f32, but the IR uses double.
   1490   %q = load double, double* %x, align 1
   1491   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
   1492   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
   1493   %mask.cast = bitcast i8 %mask to <8 x i1>
   1494   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1495   %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> %passthru
   1496   ret <2 x double> %res
   1497 }
   1498 
   1499 define <2 x double> @broadcast_v4f32_0101_from_v2f32_maskz(double* %x, i8 %mask) {
   1500 ; CHECK-LABEL: broadcast_v4f32_0101_from_v2f32_maskz:
   1501 ; CHECK:       # %bb.0:
   1502 ; CHECK-NEXT:    kmovd %esi, %k1
   1503 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
   1504 ; CHECK-NEXT:    retq
          ; Loads one double (align 1), places it in both elements of a <2 x double>, and selects
          ; against zero using mask elements 0-1. Expected asm: a zero-masked vmovddup straight
          ; from memory. Note: the function name mentions f32, but the IR uses double.
   1505   %q = load double, double* %x, align 1
   1506   %vecinit.i = insertelement <2 x double> undef, double %q, i32 0
   1507   %vecinit2.i = insertelement <2 x double> %vecinit.i, double %q, i32 1
   1508   %mask.cast = bitcast i8 %mask to <8 x i1>
   1509   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
   1510   %res = select <2 x i1> %mask.extract, <2 x double> %vecinit2.i, <2 x double> zeroinitializer
   1511   ret <2 x double> %res
   1512 }
   1513 
   1514 define <8 x float> @test_broadcast_2f64_8f32_mask(<2 x double> *%p, i8 %mask, <8 x float> %passthru) nounwind {
   1515 ; CHECK-LABEL: test_broadcast_2f64_8f32_mask:
   1516 ; CHECK:       # %bb.0:
   1517 ; CHECK-NEXT:    kmovd %esi, %k1
   1518 ; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
   1519 ; CHECK-NEXT:    retq
         ; Loads a <2 x double>, repeats it twice to form <4 x double>, reinterprets that as
         ; <8 x float>, and selects against %passthru with all 8 mask elements. Expected asm:
         ; a merge-masked vbroadcastf32x4 from memory.
   1520  %1 = load <2 x double>, <2 x double> *%p
   1521  %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1522  %3 = bitcast <4 x double> %2 to <8 x float>
   1523  %mask.cast = bitcast i8 %mask to <8 x i1>
   1524  %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> %passthru
   1525  ret <8 x float> %res
   1526 }
   1527 
   1528 define <8 x float> @test_broadcast_2f64_8f32_maskz(<2 x double> *%p, i8 %mask) nounwind {
   1529 ; CHECK-LABEL: test_broadcast_2f64_8f32_maskz:
   1530 ; CHECK:       # %bb.0:
   1531 ; CHECK-NEXT:    kmovd %esi, %k1
   1532 ; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
   1533 ; CHECK-NEXT:    retq
         ; Loads a <2 x double>, repeats it twice to form <4 x double>, reinterprets that as
         ; <8 x float>, and selects against zero with all 8 mask elements. Expected asm:
         ; a zero-masked vbroadcastf32x4 from memory.
   1534  %1 = load <2 x double>, <2 x double> *%p
   1535  %2 = shufflevector <2 x double> %1, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1536  %3 = bitcast <4 x double> %2 to <8 x float>
   1537  %mask.cast = bitcast i8 %mask to <8 x i1>
   1538  %res = select <8 x i1> %mask.cast, <8 x float> %3, <8 x float> zeroinitializer
   1539  ret <8 x float> %res
   1540 }
   1541 
   1542 define <8 x i32> @test_broadcast_2i64_8i32_mask(<2 x i64> *%p, i8 %mask, <8 x i32> %passthru) nounwind {
   1543 ; CHECK-LABEL: test_broadcast_2i64_8i32_mask:
   1544 ; CHECK:       # %bb.0:
   1545 ; CHECK-NEXT:    kmovd %esi, %k1
   1546 ; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
   1547 ; CHECK-NEXT:    retq
         ; Loads a <2 x i64>, repeats it twice to form <4 x i64>, reinterprets that as
         ; <8 x i32>, and selects against %passthru with all 8 mask elements. Expected asm:
         ; a merge-masked vbroadcasti32x4 from memory.
   1548  %1 = load <2 x i64>, <2 x i64> *%p
   1549  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1550  %3 = bitcast <4 x i64> %2 to <8 x i32>
   1551  %mask.cast = bitcast i8 %mask to <8 x i1>
   1552  %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> %passthru
   1553  ret <8 x i32> %res
   1554 }
   1555 
   1556 define <8 x i32> @test_broadcast_2i64_8i32_maskz(<2 x i64> *%p, i8 %mask) nounwind {
   1557 ; CHECK-LABEL: test_broadcast_2i64_8i32_maskz:
   1558 ; CHECK:       # %bb.0:
   1559 ; CHECK-NEXT:    kmovd %esi, %k1
   1560 ; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
   1561 ; CHECK-NEXT:    retq
         ; Loads a <2 x i64>, repeats it twice to form <4 x i64>, reinterprets that as
         ; <8 x i32>, and selects against zero with all 8 mask elements. Expected asm:
         ; a zero-masked vbroadcasti32x4 from memory.
   1562  %1 = load <2 x i64>, <2 x i64> *%p
   1563  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1564  %3 = bitcast <4 x i64> %2 to <8 x i32>
   1565  %mask.cast = bitcast i8 %mask to <8 x i1>
   1566  %res = select <8 x i1> %mask.cast, <8 x i32> %3, <8 x i32> zeroinitializer
   1567  ret <8 x i32> %res
   1568 }
   1569 
   1570 define <16 x float> @test_broadcast_2f64_16f32_mask(<2 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
   1571 ; CHECK-LABEL: test_broadcast_2f64_16f32_mask:
   1572 ; CHECK:       # %bb.0:
   1573 ; CHECK-NEXT:    kmovd %esi, %k1
   1574 ; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
   1575 ; CHECK-NEXT:    retq
         ; Loads a <2 x double>, repeats it four times to form <8 x double>, reinterprets that
         ; as <16 x float>, and selects against %passthru with all 16 mask elements. Expected
         ; asm: a merge-masked vbroadcastf32x4 from memory.
   1576  %1 = load <2 x double>, <2 x double> *%p
   1577  %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1578  %3 = bitcast <8 x double> %2 to <16 x float>
   1579  %mask.cast = bitcast i16 %mask to <16 x i1>
   1580  %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
   1581  ret <16 x float> %res
   1582 }
   1583 
   1584 define <16 x float> @test_broadcast_2f64_16f32_maskz(<2 x double> *%p, i16 %mask) nounwind {
   1585 ; CHECK-LABEL: test_broadcast_2f64_16f32_maskz:
   1586 ; CHECK:       # %bb.0:
   1587 ; CHECK-NEXT:    kmovd %esi, %k1
   1588 ; CHECK-NEXT:    vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
   1589 ; CHECK-NEXT:    retq
         ; Loads a <2 x double>, repeats it four times to form <8 x double>, reinterprets that
         ; as <16 x float>, and selects against zero with all 16 mask elements. Expected asm:
         ; a zero-masked vbroadcastf32x4 from memory.
   1590  %1 = load <2 x double>, <2 x double> *%p
   1591  %2 = shufflevector <2 x double> %1, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1592  %3 = bitcast <8 x double> %2 to <16 x float>
   1593  %mask.cast = bitcast i16 %mask to <16 x i1>
   1594  %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer
   1595  ret <16 x float> %res
   1596 }
   1597 
   1598 define <16 x i32> @test_broadcast_2i64_16i32_mask(<2 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
   1599 ; CHECK-LABEL: test_broadcast_2i64_16i32_mask:
   1600 ; CHECK:       # %bb.0:
   1601 ; CHECK-NEXT:    kmovd %esi, %k1
   1602 ; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
   1603 ; CHECK-NEXT:    retq
         ; Loads a <2 x i64>, repeats it four times to form <8 x i64>, reinterprets that as
         ; <16 x i32>, and selects against %passthru with all 16 mask elements. Expected asm:
         ; a merge-masked vbroadcasti32x4 from memory.
   1604  %1 = load <2 x i64>, <2 x i64> *%p
   1605  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1606  %3 = bitcast <8 x i64> %2 to <16 x i32>
   1607  %mask.cast = bitcast i16 %mask to <16 x i1>
   1608  %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
   1609  ret <16 x i32> %res
   1610 }
   1611 
   1612 define <16 x i32> @test_broadcast_2i64_16i32_maskz(<2 x i64> *%p, i16 %mask) nounwind {
   1613 ; CHECK-LABEL: test_broadcast_2i64_16i32_maskz:
   1614 ; CHECK:       # %bb.0:
   1615 ; CHECK-NEXT:    kmovd %esi, %k1
   1616 ; CHECK-NEXT:    vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
   1617 ; CHECK-NEXT:    retq
         ; Loads a <2 x i64>, repeats it four times to form <8 x i64>, reinterprets that as
         ; <16 x i32>, and selects against zero with all 16 mask elements. Expected asm:
         ; a zero-masked vbroadcasti32x4 from memory.
   1618  %1 = load <2 x i64>, <2 x i64> *%p
   1619  %2 = shufflevector <2 x i64> %1, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1620  %3 = bitcast <8 x i64> %2 to <16 x i32>
   1621  %mask.cast = bitcast i16 %mask to <16 x i1>
   1622  %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer
   1623  ret <16 x i32> %res
   1624 }
   1625 
   1626 define <16 x float> @test_broadcast_4f64_16f32_mask(<4 x double> *%p, i16 %mask, <16 x float> %passthru) nounwind {
   1627 ; CHECK-LABEL: test_broadcast_4f64_16f32_mask:
   1628 ; CHECK:       # %bb.0:
   1629 ; CHECK-NEXT:    kmovd %esi, %k1
   1630 ; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
   1631 ; CHECK-NEXT:    retq
         ; Loads a <4 x double>, repeats it twice to form <8 x double>, reinterprets that as
         ; <16 x float>, and selects against %passthru with all 16 mask elements. Expected asm:
         ; a merge-masked vbroadcastf32x8 from memory.
   1632  %1 = load <4 x double>, <4 x double> *%p
   1633  %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1634  %3 = bitcast <8 x double> %2 to <16 x float>
   1635  %mask.cast = bitcast i16 %mask to <16 x i1>
   1636  %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> %passthru
   1637  ret <16 x float> %res
   1638 }
   1639 
   1640 define <16 x float> @test_broadcast_4f64_16f32_maskz(<4 x double> *%p, i16 %mask) nounwind {
   1641 ; CHECK-LABEL: test_broadcast_4f64_16f32_maskz:
   1642 ; CHECK:       # %bb.0:
   1643 ; CHECK-NEXT:    kmovd %esi, %k1
   1644 ; CHECK-NEXT:    vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
   1645 ; CHECK-NEXT:    retq
         ; Loads a <4 x double>, repeats it twice to form <8 x double>, reinterprets that as
         ; <16 x float>, and selects against zero with all 16 mask elements. Expected asm:
         ; a zero-masked vbroadcastf32x8 from memory.
   1646  %1 = load <4 x double>, <4 x double> *%p
   1647  %2 = shufflevector <4 x double> %1, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1648  %3 = bitcast <8 x double> %2 to <16 x float>
   1649  %mask.cast = bitcast i16 %mask to <16 x i1>
   1650  %res = select <16 x i1> %mask.cast, <16 x float> %3, <16 x float> zeroinitializer
   1651  ret <16 x float> %res
   1652 }
   1653 
   1654 define <16 x i32> @test_broadcast_4i64_16i32_mask(<4 x i64> *%p, i16 %mask, <16 x i32> %passthru) nounwind {
   1655 ; CHECK-LABEL: test_broadcast_4i64_16i32_mask:
   1656 ; CHECK:       # %bb.0:
   1657 ; CHECK-NEXT:    kmovd %esi, %k1
   1658 ; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
   1659 ; CHECK-NEXT:    retq
         ; Loads a <4 x i64>, repeats it twice to form <8 x i64>, reinterprets that as
         ; <16 x i32>, and selects against %passthru with all 16 mask elements. Expected asm:
         ; a merge-masked vbroadcasti32x8 from memory.
   1660  %1 = load <4 x i64>, <4 x i64> *%p
   1661  %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1662  %3 = bitcast <8 x i64> %2 to <16 x i32>
   1663  %mask.cast = bitcast i16 %mask to <16 x i1>
   1664  %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> %passthru
   1665  ret <16 x i32> %res
   1666 }
   1667 
   1668 define <16 x i32> @test_broadcast_4i64_16i32_maskz(<4 x i64> *%p, i16 %mask) nounwind {
   1669 ; CHECK-LABEL: test_broadcast_4i64_16i32_maskz:
   1670 ; CHECK:       # %bb.0:
   1671 ; CHECK-NEXT:    kmovd %esi, %k1
   1672 ; CHECK-NEXT:    vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
   1673 ; CHECK-NEXT:    retq
         ; Loads a <4 x i64>, repeats it twice to form <8 x i64>, reinterprets that as
         ; <16 x i32>, and selects against zero with all 16 mask elements. Expected asm:
         ; a zero-masked vbroadcasti32x8 from memory.
   1674  %1 = load <4 x i64>, <4 x i64> *%p
   1675  %2 = shufflevector <4 x i64> %1, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1676  %3 = bitcast <8 x i64> %2 to <16 x i32>
   1677  %mask.cast = bitcast i16 %mask to <16 x i1>
   1678  %res = select <16 x i1> %mask.cast, <16 x i32> %3, <16 x i32> zeroinitializer
   1679  ret <16 x i32> %res
   1680 }
   1681 
   1682 define <4 x double> @test_broadcast_4f32_4f64_mask(<4 x float> *%p, i8 %mask, <4 x double> %passthru) nounwind {
   1683 ; CHECK-LABEL: test_broadcast_4f32_4f64_mask:
   1684 ; CHECK:       # %bb.0:
   1685 ; CHECK-NEXT:    kmovd %esi, %k1
   1686 ; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
   1687 ; CHECK-NEXT:    retq
         ; Loads a <4 x float>, repeats it twice to form <8 x float>, reinterprets that as
         ; <4 x double>, and selects against %passthru using mask elements 0-3. Expected asm:
         ; a merge-masked vbroadcastf64x2 from memory.
   1688  %1 = load <4 x float>, <4 x float> *%p
   1689  %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1690  %3 = bitcast <8 x float> %2 to <4 x double>
   1691  %mask.cast = bitcast i8 %mask to <8 x i1>
   1692  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1693  %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> %passthru
   1694  ret <4 x double> %res
   1695 }
   1696 
   1697 define <4 x double> @test_broadcast_4f32_4f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
   1698 ; CHECK-LABEL: test_broadcast_4f32_4f64_maskz:
   1699 ; CHECK:       # %bb.0:
   1700 ; CHECK-NEXT:    kmovd %esi, %k1
   1701 ; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
   1702 ; CHECK-NEXT:    retq
         ; Loads a <4 x float>, repeats it twice to form <8 x float>, reinterprets that as
         ; <4 x double>, and selects against zero using mask elements 0-3. Expected asm:
         ; a zero-masked vbroadcastf64x2 from memory.
   1703  %1 = load <4 x float>, <4 x float> *%p
   1704  %2 = shufflevector <4 x float> %1, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1705  %3 = bitcast <8 x float> %2 to <4 x double>
   1706  %mask.cast = bitcast i8 %mask to <8 x i1>
   1707  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1708  %res = select <4 x i1> %mask.extract, <4 x double> %3, <4 x double> zeroinitializer
   1709  ret <4 x double> %res
   1710 }
   1711 
   1712 define <4 x i64> @test_broadcast_4i32_4i64_mask(<4 x i32> *%p, i8 %mask, <4 x i64> %passthru) nounwind {
   1713 ; CHECK-LABEL: test_broadcast_4i32_4i64_mask:
   1714 ; CHECK:       # %bb.0:
   1715 ; CHECK-NEXT:    kmovd %esi, %k1
   1716 ; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
   1717 ; CHECK-NEXT:    retq
         ; Loads a <4 x i32>, repeats it twice to form <8 x i32>, reinterprets that as
         ; <4 x i64>, and selects against %passthru using mask elements 0-3. Expected asm:
         ; a merge-masked vbroadcasti64x2 from memory.
   1718  %1 = load <4 x i32>, <4 x i32> *%p
   1719  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1720  %3 = bitcast <8 x i32> %2 to <4 x i64>
   1721  %mask.cast = bitcast i8 %mask to <8 x i1>
   1722  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1723  %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> %passthru
   1724  ret <4 x i64> %res
   1725 }
   1726 
   1727 define <4 x i64> @test_broadcast_4i32_4i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
   1728 ; CHECK-LABEL: test_broadcast_4i32_4i64_maskz:
   1729 ; CHECK:       # %bb.0:
   1730 ; CHECK-NEXT:    kmovd %esi, %k1
   1731 ; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
   1732 ; CHECK-NEXT:    retq
         ; Loads a <4 x i32>, repeats it twice to form <8 x i32>, reinterprets that as
         ; <4 x i64>, and selects against zero using mask elements 0-3. Expected asm:
         ; a zero-masked vbroadcasti64x2 from memory.
   1733  %1 = load <4 x i32>, <4 x i32> *%p
   1734  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1735  %3 = bitcast <8 x i32> %2 to <4 x i64>
   1736  %mask.cast = bitcast i8 %mask to <8 x i1>
   1737  %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1738  %res = select <4 x i1> %mask.extract, <4 x i64> %3, <4 x i64> zeroinitializer
   1739  ret <4 x i64> %res
   1740 }
   1741 
   1742 define <8 x double> @test_broadcast_4f32_8f64_mask(<4 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
   1743 ; CHECK-LABEL: test_broadcast_4f32_8f64_mask:
   1744 ; CHECK:       # %bb.0:
   1745 ; CHECK-NEXT:    kmovd %esi, %k1
   1746 ; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
   1747 ; CHECK-NEXT:    retq
         ; Loads a <4 x float>, repeats it four times to form <16 x float>, reinterprets that
         ; as <8 x double>, and selects against %passthru with all 8 mask elements. Expected
         ; asm: a merge-masked vbroadcastf64x2 from memory.
   1748  %1 = load <4 x float>, <4 x float> *%p
   1749  %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1750  %3 = bitcast <16 x float> %2 to <8 x double>
   1751  %mask.cast = bitcast i8 %mask to <8 x i1>
   1752  %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
   1753  ret <8 x double> %res
   1754 }
   1755 
   1756 define <8 x double> @test_broadcast_4f32_8f64_maskz(<4 x float> *%p, i8 %mask) nounwind {
   1757 ; CHECK-LABEL: test_broadcast_4f32_8f64_maskz:
   1758 ; CHECK:       # %bb.0:
   1759 ; CHECK-NEXT:    kmovd %esi, %k1
   1760 ; CHECK-NEXT:    vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
   1761 ; CHECK-NEXT:    retq
        ; Zero-masked broadcast of a 128-bit float load to 512 bits.
        ; The shuffle repeats the loaded <4 x float> four times; the result is
        ; reinterpreted as <8 x double>, and the eight bits of the i8 mask select
        ; per lane between it and zero.
        ; The assertions above pin a single {z}-masked vbroadcastf64x2 with a memory operand.
   1762  %1 = load <4 x float>, <4 x float> *%p
   1763  %2 = shufflevector <4 x float> %1, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1764  %3 = bitcast <16 x float> %2 to <8 x double>
   1765  %mask.cast = bitcast i8 %mask to <8 x i1>
   1766  %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer
   1767  ret <8 x double> %res
   1768 }
   1769 
   1770 define <8 x i64> @test_broadcast_4i32_8i64_mask(<4 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
   1771 ; CHECK-LABEL: test_broadcast_4i32_8i64_mask:
   1772 ; CHECK:       # %bb.0:
   1773 ; CHECK-NEXT:    kmovd %esi, %k1
   1774 ; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
   1775 ; CHECK-NEXT:    retq
        ; Merge-masked broadcast of a 128-bit integer load to 512 bits.
        ; The shuffle repeats the loaded <4 x i32> four times; the result is
        ; reinterpreted as <8 x i64>, and the eight bits of the i8 mask select
        ; per lane between it and %passthru.
        ; The assertions above pin a single masked vbroadcasti64x2 with a memory operand.
   1776  %1 = load <4 x i32>, <4 x i32> *%p
   1777  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1778  %3 = bitcast <16 x i32> %2 to <8 x i64>
   1779  %mask.cast = bitcast i8 %mask to <8 x i1>
   1780  %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
   1781  ret <8 x i64> %res
   1782 }
   1783 
   1784 define <8 x i64> @test_broadcast_4i32_8i64_maskz(<4 x i32> *%p, i8 %mask) nounwind {
   1785 ; CHECK-LABEL: test_broadcast_4i32_8i64_maskz:
   1786 ; CHECK:       # %bb.0:
   1787 ; CHECK-NEXT:    kmovd %esi, %k1
   1788 ; CHECK-NEXT:    vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
   1789 ; CHECK-NEXT:    retq
        ; Zero-masked broadcast of a 128-bit integer load to 512 bits.
        ; The shuffle repeats the loaded <4 x i32> four times; the result is
        ; reinterpreted as <8 x i64>, and the eight bits of the i8 mask select
        ; per lane between it and zero.
        ; The assertions above pin a single {z}-masked vbroadcasti64x2 with a memory operand.
   1790  %1 = load <4 x i32>, <4 x i32> *%p
   1791  %2 = shufflevector <4 x i32> %1, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
   1792  %3 = bitcast <16 x i32> %2 to <8 x i64>
   1793  %mask.cast = bitcast i8 %mask to <8 x i1>
   1794  %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer
   1795  ret <8 x i64> %res
   1796 }
   1797 
   1798 define <8 x double> @test_broadcast_8f32_8f64_mask(<8 x float> *%p, i8 %mask, <8 x double> %passthru) nounwind {
   1799 ; CHECK-LABEL: test_broadcast_8f32_8f64_mask:
   1800 ; CHECK:       # %bb.0:
   1801 ; CHECK-NEXT:    kmovd %esi, %k1
   1802 ; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
   1803 ; CHECK-NEXT:    retq
        ; Merge-masked broadcast of a 256-bit float load to 512 bits.
        ; The shuffle repeats the loaded <8 x float> twice; the result is
        ; reinterpreted as <8 x double>, and the eight bits of the i8 mask select
        ; per lane between it and %passthru.
        ; The assertions above pin a single masked vbroadcastf64x4 with a memory operand.
   1804  %1 = load <8 x float>, <8 x float> *%p
   1805  %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1806  %3 = bitcast <16 x float> %2 to <8 x double>
   1807  %mask.cast = bitcast i8 %mask to <8 x i1>
   1808  %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> %passthru
   1809  ret <8 x double> %res
   1810 }
   1811 
   1812 define <8 x double> @test_broadcast_8f32_8f64_maskz(<8 x float> *%p, i8 %mask) nounwind {
   1813 ; CHECK-LABEL: test_broadcast_8f32_8f64_maskz:
   1814 ; CHECK:       # %bb.0:
   1815 ; CHECK-NEXT:    kmovd %esi, %k1
   1816 ; CHECK-NEXT:    vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
   1817 ; CHECK-NEXT:    retq
        ; Zero-masked broadcast of a 256-bit float load to 512 bits.
        ; The shuffle repeats the loaded <8 x float> twice; the result is
        ; reinterpreted as <8 x double>, and the eight bits of the i8 mask select
        ; per lane between it and zero.
        ; The assertions above pin a single {z}-masked vbroadcastf64x4 with a memory operand.
   1818  %1 = load <8 x float>, <8 x float> *%p
   1819  %2 = shufflevector <8 x float> %1, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1820  %3 = bitcast <16 x float> %2 to <8 x double>
   1821  %mask.cast = bitcast i8 %mask to <8 x i1>
   1822  %res = select <8 x i1> %mask.cast, <8 x double> %3, <8 x double> zeroinitializer
   1823  ret <8 x double> %res
   1824 }
   1825 
   1826 define <8 x i64> @test_broadcast_8i32_8i64_mask(<8 x i32> *%p, i8 %mask, <8 x i64> %passthru) nounwind {
   1827 ; CHECK-LABEL: test_broadcast_8i32_8i64_mask:
   1828 ; CHECK:       # %bb.0:
   1829 ; CHECK-NEXT:    kmovd %esi, %k1
   1830 ; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
   1831 ; CHECK-NEXT:    retq
        ; Merge-masked broadcast of a 256-bit integer load to 512 bits.
        ; The shuffle repeats the loaded <8 x i32> twice; the result is
        ; reinterpreted as <8 x i64>, and the eight bits of the i8 mask select
        ; per lane between it and %passthru.
        ; The assertions above pin a single masked vbroadcasti64x4 with a memory operand.
   1832  %1 = load <8 x i32>, <8 x i32> *%p
   1833  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1834  %3 = bitcast <16 x i32> %2 to <8 x i64>
   1835  %mask.cast = bitcast i8 %mask to <8 x i1>
   1836  %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> %passthru
   1837  ret <8 x i64> %res
   1838 }
   1839 
   1840 define <8 x i64> @test_broadcast_8i32_8i64_maskz(<8 x i32> *%p, i8 %mask) nounwind {
   1841 ; CHECK-LABEL: test_broadcast_8i32_8i64_maskz:
   1842 ; CHECK:       # %bb.0:
   1843 ; CHECK-NEXT:    kmovd %esi, %k1
   1844 ; CHECK-NEXT:    vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
   1845 ; CHECK-NEXT:    retq
        ; Zero-masked broadcast of a 256-bit integer load to 512 bits.
        ; The shuffle repeats the loaded <8 x i32> twice; the result is
        ; reinterpreted as <8 x i64>, and the eight bits of the i8 mask select
        ; per lane between it and zero.
        ; The assertions above pin a single {z}-masked vbroadcasti64x4 with a memory operand.
   1846  %1 = load <8 x i32>, <8 x i32> *%p
   1847  %2 = shufflevector <8 x i32> %1, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   1848  %3 = bitcast <16 x i32> %2 to <8 x i64>
   1849  %mask.cast = bitcast i8 %mask to <8 x i1>
   1850  %res = select <8 x i1> %mask.cast, <8 x i64> %3, <8 x i64> zeroinitializer
   1851  ret <8 x i64> %res
   1852 }
   1853 
   1854 define <4 x float> @test_broadcastf32x2_v4f32(<4 x float> %vec, <4 x float> %passthru, i8 %mask) {
   1855 ; CHECK-LABEL: test_broadcastf32x2_v4f32:
   1856 ; CHECK:       # %bb.0:
   1857 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
   1858 ; CHECK-NEXT:    kmovd %edi, %k1
   1859 ; CHECK-NEXT:    vblendmps %xmm0, %xmm1, %xmm0 {%k1}
   1860 ; CHECK-NEXT:    retq
        ; Merge-masked repeat of the low two floats of %vec across a 128-bit vector.
        ; The shuffle picks lanes <0,1,0,1>; lanes 0-3 of the bitcast i8 mask select
        ; per lane between the shuffled value and %passthru.
        ; The assertions above pin an unmasked vmovddup followed by a separate masked
        ; vblendmps, i.e. the mask is not folded into the shuffle instruction here.
   1861   %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1862   %mask.cast = bitcast i8 %mask to <8 x i1>
   1863   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1864   %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> %passthru
   1865   ret <4 x float> %res
   1866 }
   1867 
   1868 define <4 x float> @test_broadcastf32x2_v4f32_z(<4 x float> %vec, i8 %mask) {
   1869 ; CHECK-LABEL: test_broadcastf32x2_v4f32_z:
   1870 ; CHECK:       # %bb.0:
   1871 ; CHECK-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
   1872 ; CHECK-NEXT:    kmovd %edi, %k1
   1873 ; CHECK-NEXT:    vmovaps %xmm0, %xmm0 {%k1} {z}
   1874 ; CHECK-NEXT:    retq
        ; Zero-masked repeat of the low two floats of %vec across a 128-bit vector.
        ; The shuffle picks lanes <0,1,0,1>; lanes 0-3 of the bitcast i8 mask select
        ; per lane between the shuffled value and zero.
        ; The assertions above pin an unmasked vmovddup followed by a separate
        ; {z}-masked vmovaps, i.e. the mask is not folded into the shuffle instruction here.
   1875   %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1876   %mask.cast = bitcast i8 %mask to <8 x i1>
   1877   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1878   %res = select <4 x i1> %mask.extract, <4 x float> %shuf, <4 x float> zeroinitializer
   1879   ret <4 x float> %res
   1880 }
   1881 
   1882 define <4 x i32> @test_broadcasti32x2_v4i32(<4 x i32> %vec, <4 x i32> %passthru, i8 %mask) {
   1883 ; CHECK-LABEL: test_broadcasti32x2_v4i32:
   1884 ; CHECK:       # %bb.0:
   1885 ; CHECK-NEXT:    kmovd %edi, %k1
   1886 ; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
   1887 ; CHECK-NEXT:    vmovdqa %xmm1, %xmm0
   1888 ; CHECK-NEXT:    retq
        ; Merge-masked repeat of the low two i32 lanes of %vec across a 128-bit vector.
        ; The shuffle picks lanes <0,1,0,1>; lanes 0-3 of the bitcast i8 mask select
        ; per lane between the shuffled value and %passthru.
        ; The assertions above pin a masked vbroadcasti32x2 that writes xmm1
        ; (presumably the register holding %passthru) and a copy of xmm1 to xmm0.
   1889   %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1890   %mask.cast = bitcast i8 %mask to <8 x i1>
   1891   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1892   %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> %passthru
   1893   ret <4 x i32> %res
   1894 }
   1895 
   1896 define <4 x i32> @test_broadcasti32x2_v4i32_z(<4 x i32> %vec, i8 %mask) {
   1897 ; CHECK-LABEL: test_broadcasti32x2_v4i32_z:
   1898 ; CHECK:       # %bb.0:
   1899 ; CHECK-NEXT:    kmovd %edi, %k1
   1900 ; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
   1901 ; CHECK-NEXT:    retq
        ; Zero-masked repeat of the low two i32 lanes of %vec across a 128-bit vector.
        ; The shuffle picks lanes <0,1,0,1>; lanes 0-3 of the bitcast i8 mask select
        ; per lane between the shuffled value and zero.
        ; The assertions above pin a single {z}-masked vbroadcasti32x2 on xmm0.
   1902   %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
   1903   %mask.cast = bitcast i8 %mask to <8 x i1>
   1904   %mask.extract = shufflevector <8 x i1> %mask.cast, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   1905   %res = select <4 x i1> %mask.extract, <4 x i32> %shuf, <4 x i32> zeroinitializer
   1906   ret <4 x i32> %res
   1907 }
   1908 
   1909 define <8 x float> @test_broadcastf32x2_v8f32(<8 x float> %vec, <8 x float> %passthru, i8 %mask) {
   1910 ; CHECK-LABEL: test_broadcastf32x2_v8f32:
   1911 ; CHECK:       # %bb.0:
   1912 ; CHECK-NEXT:    kmovd %edi, %k1
   1913 ; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
   1914 ; CHECK-NEXT:    vmovapd %ymm1, %ymm0
   1915 ; CHECK-NEXT:    retq
        ; Merge-masked repeat of the low two floats of %vec across a 256-bit vector.
        ; The shuffle picks lanes <0,1> four times; the eight bits of the i8 mask
        ; select per lane between the shuffled value and %passthru.
        ; The assertions above pin a masked vbroadcastf32x2 that writes ymm1
        ; (presumably the register holding %passthru) and a copy of ymm1 to ymm0.
   1916   %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1917   %mask.cast = bitcast i8 %mask to <8 x i1>
   1918   %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> %passthru
   1919   ret <8 x float> %res
   1920 }
   1921 
   1922 define <8 x float> @test_broadcastf32x2_v8f32_z(<8 x float> %vec, i8 %mask) {
   1923 ; CHECK-LABEL: test_broadcastf32x2_v8f32_z:
   1924 ; CHECK:       # %bb.0:
   1925 ; CHECK-NEXT:    kmovd %edi, %k1
   1926 ; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
   1927 ; CHECK-NEXT:    retq
        ; Zero-masked repeat of the low two floats of %vec across a 256-bit vector.
        ; The shuffle picks lanes <0,1> four times; the eight bits of the i8 mask
        ; select per lane between the shuffled value and zero.
        ; The assertions above pin a single {z}-masked vbroadcastf32x2 writing ymm0.
   1928   %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1929   %mask.cast = bitcast i8 %mask to <8 x i1>
   1930   %res = select <8 x i1> %mask.cast, <8 x float> %shuf, <8 x float> zeroinitializer
   1931   ret <8 x float> %res
   1932 }
   1933 
   1934 define <8 x i32> @test_broadcasti32x2_v8i32(<8 x i32> %vec, <8 x i32> %passthru, i8 %mask) {
   1935 ; CHECK-LABEL: test_broadcasti32x2_v8i32:
   1936 ; CHECK:       # %bb.0:
   1937 ; CHECK-NEXT:    kmovd %edi, %k1
   1938 ; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
   1939 ; CHECK-NEXT:    vmovdqa %ymm1, %ymm0
   1940 ; CHECK-NEXT:    retq
        ; Merge-masked repeat of the low two i32 lanes of %vec across a 256-bit vector.
        ; The shuffle picks lanes <0,1> four times; the eight bits of the i8 mask
        ; select per lane between the shuffled value and %passthru.
        ; The assertions above pin a masked vbroadcasti32x2 that writes ymm1
        ; (presumably the register holding %passthru) and a copy of ymm1 to ymm0.
   1941   %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1942   %mask.cast = bitcast i8 %mask to <8 x i1>
   1943   %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> %passthru
   1944   ret <8 x i32> %res
   1945 }
   1946 
   1947 define <8 x i32> @test_broadcasti32x2_v8i32_z(<8 x i32> %vec, i8 %mask) {
   1948 ; CHECK-LABEL: test_broadcasti32x2_v8i32_z:
   1949 ; CHECK:       # %bb.0:
   1950 ; CHECK-NEXT:    kmovd %edi, %k1
   1951 ; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
   1952 ; CHECK-NEXT:    retq
        ; Zero-masked repeat of the low two i32 lanes of %vec across a 256-bit vector.
        ; The shuffle picks lanes <0,1> four times; the eight bits of the i8 mask
        ; select per lane between the shuffled value and zero.
        ; The assertions above pin a single {z}-masked vbroadcasti32x2 writing ymm0.
   1953   %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1954   %mask.cast = bitcast i8 %mask to <8 x i1>
   1955   %res = select <8 x i1> %mask.cast, <8 x i32> %shuf, <8 x i32> zeroinitializer
   1956   ret <8 x i32> %res
   1957 }
   1958 
   1959 define <16 x float> @test_broadcastf32x2_v16f32_z(<16 x float> %vec, i16 %mask) {
   1960 ; CHECK-LABEL: test_broadcastf32x2_v16f32_z:
   1961 ; CHECK:       # %bb.0:
   1962 ; CHECK-NEXT:    kmovd %edi, %k1
   1963 ; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1964 ; CHECK-NEXT:    retq
        ; Zero-masked repeat of the low two floats of %vec across a 512-bit vector.
        ; The shuffle picks lanes <0,1> eight times; the sixteen bits of the i16 mask
        ; select per lane between the shuffled value and zero.
        ; The assertions above pin a single {z}-masked vbroadcastf32x2 writing zmm0.
   1965   %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1966   %mask.cast = bitcast i16 %mask to <16 x i1>
   1967   %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> zeroinitializer
   1968   ret <16 x float> %res
   1969 }
   1970 
   1971 define <16 x i32> @test_broadcasti32x2_v16i32(<16 x i32> %vec, <16 x i32> %passthru, i16 %mask) {
   1972 ; CHECK-LABEL: test_broadcasti32x2_v16i32:
   1973 ; CHECK:       # %bb.0:
   1974 ; CHECK-NEXT:    kmovd %edi, %k1
   1975 ; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1976 ; CHECK-NEXT:    vmovdqa64 %zmm1, %zmm0
   1977 ; CHECK-NEXT:    retq
        ; Merge-masked repeat of the low two i32 lanes of %vec across a 512-bit vector.
        ; The shuffle picks lanes <0,1> eight times; the sixteen bits of the i16 mask
        ; select per lane between the shuffled value and %passthru.
        ; The assertions above pin a masked vbroadcasti32x2 that writes zmm1
        ; (presumably the register holding %passthru) and a copy of zmm1 to zmm0.
   1978   %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1979   %mask.cast = bitcast i16 %mask to <16 x i1>
   1980   %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> %passthru
   1981   ret <16 x i32> %res
   1982 }
   1983 
   1984 define <16 x float> @test_broadcastf32x2_v16f32(<16 x float> %vec, <16 x float> %passthru, i16 %mask) {
   1985 ; CHECK-LABEL: test_broadcastf32x2_v16f32:
   1986 ; CHECK:       # %bb.0:
   1987 ; CHECK-NEXT:    kmovd %edi, %k1
   1988 ; CHECK-NEXT:    vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   1989 ; CHECK-NEXT:    vmovapd %zmm1, %zmm0
   1990 ; CHECK-NEXT:    retq
        ; Merge-masked repeat of the low two floats of %vec across a 512-bit vector.
        ; The shuffle picks lanes <0,1> eight times; the sixteen bits of the i16 mask
        ; select per lane between the shuffled value and %passthru.
        ; The assertions above pin a masked vbroadcastf32x2 that writes zmm1
        ; (presumably the register holding %passthru) and a copy of zmm1 to zmm0.
   1991   %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   1992   %mask.cast = bitcast i16 %mask to <16 x i1>
   1993   %res = select <16 x i1> %mask.cast, <16 x float> %shuf, <16 x float> %passthru
   1994   ret <16 x float> %res
   1995 }
   1996 
   1997 define <16 x i32> @test_broadcasti32x2_v16i32_z(<16 x i32> %vec, i16 %mask) {
   1998 ; CHECK-LABEL: test_broadcasti32x2_v16i32_z:
   1999 ; CHECK:       # %bb.0:
   2000 ; CHECK-NEXT:    kmovd %edi, %k1
   2001 ; CHECK-NEXT:    vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
   2002 ; CHECK-NEXT:    retq
        ; Zero-masked repeat of the low two i32 lanes of %vec across a 512-bit vector.
        ; The shuffle picks lanes <0,1> eight times; the sixteen bits of the i16 mask
        ; select per lane between the shuffled value and zero.
        ; The assertions above pin a single {z}-masked vbroadcasti32x2 writing zmm0.
   2003   %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
   2004   %mask.cast = bitcast i16 %mask to <16 x i1>
   2005   %res = select <16 x i1> %mask.cast, <16 x i32> %shuf, <16 x i32> zeroinitializer
   2006   ret <16 x i32> %res
   2007 }
   2008 
   2009 define <16 x i8> @mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) {
   2010 ; CHECK-LABEL: mask_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16:
   2011 ; CHECK:       # %bb.0:
   2012 ; CHECK-NEXT:    kmovd %edi, %k1
   2013 ; CHECK-NEXT:    vpalignr {{.*#+}} xmm2 {%k1} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
   2014 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
   2015 ; CHECK-NEXT:    retq
        ; Merge-masked byte shuffle taking elements 1..16 of the (%a, %b) pair:
        ; bytes 1-15 of %a followed by byte 0 of %b.
        ; The sixteen bits of the i16 mask select per byte between the shuffled
        ; value and %passthru.
        ; The assertions above pin a masked vpalignr that writes xmm2 (presumably the
        ; register holding %passthru) and a copy of xmm2 to xmm0.
   2016   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
   2017   %mask.cast = bitcast i16 %mask to <16 x i1>
   2018   %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru
   2019   ret <16 x i8> %res
   2020 }
   2021 
   2022 define <16 x i8> @maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
   2023 ; CHECK-LABEL: maskz_shuffle_v16i8_1_2_3_4_5_6_7_8_9_10_11_12_13_14_15_16:
   2024 ; CHECK:       # %bb.0:
   2025 ; CHECK-NEXT:    kmovd %edi, %k1
   2026 ; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0]
   2027 ; CHECK-NEXT:    retq
        ; Zero-masked byte shuffle taking elements 1..16 of the (%a, %b) pair:
        ; bytes 1-15 of %a followed by byte 0 of %b.
        ; The sixteen bits of the i16 mask select per byte between the shuffled
        ; value and zero.
        ; The assertions above pin a single {z}-masked vpalignr writing xmm0.
   2028   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>
   2029   %mask.cast = bitcast i16 %mask to <16 x i1>
   2030   %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer
   2031   ret <16 x i8> %res
   2032 }
   2033 
   2034 define <16 x i8> @mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) {
   2035 ; CHECK-LABEL: mask_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19:
   2036 ; CHECK:       # %bb.0:
   2037 ; CHECK-NEXT:    kmovd %edi, %k1
   2038 ; CHECK-NEXT:    vpalignr {{.*#+}} xmm2 {%k1} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
   2039 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
   2040 ; CHECK-NEXT:    retq
        ; Merge-masked byte shuffle taking elements 4..19 of the (%a, %b) pair:
        ; bytes 4-15 of %a followed by bytes 0-3 of %b.
        ; The sixteen bits of the i16 mask select per byte between the shuffled
        ; value and %passthru.
        ; The assertions above pin a masked vpalignr that writes xmm2 (presumably the
        ; register holding %passthru) and a copy of xmm2 to xmm0.
   2041   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
   2042   %mask.cast = bitcast i16 %mask to <16 x i1>
   2043   %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru
   2044   ret <16 x i8> %res
   2045 }
   2046 
   2047 define <16 x i8> @maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
   2048 ; CHECK-LABEL: maskz_shuffle_v16i8_4_5_6_7_8_9_10_11_12_13_14_15_16_17_18_19:
   2049 ; CHECK:       # %bb.0:
   2050 ; CHECK-NEXT:    kmovd %edi, %k1
   2051 ; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[4,5,6,7,8,9,10,11,12,13,14,15],xmm1[0,1,2,3]
   2052 ; CHECK-NEXT:    retq
        ; Zero-masked byte shuffle taking elements 4..19 of the (%a, %b) pair:
        ; bytes 4-15 of %a followed by bytes 0-3 of %b.
        ; The sixteen bits of the i16 mask select per byte between the shuffled
        ; value and zero.
        ; The assertions above pin a single {z}-masked vpalignr writing xmm0.
   2053   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19>
   2054   %mask.cast = bitcast i16 %mask to <16 x i1>
   2055   %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer
   2056   ret <16 x i8> %res
   2057 }
   2058 
   2059 define <16 x i8> @mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, <16 x i8> %passthru, i16 %mask) {
   2060 ; CHECK-LABEL: mask_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23:
   2061 ; CHECK:       # %bb.0:
   2062 ; CHECK-NEXT:    kmovd %edi, %k1
   2063 ; CHECK-NEXT:    vpalignr {{.*#+}} xmm2 {%k1} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
   2064 ; CHECK-NEXT:    vmovdqa %xmm2, %xmm0
   2065 ; CHECK-NEXT:    retq
        ; Merge-masked byte shuffle taking elements 8..23 of the (%a, %b) pair:
        ; bytes 8-15 of %a followed by bytes 0-7 of %b.
        ; The sixteen bits of the i16 mask select per byte between the shuffled
        ; value and %passthru.
        ; The assertions above pin a masked vpalignr that writes xmm2 (presumably the
        ; register holding %passthru) and a copy of xmm2 to xmm0.
   2066   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
   2067   %mask.cast = bitcast i16 %mask to <16 x i1>
   2068   %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> %passthru
   2069   ret <16 x i8> %res
   2070 }
   2071 
   2072 define <16 x i8> @maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23(<16 x i8> %a, <16 x i8> %b, i16 %mask) {
   2073 ; CHECK-LABEL: maskz_shuffle_v16i8_8_9_10_11_12_13_14_15_16_17_18_19_20_21_22_23:
   2074 ; CHECK:       # %bb.0:
   2075 ; CHECK-NEXT:    kmovd %edi, %k1
   2076 ; CHECK-NEXT:    vpalignr {{.*#+}} xmm0 {%k1} {z} = xmm0[8,9,10,11,12,13,14,15],xmm1[0,1,2,3,4,5,6,7]
   2077 ; CHECK-NEXT:    retq
        ; Zero-masked byte shuffle taking elements 8..23 of the (%a, %b) pair:
        ; bytes 8-15 of %a followed by bytes 0-7 of %b.
        ; The sixteen bits of the i16 mask select per byte between the shuffled
        ; value and zero.
        ; The assertions above pin a single {z}-masked vpalignr writing xmm0.
   2078   %shuffle = shufflevector <16 x i8> %a, <16 x i8> %b, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
   2079   %mask.cast = bitcast i16 %mask to <16 x i1>
   2080   %res = select <16 x i1> %mask.cast, <16 x i8> %shuffle, <16 x i8> zeroinitializer
   2081   ret <16 x i8> %res
   2082 }
   2083