Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
      3 
      4 
      ; Extracts elements 16-23 of a <32 x i16> (the 128-bit chunk at index 2) and returns it.
      ; The checks expect a single vextractf32x4 $2 from zmm0 into xmm0.
      5 define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
      6 ; SKX-LABEL: extract_subvector128_v32i16:
      7 ; SKX:       ## %bb.0:
      8 ; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
      9 ; SKX-NEXT:    vzeroupper
     10 ; SKX-NEXT:    retq
     11   %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
     12   ret <8 x i16> %r1
     13 }
     14 
     ; Extracts elements 0-7 (the lowest 128 bits) of a <32 x i16>.
     ; The checks expect no extract instruction: only a register "kill" annotation, vzeroupper and retq.
     15 define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
     16 ; SKX-LABEL: extract_subvector128_v32i16_first_element:
     17 ; SKX:       ## %bb.0:
     18 ; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
     19 ; SKX-NEXT:    vzeroupper
     20 ; SKX-NEXT:    retq
     21   %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
     22   ret <8 x i16> %r1
     23 }
     24 
     ; Extracts elements 32-47 of a <64 x i8> (the 128-bit chunk at index 2) and returns it.
     ; The checks expect a single vextractf32x4 $2 from zmm0 into xmm0.
     25 define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
     26 ; SKX-LABEL: extract_subvector128_v64i8:
     27 ; SKX:       ## %bb.0:
     28 ; SKX-NEXT:    vextractf32x4 $2, %zmm0, %xmm0
     29 ; SKX-NEXT:    vzeroupper
     30 ; SKX-NEXT:    retq
     31   %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
     32   ret <16 x i8> %r1
     33 }
     34 
     ; Extracts elements 0-15 (the lowest 128 bits) of a <64 x i8>.
     ; The checks expect no extract instruction: only a register "kill" annotation, vzeroupper and retq.
     35 define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
     36 ; SKX-LABEL: extract_subvector128_v64i8_first_element:
     37 ; SKX:       ## %bb.0:
     38 ; SKX-NEXT:    ## kill: def $xmm0 killed $xmm0 killed $zmm0
     39 ; SKX-NEXT:    vzeroupper
     40 ; SKX-NEXT:    retq
     41   %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
     42   ret <16 x i8> %r1
     43 }
     44 
     45 
     ; Extracts elements 16-31 (the upper 256 bits) of a <32 x i16> and returns them.
     ; The checks expect vextractf64x4 $1 into ymm0, with no vzeroupper before retq.
     46 define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
     47 ; SKX-LABEL: extract_subvector256_v32i16:
     48 ; SKX:       ## %bb.0:
     49 ; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
     50 ; SKX-NEXT:    retq
     51   %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
     52   ret <16 x i16> %r1
     53 }
     54 
     ; Extracts elements 32-63 (the upper 256 bits) of a <64 x i8> and returns them.
     ; The checks expect vextractf64x4 $1 into ymm0, with no vzeroupper before retq.
     55 define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
     56 ; SKX-LABEL: extract_subvector256_v64i8:
     57 ; SKX:       ## %bb.0:
     58 ; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0
     59 ; SKX-NEXT:    retq
     60   %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
     61   ret <32 x i8> %r1
     62 }
     63 
     ; Stores the upper half (elements 2-3) of a <4 x double> to %addr with align 1.
     ; The checks expect the extract and store to fold into one vextractf128 $1 with a memory destination.
     ; NOTE(review): the name says v8f64 but the operand is <4 x double>; the name is kept because SKX-LABEL matches it.
     64 define void @extract_subvector256_v8f64_store(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
     65 ; SKX-LABEL: extract_subvector256_v8f64_store:
     66 ; SKX:       ## %bb.0: ## %entry
     67 ; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
     68 ; SKX-NEXT:    vzeroupper
     69 ; SKX-NEXT:    retq
     70 entry:
     71   %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
     72   %1 = bitcast double* %addr to <2 x double>*
     73   store <2 x double> %0, <2 x double>* %1, align 1
     74   ret void
     75 }
     76 
     ; Stores the upper half (elements 4-7) of an <8 x float> to %addr with align 1.
     ; The checks expect the extract and store to fold into one vextractf128 $1 with a memory destination.
     77 define void @extract_subvector256_v8f32_store(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
     78 ; SKX-LABEL: extract_subvector256_v8f32_store:
     79 ; SKX:       ## %bb.0: ## %entry
     80 ; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
     81 ; SKX-NEXT:    vzeroupper
     82 ; SKX-NEXT:    retq
     83 entry:
     84   %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
     85   %1 = bitcast float* %addr to <4 x float>*
     86   store <4 x float> %0, <4 x float>* %1, align 1
     87   ret void
     88 }
     89 
     ; Stores the upper half (elements 2-3) of a <4 x i64> to %addr with align 1.
     ; The checks expect vextractf128 $1 (the floating-point form) with a memory destination, even for integer data.
     90 define void @extract_subvector256_v4i64_store(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
     91 ; SKX-LABEL: extract_subvector256_v4i64_store:
     92 ; SKX:       ## %bb.0: ## %entry
     93 ; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
     94 ; SKX-NEXT:    vzeroupper
     95 ; SKX-NEXT:    retq
     96 entry:
     97   %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
     98   %1 = bitcast i64* %addr to <2 x i64>*
     99   store <2 x i64> %0, <2 x i64>* %1, align 1
    100   ret void
    101 }
    102 
    ; Stores the upper half (elements 4-7) of an <8 x i32> to %addr with align 1.
    ; The checks expect vextractf128 $1 (the floating-point form) with a memory destination, even for integer data.
    103 define void @extract_subvector256_v8i32_store(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
    104 ; SKX-LABEL: extract_subvector256_v8i32_store:
    105 ; SKX:       ## %bb.0: ## %entry
    106 ; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
    107 ; SKX-NEXT:    vzeroupper
    108 ; SKX-NEXT:    retq
    109 entry:
    110   %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    111   %1 = bitcast i32* %addr to <4 x i32>*
    112   store <4 x i32> %0, <4 x i32>* %1, align 1
    113   ret void
    114 }
    115 
    ; Stores the upper half (elements 8-15) of a <16 x i16> to %addr with align 1.
    ; The checks expect vextractf128 $1 (the floating-point form) with a memory destination, even for integer data.
    116 define void @extract_subvector256_v16i16_store(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
    117 ; SKX-LABEL: extract_subvector256_v16i16_store:
    118 ; SKX:       ## %bb.0: ## %entry
    119 ; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
    120 ; SKX-NEXT:    vzeroupper
    121 ; SKX-NEXT:    retq
    122 entry:
    123   %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    124   %1 = bitcast i16* %addr to <8 x i16>*
    125   store <8 x i16> %0, <8 x i16>* %1, align 1
    126   ret void
    127 }
    128 
    ; Stores the upper half (elements 16-31) of a <32 x i8> to %addr with align 1.
    ; The checks expect vextractf128 $1 (the floating-point form) with a memory destination, even for integer data.
    129 define void @extract_subvector256_v32i8_store(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
    130 ; SKX-LABEL: extract_subvector256_v32i8_store:
    131 ; SKX:       ## %bb.0: ## %entry
    132 ; SKX-NEXT:    vextractf128 $1, %ymm0, (%rdi)
    133 ; SKX-NEXT:    vzeroupper
    134 ; SKX-NEXT:    retq
    135 entry:
    136   %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
    137   %1 = bitcast i8* %addr to <16 x i8>*
    138   store <16 x i8> %0, <16 x i8>* %1, align 1
    139   ret void
    140 }
    141 
    ; Stores the low half (elements 0-1) of a <4 x double> to %addr with align 1.
    ; The checks expect a plain unaligned vmovups of xmm0 and no extract instruction.
    142 define void @extract_subvector256_v4f64_store_lo(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
    143 ; SKX-LABEL: extract_subvector256_v4f64_store_lo:
    144 ; SKX:       ## %bb.0: ## %entry
    145 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    146 ; SKX-NEXT:    vzeroupper
    147 ; SKX-NEXT:    retq
    148 entry:
    149   %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
    150   %1 = bitcast double* %addr to <2 x double>*
    151   store <2 x double> %0, <2 x double>* %1, align 1
    152   ret void
    153 }
    154 
    ; Stores the low half (elements 0-1) of a <4 x double> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0, and no extract instruction.
    155 define void @extract_subvector256_v4f64_store_lo_align_16(double* nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
    156 ; SKX-LABEL: extract_subvector256_v4f64_store_lo_align_16:
    157 ; SKX:       ## %bb.0: ## %entry
    158 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    159 ; SKX-NEXT:    vzeroupper
    160 ; SKX-NEXT:    retq
    161 entry:
    162   %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
    163   %1 = bitcast double* %addr to <2 x double>*
    164   store <2 x double> %0, <2 x double>* %1, align 16
    165   ret void
    166 }
    167 
    ; Stores the low half (elements 0-3) of an <8 x float> to %addr with align 1.
    ; The checks expect a plain unaligned vmovups of xmm0 and no extract instruction.
    168 define void @extract_subvector256_v4f32_store_lo(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
    169 ; SKX-LABEL: extract_subvector256_v4f32_store_lo:
    170 ; SKX:       ## %bb.0: ## %entry
    171 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    172 ; SKX-NEXT:    vzeroupper
    173 ; SKX-NEXT:    retq
    174 entry:
    175   %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    176   %1 = bitcast float* %addr to <4 x float>*
    177   store <4 x float> %0, <4 x float>* %1, align 1
    178   ret void
    179 }
    180 
    ; Stores the low half (elements 0-3) of an <8 x float> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0, and no extract instruction.
    181 define void @extract_subvector256_v4f32_store_lo_align_16(float* nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
    182 ; SKX-LABEL: extract_subvector256_v4f32_store_lo_align_16:
    183 ; SKX:       ## %bb.0: ## %entry
    184 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    185 ; SKX-NEXT:    vzeroupper
    186 ; SKX-NEXT:    retq
    187 entry:
    188   %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    189   %1 = bitcast float* %addr to <4 x float>*
    190   store <4 x float> %0, <4 x float>* %1, align 16
    191   ret void
    192 }
    193 
    ; Stores the low half (elements 0-1) of a <4 x i64> to %addr with align 1.
    ; The checks expect vmovups of xmm0 (floating-point move form, even for integer data).
    194 define void @extract_subvector256_v2i64_store_lo(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
    195 ; SKX-LABEL: extract_subvector256_v2i64_store_lo:
    196 ; SKX:       ## %bb.0: ## %entry
    197 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    198 ; SKX-NEXT:    vzeroupper
    199 ; SKX-NEXT:    retq
    200 entry:
    201   %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
    202   %1 = bitcast i64* %addr to <2 x i64>*
    203   store <2 x i64> %0, <2 x i64>* %1, align 1
    204   ret void
    205 }
    206 
    ; Stores the low half (elements 0-1) of a <4 x i64> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0 (floating-point move form, even for integer data).
    207 define void @extract_subvector256_v2i64_store_lo_align_16(i64* nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
    208 ; SKX-LABEL: extract_subvector256_v2i64_store_lo_align_16:
    209 ; SKX:       ## %bb.0: ## %entry
    210 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    211 ; SKX-NEXT:    vzeroupper
    212 ; SKX-NEXT:    retq
    213 entry:
    214   %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
    215   %1 = bitcast i64* %addr to <2 x i64>*
    216   store <2 x i64> %0, <2 x i64>* %1, align 16
    217   ret void
    218 }
    219 
    ; Stores the low half (elements 0-3) of an <8 x i32> to %addr with align 1.
    ; The checks expect vmovups of xmm0 (floating-point move form, even for integer data).
    220 define void @extract_subvector256_v4i32_store_lo(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
    221 ; SKX-LABEL: extract_subvector256_v4i32_store_lo:
    222 ; SKX:       ## %bb.0: ## %entry
    223 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    224 ; SKX-NEXT:    vzeroupper
    225 ; SKX-NEXT:    retq
    226 entry:
    227   %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    228   %1 = bitcast i32* %addr to <4 x i32>*
    229   store <4 x i32> %0, <4 x i32>* %1, align 1
    230   ret void
    231 }
    232 
    ; Stores the low half (elements 0-3) of an <8 x i32> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0 (floating-point move form, even for integer data).
    233 define void @extract_subvector256_v4i32_store_lo_align_16(i32* nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
    234 ; SKX-LABEL: extract_subvector256_v4i32_store_lo_align_16:
    235 ; SKX:       ## %bb.0: ## %entry
    236 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    237 ; SKX-NEXT:    vzeroupper
    238 ; SKX-NEXT:    retq
    239 entry:
    240   %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    241   %1 = bitcast i32* %addr to <4 x i32>*
    242   store <4 x i32> %0, <4 x i32>* %1, align 16
    243   ret void
    244 }
    245 
    ; Stores the low half (elements 0-7) of a <16 x i16> to %addr with align 1.
    ; The checks expect vmovups of xmm0 (floating-point move form, even for integer data).
    246 define void @extract_subvector256_v8i16_store_lo(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
    247 ; SKX-LABEL: extract_subvector256_v8i16_store_lo:
    248 ; SKX:       ## %bb.0: ## %entry
    249 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    250 ; SKX-NEXT:    vzeroupper
    251 ; SKX-NEXT:    retq
    252 entry:
    253   %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    254   %1 = bitcast i16* %addr to <8 x i16>*
    255   store <8 x i16> %0, <8 x i16>* %1, align 1
    256   ret void
    257 }
    258 
    ; Stores the low half (elements 0-7) of a <16 x i16> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0 (floating-point move form, even for integer data).
    259 define void @extract_subvector256_v8i16_store_lo_align_16(i16* nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
    260 ; SKX-LABEL: extract_subvector256_v8i16_store_lo_align_16:
    261 ; SKX:       ## %bb.0: ## %entry
    262 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    263 ; SKX-NEXT:    vzeroupper
    264 ; SKX-NEXT:    retq
    265 entry:
    266   %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    267   %1 = bitcast i16* %addr to <8 x i16>*
    268   store <8 x i16> %0, <8 x i16>* %1, align 16
    269   ret void
    270 }
    271 
    ; Stores the low half (elements 0-15) of a <32 x i8> to %addr with align 1.
    ; The checks expect vmovups of xmm0 (floating-point move form, even for integer data).
    272 define void @extract_subvector256_v16i8_store_lo(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
    273 ; SKX-LABEL: extract_subvector256_v16i8_store_lo:
    274 ; SKX:       ## %bb.0: ## %entry
    275 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    276 ; SKX-NEXT:    vzeroupper
    277 ; SKX-NEXT:    retq
    278 entry:
    279   %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    280   %1 = bitcast i8* %addr to <16 x i8>*
    281   store <16 x i8> %0, <16 x i8>* %1, align 1
    282   ret void
    283 }
    284 
    ; Stores the low half (elements 0-15) of a <32 x i8> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0 (floating-point move form, even for integer data).
    285 define void @extract_subvector256_v16i8_store_lo_align_16(i8* nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
    286 ; SKX-LABEL: extract_subvector256_v16i8_store_lo_align_16:
    287 ; SKX:       ## %bb.0: ## %entry
    288 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    289 ; SKX-NEXT:    vzeroupper
    290 ; SKX-NEXT:    retq
    291 entry:
    292   %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    293   %1 = bitcast i8* %addr to <16 x i8>*
    294   store <16 x i8> %0, <16 x i8>* %1, align 16
    295   ret void
    296 }
    297 
    ; Stores the lowest 128 bits (elements 0-1) of an <8 x double> to %addr with align 1.
    ; The checks expect a plain unaligned vmovups of xmm0 and no extract instruction.
    298 define void @extract_subvector512_v2f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
    299 ; SKX-LABEL: extract_subvector512_v2f64_store_lo:
    300 ; SKX:       ## %bb.0: ## %entry
    301 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    302 ; SKX-NEXT:    vzeroupper
    303 ; SKX-NEXT:    retq
    304 entry:
    305   %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
    306   %1 = bitcast double* %addr to <2 x double>*
    307   store <2 x double> %0, <2 x double>* %1, align 1
    308   ret void
    309 }
    310 
    ; Stores the lowest 128 bits (elements 0-1) of an <8 x double> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0, and no extract instruction.
    311 define void @extract_subvector512_v2f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
    312 ; SKX-LABEL: extract_subvector512_v2f64_store_lo_align_16:
    313 ; SKX:       ## %bb.0: ## %entry
    314 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    315 ; SKX-NEXT:    vzeroupper
    316 ; SKX-NEXT:    retq
    317 entry:
    318   %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
    319   %1 = bitcast double* %addr to <2 x double>*
    320   store <2 x double> %0, <2 x double>* %1, align 16
    321   ret void
    322 }
    323 
    ; Stores the lowest 128 bits (elements 0-3) of a <16 x float> to %addr with align 1.
    ; The checks expect a plain unaligned vmovups of xmm0 and no extract instruction.
    324 define void @extract_subvector512_v4f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
    325 ; SKX-LABEL: extract_subvector512_v4f32_store_lo:
    326 ; SKX:       ## %bb.0: ## %entry
    327 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    328 ; SKX-NEXT:    vzeroupper
    329 ; SKX-NEXT:    retq
    330 entry:
    331   %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    332   %1 = bitcast float* %addr to <4 x float>*
    333   store <4 x float> %0, <4 x float>* %1, align 1
    334   ret void
    335 }
    336 
    ; Stores the lowest 128 bits (elements 0-3) of a <16 x float> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0, and no extract instruction.
    337 define void @extract_subvector512_v4f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
    338 ; SKX-LABEL: extract_subvector512_v4f32_store_lo_align_16:
    339 ; SKX:       ## %bb.0: ## %entry
    340 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    341 ; SKX-NEXT:    vzeroupper
    342 ; SKX-NEXT:    retq
    343 entry:
    344   %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    345   %1 = bitcast float* %addr to <4 x float>*
    346   store <4 x float> %0, <4 x float>* %1, align 16
    347   ret void
    348 }
    349 
    ; Stores the lowest 128 bits (elements 0-1) of an <8 x i64> to %addr with align 1.
    ; The checks expect vmovups of xmm0 (floating-point move form, even for integer data).
    350 define void @extract_subvector512_v2i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
    351 ; SKX-LABEL: extract_subvector512_v2i64_store_lo:
    352 ; SKX:       ## %bb.0: ## %entry
    353 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    354 ; SKX-NEXT:    vzeroupper
    355 ; SKX-NEXT:    retq
    356 entry:
    357   %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
    358   %1 = bitcast i64* %addr to <2 x i64>*
    359   store <2 x i64> %0, <2 x i64>* %1, align 1
    360   ret void
    361 }
    362 
    ; Stores the lowest 128 bits (elements 0-1) of an <8 x i64> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0 (floating-point move form, even for integer data).
    363 define void @extract_subvector512_v2i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
    364 ; SKX-LABEL: extract_subvector512_v2i64_store_lo_align_16:
    365 ; SKX:       ## %bb.0: ## %entry
    366 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    367 ; SKX-NEXT:    vzeroupper
    368 ; SKX-NEXT:    retq
    369 entry:
    370   %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
    371   %1 = bitcast i64* %addr to <2 x i64>*
    372   store <2 x i64> %0, <2 x i64>* %1, align 16
    373   ret void
    374 }
    375 
    ; Stores the lowest 128 bits (elements 0-3) of a <16 x i32> to %addr with align 1.
    ; The checks expect vmovups of xmm0 (floating-point move form, even for integer data).
    376 define void @extract_subvector512_v4i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
    377 ; SKX-LABEL: extract_subvector512_v4i32_store_lo:
    378 ; SKX:       ## %bb.0: ## %entry
    379 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    380 ; SKX-NEXT:    vzeroupper
    381 ; SKX-NEXT:    retq
    382 entry:
    383   %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    384   %1 = bitcast i32* %addr to <4 x i32>*
    385   store <4 x i32> %0, <4 x i32>* %1, align 1
    386   ret void
    387 }
    388 
    ; Stores the lowest 128 bits (elements 0-3) of a <16 x i32> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0 (floating-point move form, even for integer data).
    389 define void @extract_subvector512_v4i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
    390 ; SKX-LABEL: extract_subvector512_v4i32_store_lo_align_16:
    391 ; SKX:       ## %bb.0: ## %entry
    392 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    393 ; SKX-NEXT:    vzeroupper
    394 ; SKX-NEXT:    retq
    395 entry:
    396   %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    397   %1 = bitcast i32* %addr to <4 x i32>*
    398   store <4 x i32> %0, <4 x i32>* %1, align 16
    399   ret void
    400 }
    401 
    ; Stores the lowest 128 bits (elements 0-7) of a <32 x i16> to %addr with align 1.
    ; The checks expect vmovups of xmm0 (floating-point move form, even for integer data).
    ; NOTE(review): unlike the sibling element types, no _align_16 variant of this test follows in the visible part of the file.
    402 define void @extract_subvector512_v8i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
    403 ; SKX-LABEL: extract_subvector512_v8i16_store_lo:
    404 ; SKX:       ## %bb.0: ## %entry
    405 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    406 ; SKX-NEXT:    vzeroupper
    407 ; SKX-NEXT:    retq
    408 entry:
    409   %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    410   %1 = bitcast i16* %addr to <8 x i16>*
    411   store <8 x i16> %0, <8 x i16>* %1, align 1
    412   ret void
    413 }
    414 
    ; Stores the lowest 128 bits (elements 0-15) of a <64 x i8> to %addr with align 1.
    ; The checks expect vmovups of xmm0 (floating-point move form, even for integer data).
    415 define void @extract_subvector512_v16i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
    416 ; SKX-LABEL: extract_subvector512_v16i8_store_lo:
    417 ; SKX:       ## %bb.0: ## %entry
    418 ; SKX-NEXT:    vmovups %xmm0, (%rdi)
    419 ; SKX-NEXT:    vzeroupper
    420 ; SKX-NEXT:    retq
    421 entry:
    422   %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    423   %1 = bitcast i8* %addr to <16 x i8>*
    424   store <16 x i8> %0, <16 x i8>* %1, align 1
    425   ret void
    426 }
    427 
    ; Stores the lowest 128 bits (elements 0-15) of a <64 x i8> to %addr with align 16.
    ; The checks expect the aligned form, vmovaps of xmm0 (floating-point move form, even for integer data).
    428 define void @extract_subvector512_v16i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
    429 ; SKX-LABEL: extract_subvector512_v16i8_store_lo_align_16:
    430 ; SKX:       ## %bb.0: ## %entry
    431 ; SKX-NEXT:    vmovaps %xmm0, (%rdi)
    432 ; SKX-NEXT:    vzeroupper
    433 ; SKX-NEXT:    retq
    434 entry:
    435   %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    436   %1 = bitcast i8* %addr to <16 x i8>*
    437   store <16 x i8> %0, <16 x i8>* %1, align 16
    438   ret void
    439 }
    440 
    ; Stores the low 256 bits (elements 0-3) of an <8 x double> to %addr with align 1.
    ; The checks expect a plain unaligned vmovups of ymm0 and no extract instruction.
    441 define void @extract_subvector512_v4f64_store_lo(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
    442 ; SKX-LABEL: extract_subvector512_v4f64_store_lo:
    443 ; SKX:       ## %bb.0: ## %entry
    444 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    445 ; SKX-NEXT:    vzeroupper
    446 ; SKX-NEXT:    retq
    447 entry:
    448   %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    449   %1 = bitcast double* %addr to <4 x double>*
    450   store <4 x double> %0, <4 x double>* %1, align 1
    451   ret void
    452 }
    453 
    ; Stores the low 256 bits (elements 0-3) of an <8 x double> to %addr with align 16.
    ; align 16 is less than the 32-byte store width, and the checks still expect the unaligned vmovups of ymm0.
    454 define void @extract_subvector512_v4f64_store_lo_align_16(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
    455 ; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16:
    456 ; SKX:       ## %bb.0: ## %entry
    457 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    458 ; SKX-NEXT:    vzeroupper
    459 ; SKX-NEXT:    retq
    460 entry:
    461   %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    462   %1 = bitcast double* %addr to <4 x double>*
    463   store <4 x double> %0, <4 x double>* %1, align 16
    464   ret void
    465 }
    466 
    ; Stores the low 256 bits (elements 0-3) of an <8 x double> to %addr with align 32.
    ; With alignment equal to the 32-byte store width, the checks expect the aligned vmovaps of ymm0.
    467 define void @extract_subvector512_v4f64_store_lo_align_32(double* nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
    468 ; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_32:
    469 ; SKX:       ## %bb.0: ## %entry
    470 ; SKX-NEXT:    vmovaps %ymm0, (%rdi)
    471 ; SKX-NEXT:    vzeroupper
    472 ; SKX-NEXT:    retq
    473 entry:
    474   %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    475   %1 = bitcast double* %addr to <4 x double>*
    476   store <4 x double> %0, <4 x double>* %1, align 32
    477   ret void
    478 }
    479 
    ; Stores the low 256 bits (elements 0-7) of a <16 x float> to %addr with align 1.
    ; The checks expect a plain unaligned vmovups of ymm0 and no extract instruction.
    480 define void @extract_subvector512_v8f32_store_lo(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
    481 ; SKX-LABEL: extract_subvector512_v8f32_store_lo:
    482 ; SKX:       ## %bb.0: ## %entry
    483 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    484 ; SKX-NEXT:    vzeroupper
    485 ; SKX-NEXT:    retq
    486 entry:
    487   %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    488   %1 = bitcast float* %addr to <8 x float>*
    489   store <8 x float> %0, <8 x float>* %1, align 1
    490   ret void
    491 }
    492 
    ; Stores the low 256 bits (elements 0-7) of a <16 x float> to %addr with align 16.
    ; align 16 is less than the 32-byte store width, and the checks still expect the unaligned vmovups of ymm0.
    493 define void @extract_subvector512_v8f32_store_lo_align_16(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
    494 ; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16:
    495 ; SKX:       ## %bb.0: ## %entry
    496 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    497 ; SKX-NEXT:    vzeroupper
    498 ; SKX-NEXT:    retq
    499 entry:
    500   %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    501   %1 = bitcast float* %addr to <8 x float>*
    502   store <8 x float> %0, <8 x float>* %1, align 16
    503   ret void
    504 }
    505 
    ; Stores the low 256 bits (elements 0-7) of a <16 x float> to %addr with align 32.
    ; With alignment equal to the 32-byte store width, the checks expect the aligned vmovaps of ymm0.
    506 define void @extract_subvector512_v8f32_store_lo_align_32(float* nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
    507 ; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_32:
    508 ; SKX:       ## %bb.0: ## %entry
    509 ; SKX-NEXT:    vmovaps %ymm0, (%rdi)
    510 ; SKX-NEXT:    vzeroupper
    511 ; SKX-NEXT:    retq
    512 entry:
    513   %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    514   %1 = bitcast float* %addr to <8 x float>*
    515   store <8 x float> %0, <8 x float>* %1, align 32
    516   ret void
    517 }
    518 
    ; Stores the low 256 bits (elements 0-3) of an <8 x i64> to %addr with align 1.
    ; The checks expect vmovups of ymm0 (floating-point move form, even for integer data).
    519 define void @extract_subvector512_v4i64_store_lo(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
    520 ; SKX-LABEL: extract_subvector512_v4i64_store_lo:
    521 ; SKX:       ## %bb.0: ## %entry
    522 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    523 ; SKX-NEXT:    vzeroupper
    524 ; SKX-NEXT:    retq
    525 entry:
    526   %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    527   %1 = bitcast i64* %addr to <4 x i64>*
    528   store <4 x i64> %0, <4 x i64>* %1, align 1
    529   ret void
    530 }
    531 
    ; Stores the low 256 bits (elements 0-3) of an <8 x i64> to %addr with align 16.
    ; align 16 is less than the 32-byte store width, and the checks still expect the unaligned vmovups of ymm0.
    532 define void @extract_subvector512_v4i64_store_lo_align_16(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
    533 ; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16:
    534 ; SKX:       ## %bb.0: ## %entry
    535 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    536 ; SKX-NEXT:    vzeroupper
    537 ; SKX-NEXT:    retq
    538 entry:
    539   %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    540   %1 = bitcast i64* %addr to <4 x i64>*
    541   store <4 x i64> %0, <4 x i64>* %1, align 16
    542   ret void
    543 }
    544 
    ; Stores the low 256 bits (elements 0-3) of an <8 x i64> to %addr with align 32.
    ; With alignment equal to the 32-byte store width, the checks expect the aligned vmovaps of ymm0.
    545 define void @extract_subvector512_v4i64_store_lo_align_32(i64* nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
    546 ; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_32:
    547 ; SKX:       ## %bb.0: ## %entry
    548 ; SKX-NEXT:    vmovaps %ymm0, (%rdi)
    549 ; SKX-NEXT:    vzeroupper
    550 ; SKX-NEXT:    retq
    551 entry:
    552   %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    553   %1 = bitcast i64* %addr to <4 x i64>*
    554   store <4 x i64> %0, <4 x i64>* %1, align 32
    555   ret void
    556 }
    557 
    ; Stores the low 256 bits (elements 0-7) of a <16 x i32> to %addr with align 1.
    ; The checks expect vmovups of ymm0 (floating-point move form, even for integer data).
    558 define void @extract_subvector512_v8i32_store_lo(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
    559 ; SKX-LABEL: extract_subvector512_v8i32_store_lo:
    560 ; SKX:       ## %bb.0: ## %entry
    561 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    562 ; SKX-NEXT:    vzeroupper
    563 ; SKX-NEXT:    retq
    564 entry:
    565   %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    566   %1 = bitcast i32* %addr to <8 x i32>*
    567   store <8 x i32> %0, <8 x i32>* %1, align 1
    568   ret void
    569 }
    570 
    ; Stores the low 256 bits (elements 0-7) of a <16 x i32> to %addr with align 16.
    ; align 16 is less than the 32-byte store width, and the checks still expect the unaligned vmovups of ymm0.
    571 define void @extract_subvector512_v8i32_store_lo_align_16(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
    572 ; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16:
    573 ; SKX:       ## %bb.0: ## %entry
    574 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    575 ; SKX-NEXT:    vzeroupper
    576 ; SKX-NEXT:    retq
    577 entry:
    578   %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    579   %1 = bitcast i32* %addr to <8 x i32>*
    580   store <8 x i32> %0, <8 x i32>* %1, align 16
    581   ret void
    582 }
    583 
    ; Stores the low 256 bits (elements 0-7) of a <16 x i32> to %addr with align 32.
    ; With alignment equal to the 32-byte store width, the checks expect the aligned vmovaps of ymm0.
    584 define void @extract_subvector512_v8i32_store_lo_align_32(i32* nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
    585 ; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_32:
    586 ; SKX:       ## %bb.0: ## %entry
    587 ; SKX-NEXT:    vmovaps %ymm0, (%rdi)
    588 ; SKX-NEXT:    vzeroupper
    589 ; SKX-NEXT:    retq
    590 entry:
    591   %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
    592   %1 = bitcast i32* %addr to <8 x i32>*
    593   store <8 x i32> %0, <8 x i32>* %1, align 32
    594   ret void
    595 }
    596 
    ; Stores the low 256 bits (elements 0-15) of a <32 x i16> to %addr with align 1.
    ; The checks expect vmovups of ymm0 (floating-point move form, even for integer data).
    597 define void @extract_subvector512_v16i16_store_lo(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
    598 ; SKX-LABEL: extract_subvector512_v16i16_store_lo:
    599 ; SKX:       ## %bb.0: ## %entry
    600 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    601 ; SKX-NEXT:    vzeroupper
    602 ; SKX-NEXT:    retq
    603 entry:
    604   %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    605   %1 = bitcast i16* %addr to <16 x i16>*
    606   store <16 x i16> %0, <16 x i16>* %1, align 1
    607   ret void
    608 }
    609 
    ; Stores the low 256 bits (elements 0-15) of a <32 x i16> to %addr with align 16.
    ; align 16 is less than the 32-byte store width, and the checks still expect the unaligned vmovups of ymm0.
    610 define void @extract_subvector512_v16i16_store_lo_align_16(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
    611 ; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16:
    612 ; SKX:       ## %bb.0: ## %entry
    613 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    614 ; SKX-NEXT:    vzeroupper
    615 ; SKX-NEXT:    retq
    616 entry:
    617   %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    618   %1 = bitcast i16* %addr to <16 x i16>*
    619   store <16 x i16> %0, <16 x i16>* %1, align 16
    620   ret void
    621 }
    622 
    ; Stores the low 256 bits (elements 0-15) of a <32 x i16> to %addr with align 32.
    ; With alignment equal to the 32-byte store width, the checks expect the aligned vmovaps of ymm0.
    623 define void @extract_subvector512_v16i16_store_lo_align_32(i16* nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
    624 ; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_32:
    625 ; SKX:       ## %bb.0: ## %entry
    626 ; SKX-NEXT:    vmovaps %ymm0, (%rdi)
    627 ; SKX-NEXT:    vzeroupper
    628 ; SKX-NEXT:    retq
    629 entry:
    630   %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    631   %1 = bitcast i16* %addr to <16 x i16>*
    632   store <16 x i16> %0, <16 x i16>* %1, align 32
    633   ret void
    634 }
    635 
    ; Stores the low 256 bits (elements 0-31) of a <64 x i8> to %addr with align 1.
    ; The checks expect vmovups of ymm0 (floating-point move form, even for integer data).
    636 define void @extract_subvector512_v32i8_store_lo(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
    637 ; SKX-LABEL: extract_subvector512_v32i8_store_lo:
    638 ; SKX:       ## %bb.0: ## %entry
    639 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    640 ; SKX-NEXT:    vzeroupper
    641 ; SKX-NEXT:    retq
    642 entry:
    643   %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
    644   %1 = bitcast i8* %addr to <32 x i8>*
    645   store <32 x i8> %0, <32 x i8>* %1, align 1
    646   ret void
    647 }
    648 
    ; Stores the low 256 bits (elements 0-31) of a <64 x i8> to %addr with align 16.
    ; align 16 is less than the 32-byte store width, and the checks still expect the unaligned vmovups of ymm0.
    649 define void @extract_subvector512_v32i8_store_lo_align_16(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
    650 ; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16:
    651 ; SKX:       ## %bb.0: ## %entry
    652 ; SKX-NEXT:    vmovups %ymm0, (%rdi)
    653 ; SKX-NEXT:    vzeroupper
    654 ; SKX-NEXT:    retq
    655 entry:
    656   %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
    657   %1 = bitcast i8* %addr to <32 x i8>*
    658   store <32 x i8> %0, <32 x i8>* %1, align 16
    659   ret void
    660 }
    661 
    ; Stores the low 256 bits (elements 0-31) of a <64 x i8> to %addr with align 32.
    ; With alignment equal to the 32-byte store width, the checks expect the aligned vmovaps of ymm0.
    662 define void @extract_subvector512_v32i8_store_lo_align_32(i8* nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
    663 ; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_32:
    664 ; SKX:       ## %bb.0: ## %entry
    665 ; SKX-NEXT:    vmovaps %ymm0, (%rdi)
    666 ; SKX-NEXT:    vzeroupper
    667 ; SKX-NEXT:    retq
    668 entry:
    669   %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
    670   %1 = bitcast i8* %addr to <32 x i8>*
    671   store <32 x i8> %0, <32 x i8>* %1, align 32
    672   ret void
    673 }
    674 
    ; Merge-masked extract of the upper 256 bits (elements 4-7) of an
    ; <8 x double>. The i8 mask %__U is bitcast to <8 x i1> and only its low
    ; four lanes are used to select, per element, between the extracted value
    ; and the passthrough %__W. Expected codegen is one vextractf64x4 $1 with a
    ; {%k1} write mask into %ymm0, with the mask loaded by kmovd.
    ; NOTE(review): the name suggests this mirrors the IR for the
    ; _mm512_mask_extractf64x4_pd intrinsic; not verifiable from this file.
    675 define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
    676 ; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
    677 ; SKX:       ## %bb.0: ## %entry
    678 ; SKX-NEXT:    kmovd %edi, %k1
    679 ; SKX-NEXT:    vextractf64x4 $1, %zmm1, %ymm0 {%k1}
    680 ; SKX-NEXT:    retq
    681 entry:
    682   %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    683   %0 = bitcast i8 %__U to <8 x i1>
    684   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    685   %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> %__W
    686   ret <4 x double> %1
    687 }
    688 
    ; Zero-masked extract of the upper 256 bits (elements 4-7) of an
    ; <8 x double>. The low four lanes of the bitcast i8 mask select between the
    ; extracted value and zeroinitializer, so the expected codegen is one
    ; vextractf64x4 $1 carrying both {%k1} and {z}.
    689 define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
    690 ; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
    691 ; SKX:       ## %bb.0: ## %entry
    692 ; SKX-NEXT:    kmovd %edi, %k1
    693 ; SKX-NEXT:    vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
    694 ; SKX-NEXT:    retq
    695 entry:
    696   %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    697   %0 = bitcast i8 %__U to <8 x i1>
    698   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    699   %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> zeroinitializer
    700   ret <4 x double> %1
    701 }
    702 
    ; Merge-masked extract of float elements 4-7 (the second 128-bit chunk) of
    ; a 512-bit vector. %__A arrives as <8 x double> and is bitcast to
    ; <16 x float> before the shuffle. The low four lanes of the bitcast i8 mask
    ; select between the extracted value and the passthrough %__W. Expected
    ; codegen is one vextractf32x4 $1 with {%k1} into %xmm0, then vzeroupper.
    703 define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
    704 ; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
    705 ; SKX:       ## %bb.0: ## %entry
    706 ; SKX-NEXT:    kmovd %edi, %k1
    707 ; SKX-NEXT:    vextractf32x4 $1, %zmm1, %xmm0 {%k1}
    708 ; SKX-NEXT:    vzeroupper
    709 ; SKX-NEXT:    retq
    710 entry:
    711   %0 = bitcast <8 x double> %__A to <16 x float>
    712   %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    713   %1 = bitcast i8 %__U to <8 x i1>
    714   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    715   %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
    716   ret <4 x float> %2
    717 }
    718 
    ; Zero-masked extract of float elements 4-7 (the second 128-bit chunk) of
    ; a 512-bit vector that arrives as <8 x double> and is bitcast to
    ; <16 x float>. Lanes whose mask bit is clear take zeroinitializer, so the
    ; expected codegen is one vextractf32x4 $1 with {%k1} {z}, then vzeroupper.
    719 define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
    720 ; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
    721 ; SKX:       ## %bb.0: ## %entry
    722 ; SKX-NEXT:    kmovd %edi, %k1
    723 ; SKX-NEXT:    vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
    724 ; SKX-NEXT:    vzeroupper
    725 ; SKX-NEXT:    retq
    726 entry:
    727   %0 = bitcast <8 x double> %__A to <16 x float>
    728   %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    729   %1 = bitcast i8 %__U to <8 x i1>
    730   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    731   %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
    732   ret <4 x float> %2
    733 }
    734 
    ; Merge-masked extract of the upper 128 bits (elements 2-3) of a
    ; <4 x double>. Only the low two lanes of the bitcast i8 mask are used to
    ; select between the extracted value and the passthrough %__W. Expected
    ; codegen is one vextractf64x2 $1 from %ymm1 with {%k1}, then vzeroupper.
    735 define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
    736 ; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
    737 ; SKX:       ## %bb.0: ## %entry
    738 ; SKX-NEXT:    kmovd %edi, %k1
    739 ; SKX-NEXT:    vextractf64x2 $1, %ymm1, %xmm0 {%k1}
    740 ; SKX-NEXT:    vzeroupper
    741 ; SKX-NEXT:    retq
    742 entry:
    743   %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
    744   %0 = bitcast i8 %__U to <8 x i1>
    745   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    746   %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
    747   ret <2 x double> %1
    748 }
    749 
    ; Zero-masked extract of the upper 128 bits (elements 2-3) of a
    ; <4 x double>. The low two lanes of the bitcast i8 mask select between the
    ; extracted value and zeroinitializer. Expected codegen is one
    ; vextractf64x2 $1 from %ymm0 with {%k1} {z}, then vzeroupper.
    750 define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
    751 ; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
    752 ; SKX:       ## %bb.0: ## %entry
    753 ; SKX-NEXT:    kmovd %edi, %k1
    754 ; SKX-NEXT:    vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
    755 ; SKX-NEXT:    vzeroupper
    756 ; SKX-NEXT:    retq
    757 entry:
    758   %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
    759   %0 = bitcast i8 %__U to <8 x i1>
    760   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    761   %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
    762   ret <2 x double> %1
    763 }
    764 
    ; Integer counterpart of the masked 128-bit extract: takes elements 2-3 of
    ; a <4 x i64> and merges them with the passthrough %__W under the low two
    ; lanes of the bitcast i8 mask. Expected codegen is one vextracti64x2 $1
    ; from %ymm1 with {%k1}, then vzeroupper.
    765 define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
    766 ; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
    767 ; SKX:       ## %bb.0: ## %entry
    768 ; SKX-NEXT:    kmovd %edi, %k1
    769 ; SKX-NEXT:    vextracti64x2 $1, %ymm1, %xmm0 {%k1}
    770 ; SKX-NEXT:    vzeroupper
    771 ; SKX-NEXT:    retq
    772 entry:
    773   %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
    774   %0 = bitcast i8 %__U to <8 x i1>
    775   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    776   %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> %__W
    777   ret <2 x i64> %1
    778 }
    779 
    ; Zero-masked integer extract of elements 2-3 of a <4 x i64>. The low two
    ; lanes of the bitcast i8 mask select between the extracted value and
    ; zeroinitializer. Expected codegen is one vextracti64x2 $1 from %ymm0 with
    ; {%k1} {z}, then vzeroupper.
    780 define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
    781 ; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
    782 ; SKX:       ## %bb.0: ## %entry
    783 ; SKX-NEXT:    kmovd %edi, %k1
    784 ; SKX-NEXT:    vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
    785 ; SKX-NEXT:    vzeroupper
    786 ; SKX-NEXT:    retq
    787 entry:
    788   %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
    789   %0 = bitcast i8 %__U to <8 x i1>
    790   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    791   %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
    792   ret <2 x i64> %1
    793 }
    794 
    ; Merge-masked extract of the upper 128 bits (elements 4-7) of an
    ; <8 x float>. The low four lanes of the bitcast i8 mask select between the
    ; extracted value and the passthrough %__W. Expected codegen is one
    ; vextractf32x4 $1 from %ymm1 with {%k1}, then vzeroupper.
    795 define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
    796 ; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
    797 ; SKX:       ## %bb.0: ## %entry
    798 ; SKX-NEXT:    kmovd %edi, %k1
    799 ; SKX-NEXT:    vextractf32x4 $1, %ymm1, %xmm0 {%k1}
    800 ; SKX-NEXT:    vzeroupper
    801 ; SKX-NEXT:    retq
    802 entry:
    803   %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    804   %0 = bitcast i8 %__U to <8 x i1>
    805   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    806   %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
    807   ret <4 x float> %1
    808 }
    809 
    ; Zero-masked extract of the upper 128 bits (elements 4-7) of an
    ; <8 x float>. The low four lanes of the bitcast i8 mask select between the
    ; extracted value and zeroinitializer. Expected codegen is one
    ; vextractf32x4 $1 from %ymm0 with {%k1} {z}, then vzeroupper.
    810 define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
    811 ; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
    812 ; SKX:       ## %bb.0: ## %entry
    813 ; SKX-NEXT:    kmovd %edi, %k1
    814 ; SKX-NEXT:    vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
    815 ; SKX-NEXT:    vzeroupper
    816 ; SKX-NEXT:    retq
    817 entry:
    818   %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    819   %0 = bitcast i8 %__U to <8 x i1>
    820   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    821   %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
    822   ret <4 x float> %1
    823 }
    824 
    ; Merge-masked extract of i32 elements 4-7 (the upper 128 bits) of a 256-bit
    ; integer vector. The source %__A and passthrough %__W are typed as i64
    ; vectors at the function boundary and bitcast to i32 vectors so that the
    ; shuffle and select operate on 32-bit lanes; the result is bitcast back to
    ; <2 x i64>. The low four lanes of the bitcast i8 mask drive the select.
    ; Expected codegen is one vextracti32x4 $1 from %ymm1 with {%k1}, then
    ; vzeroupper.
    825 define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
    826 ; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
    827 ; SKX:       ## %bb.0: ## %entry
    828 ; SKX-NEXT:    kmovd %edi, %k1
    829 ; SKX-NEXT:    vextracti32x4 $1, %ymm1, %xmm0 {%k1}
    830 ; SKX-NEXT:    vzeroupper
    831 ; SKX-NEXT:    retq
    832 entry:
    833   %0 = bitcast <4 x i64> %__A to <8 x i32>
    834   %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    835   %1 = bitcast <2 x i64> %__W to <4 x i32>
    836   %2 = bitcast i8 %__U to <8 x i1>
    837   %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    838   %3 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> %1
    839   %4 = bitcast <4 x i32> %3 to <2 x i64>
    840   ret <2 x i64> %4
    841 }
    842 
    ; Zero-masked extract of i32 elements 4-7 (the upper 128 bits) of a 256-bit
    ; integer vector. %__A is bitcast from <4 x i64> to <8 x i32> so the shuffle
    ; and select work on 32-bit lanes, and the result is bitcast back to
    ; <2 x i64>. Lanes whose mask bit is clear take zeroinitializer. Expected
    ; codegen is one vextracti32x4 $1 from %ymm0 with {%k1} {z}, then vzeroupper.
    843 define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) {
    844 ; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32:
    845 ; SKX:       ## %bb.0: ## %entry
    846 ; SKX-NEXT:    kmovd %edi, %k1
    847 ; SKX-NEXT:    vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
    848 ; SKX-NEXT:    vzeroupper
    849 ; SKX-NEXT:    retq
    850 entry:
    851   %0 = bitcast <4 x i64> %__A to <8 x i32>
    852   %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
    853   %1 = bitcast i8 %__U to <8 x i1>
    854   %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
    855   %2 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
    856   %3 = bitcast <4 x i32> %2 to <2 x i64>
    857   ret <2 x i64> %3
    858 }
    859 
    ; Merge-masked extract of the upper 256 bits (elements 8-15) of a
    ; <16 x float>. All eight bits of the i8 mask are used directly (no
    ; narrowing shuffle of the mask) to select between the extracted value and
    ; the passthrough %__W. Expected codegen is one vextractf32x8 $1 from %zmm1
    ; with {%k1} into %ymm0; the result is a ymm value and the expected output
    ; contains no vzeroupper.
    860 define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) {
    861 ; SKX-LABEL: test_mm512_mask_extractf32x8_ps:
    862 ; SKX:       ## %bb.0: ## %entry
    863 ; SKX-NEXT:    kmovd %edi, %k1
    864 ; SKX-NEXT:    vextractf32x8 $1, %zmm1, %ymm0 {%k1}
    865 ; SKX-NEXT:    retq
    866 entry:
    867   %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    868   %0 = bitcast i8 %__U to <8 x i1>
    869   %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> %__W
    870   ret <8 x float> %1
    871 }
    872 
    ; Zero-masked extract of the upper 256 bits (elements 8-15) of a
    ; <16 x float>. All eight bits of the i8 mask select between the extracted
    ; value and zeroinitializer. Expected codegen is one vextractf32x8 $1 from
    ; %zmm0 with {%k1} {z} into %ymm0, with no vzeroupper in the expected output.
    873 define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) {
    874 ; SKX-LABEL: test_mm512_maskz_extractf32x8_ps:
    875 ; SKX:       ## %bb.0: ## %entry
    876 ; SKX-NEXT:    kmovd %edi, %k1
    877 ; SKX-NEXT:    vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
    878 ; SKX-NEXT:    retq
    879 entry:
    880   %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
    881   %0 = bitcast i8 %__U to <8 x i1>
    882   %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> zeroinitializer
    883   ret <8 x float> %1
    884 }
    885 
    ; Merge-masked extract of the topmost 128 bits (elements 6-7) of an
    ; <8 x double>. Only the low two lanes of the bitcast i8 mask are used to
    ; select between the extracted value and the passthrough %__W. Expected
    ; codegen is one vextractf64x2 with immediate $3 from %zmm1 with {%k1},
    ; then vzeroupper.
    886 define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) {
    887 ; SKX-LABEL: test_mm512_mask_extractf64x2_pd:
    888 ; SKX:       ## %bb.0: ## %entry
    889 ; SKX-NEXT:    kmovd %edi, %k1
    890 ; SKX-NEXT:    vextractf64x2 $3, %zmm1, %xmm0 {%k1}
    891 ; SKX-NEXT:    vzeroupper
    892 ; SKX-NEXT:    retq
    893 entry:
    894   %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
    895   %0 = bitcast i8 %__U to <8 x i1>
    896   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    897   %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
    898   ret <2 x double> %1
    899 }
    900 
    ; Zero-masked extract of the topmost 128 bits (elements 6-7) of an
    ; <8 x double>. The low two lanes of the bitcast i8 mask select between the
    ; extracted value and zeroinitializer. Expected codegen is one
    ; vextractf64x2 with immediate $3 from %zmm0 with {%k1} {z}, then vzeroupper.
    901 define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) {
    902 ; SKX-LABEL: test_mm512_maskz_extractf64x2_pd:
    903 ; SKX:       ## %bb.0: ## %entry
    904 ; SKX-NEXT:    kmovd %edi, %k1
    905 ; SKX-NEXT:    vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z}
    906 ; SKX-NEXT:    vzeroupper
    907 ; SKX-NEXT:    retq
    908 entry:
    909   %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
    910   %0 = bitcast i8 %__U to <8 x i1>
    911   %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
    912   %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
    913   ret <2 x double> %1
    914 }
    915