; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
; RUN: llc < %s -mcpu=knl -mattr=+avx512vl | FileCheck %s --check-prefix=ALL --check-prefix=AVX512VL

target triple = "x86_64-unknown-unknown"

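; The tests below exercise single-source and two-source shufflevector patterns
; on <4 x double> and <4 x i64> (including undef and zero lanes), plus splat
; and scalar-insert cases, and compare the lowering chosen for AVX1, AVX2 and
; AVX-512VL.
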
define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0001:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0020:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0020:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0020:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0300:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0300:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0300:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_1000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_1000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_2200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_2200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_2200:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3330:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3330:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3330:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3210:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0022:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64mem_0022(<4 x double>* %ptr, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64mem_0022:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
; ALL-NEXT:    retq
  %a = load <4 x double>, <4 x double>* %ptr
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1032:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1133:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1023:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1022:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0423:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0423:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0423:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vbroadcastsd %xmm1, %ymm1
; AVX512VL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0462:
; ALL:       # BB#0:
; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0426:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1537:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4062:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5173:
; ALL:       # BB#0:
; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_5163:
; ALL:       # BB#0:
; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0527:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_4163:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0145:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0145:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0145:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_4501:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_4501:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_4501:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_0167:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_1054:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_1054:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_1054:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3254:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3254:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3254:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3276:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3276:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3276:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_1076:
; ALL:       # BB#0:
; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_0415:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_u062:
; ALL:       # BB#0:
; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_11uu(<4 x double> %a, <4 x double> %b) {
; ALL-LABEL: shuffle_v4f64_11uu:
; ALL:       # BB#0:
; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; ALL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_22uu(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_22uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_22uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_22uu:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x double> %shuffle
}

define <4 x double> @shuffle_v4f64_3333(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: shuffle_v4f64_3333:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovhlps {{.*#+}} xmm0 = xmm0[1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4f64_3333:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4f64_3333:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x double> %shuffle
}

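; The same shuffle patterns are repeated below on <4 x i64> to cover the
; integer-domain lowering (e.g. vpermq, vpbroadcastq and vpblendd in place of
; vpermpd, vbroadcastsd and vblendpd on AVX2 and AVX-512VL).
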
define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0001:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0001:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0001:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0020:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0020:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0020:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0112:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0112:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0112:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0300:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0300:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0300:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1000:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1000:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1000:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2200:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2200:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_2200:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3330:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3330:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3330:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3210:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3210:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3210:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0124:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0124:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0124:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0142:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0142:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0142:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm1, %ymm1
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0412:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0412:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0412:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
; AVX512VL-NEXT:    vpbroadcastq %xmm1, %ymm1
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4012:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4012:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_4012:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0145:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0145:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0145:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0451:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0451:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0451:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4501:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4501:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_4501:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm0, %ymm1, %ymm0
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_4015:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_4015:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_4015:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm1, %ymm1
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_2u35:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_2u35:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_2u35:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1251:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1251:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1251:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1054:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1054:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1054:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3254:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3254:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3254:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3276:
; AVX1:       # BB#0:
; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3276:
; AVX2:       # BB#0:
; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3276:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_1076:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_1076:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_1076:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_0415:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_0415:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_0415:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_z4z6:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_z4z6:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_z4z6:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; AVX1-LABEL: shuffle_v4i64_5zuz:
; AVX1:       # BB#0:
; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_5zuz:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_5zuz:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_40u2:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_40u2:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_40u2:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_11uu(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_11uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_11uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_11uu:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,2,3,6,7,6,7]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 1, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_22uu(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_22uu:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_22uu:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_22uu:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,2,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 undef, i32 undef>
  ret <4 x i64> %shuffle
}

define <4 x i64> @shuffle_v4i64_3333(<4 x i64> %a, <4 x i64> %b) {
; AVX1-LABEL: shuffle_v4i64_3333:
; AVX1:       # BB#0:
; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,1]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: shuffle_v4i64_3333:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: shuffle_v4i64_3333:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,3]
; AVX512VL-NEXT:    retq
  %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 3>
  ret <4 x i64> %shuffle
}

define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; ALL-LABEL: stress_test1:
; ALL:         retq
  %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
  %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
  %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
  %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>

  ret <4 x i64> %f
}

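; The remaining tests cover inserting a scalar into a zeroed vector and
; broadcasting a scalar or subvector, both from registers and from memory.
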
   1208 define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
   1209 ; ALL-LABEL: insert_reg_and_zero_v4i64:
   1210 ; ALL:       # BB#0:
   1211 ; ALL-NEXT:    vmovq %rdi, %xmm0
   1212 ; ALL-NEXT:    retq
   1213   %v = insertelement <4 x i64> undef, i64 %a, i64 0
   1214   %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   1215   ret <4 x i64> %shuffle
   1216 }
   1217 
   1218 define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
   1219 ; AVX1-LABEL: insert_mem_and_zero_v4i64:
   1220 ; AVX1:       # BB#0:
   1221 ; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   1222 ; AVX1-NEXT:    retq
   1223 ;
   1224 ; AVX2-LABEL: insert_mem_and_zero_v4i64:
   1225 ; AVX2:       # BB#0:
   1226 ; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   1227 ; AVX2-NEXT:    retq
   1228 ;
   1229 ; AVX512VL-LABEL: insert_mem_and_zero_v4i64:
   1230 ; AVX512VL:       # BB#0:
   1231 ; AVX512VL-NEXT:    vmovq (%rdi), %xmm0
   1232 ; AVX512VL-NEXT:    retq
   1233   %a = load i64, i64* %ptr
   1234   %v = insertelement <4 x i64> undef, i64 %a, i64 0
   1235   %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   1236   ret <4 x i64> %shuffle
   1237 }
   1238 
   1239 define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
   1240 ; AVX1-LABEL: insert_reg_and_zero_v4f64:
   1241 ; AVX1:       # BB#0:
   1242 ; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
   1243 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
   1244 ; AVX1-NEXT:    retq
   1245 ;
   1246 ; AVX2-LABEL: insert_reg_and_zero_v4f64:
   1247 ; AVX2:       # BB#0:
   1248 ; AVX2-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
   1249 ; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
   1250 ; AVX2-NEXT:    retq
   1251 ;
   1252 ; AVX512VL-LABEL: insert_reg_and_zero_v4f64:
   1253 ; AVX512VL:       # BB#0:
   1254 ; AVX512VL-NEXT:    vxorpd %xmm1, %xmm1, %xmm1
   1255 ; AVX512VL-NEXT:    vmovsd %xmm0, %xmm1, %xmm0
   1256 ; AVX512VL-NEXT:    retq
   1257   %v = insertelement <4 x double> undef, double %a, i32 0
   1258   %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   1259   ret <4 x double> %shuffle
   1260 }
   1261 
   1262 define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
   1263 ; AVX1-LABEL: insert_mem_and_zero_v4f64:
   1264 ; AVX1:       # BB#0:
   1265 ; AVX1-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1266 ; AVX1-NEXT:    retq
   1267 ;
   1268 ; AVX2-LABEL: insert_mem_and_zero_v4f64:
   1269 ; AVX2:       # BB#0:
   1270 ; AVX2-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   1271 ; AVX2-NEXT:    retq
   1272 ;
   1273 ; AVX512VL-LABEL: insert_mem_and_zero_v4f64:
   1274 ; AVX512VL:       # BB#0:
   1275 ; AVX512VL-NEXT:    vmovsd (%rdi), %xmm0
   1276 ; AVX512VL-NEXT:    retq
   1277   %a = load double, double* %ptr
   1278   %v = insertelement <4 x double> undef, double %a, i32 0
   1279   %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
   1280   ret <4 x double> %shuffle
   1281 }
   1282 
define <4 x double> @splat_mem_v4f64(double* %ptr) {
; ALL-LABEL: splat_mem_v4f64:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %a = load double, double* %ptr
  %v = insertelement <4 x double> undef, double %a, i32 0
  %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: splat_mem_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_mem_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat_mem_v4i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT:    retq
  %a = load i64, i64* %ptr
  %v = insertelement <4 x i64> undef, i64 %a, i64 0
  %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat_mem_v4f64_2(double* %p) {
; ALL-LABEL: splat_mem_v4f64_2:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %1 = load double, double* %p
  %2 = insertelement <2 x double> undef, double %1, i32 0
  %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %3
}

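; Splat of the low element of a v2f64 register. AVX1 has no register-source
; vbroadcastsd, so it uses vmovddup plus vinsertf128; AVX2 and AVX512VL use a
; single vbroadcastsd from %xmm0.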
define <4 x double> @splat_v4f64(<2 x double> %r) {
; AVX1-LABEL: splat_v4f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_v4f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat_v4f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
; AVX512VL-NEXT:    retq
  %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
  ret <4 x double> %1
}

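; Splat element 0 of a 128-bit vector loaded from memory. The f64 case becomes
; a single vbroadcastsd on all targets; for the i64 case AVX1 uses vmovddup
; from memory plus vinsertf128, while AVX2/AVX512VL broadcast directly from
; memory.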
define <4 x i64> @splat_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat_mem_v4i64_from_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat_mem_v4i64_from_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat_mem_v4i64_from_v2i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT:    retq
  %v = load <2 x i64>, <2 x i64>* %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
; ALL-LABEL: splat_mem_v4f64_from_v2f64:
; ALL:       # BB#0:
; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
; ALL-NEXT:    retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
  ret <4 x double> %shuffle
}

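; Splat of a whole 128-bit lane loaded from memory (elements <0,1,0,1>). This
; lowers to a 128-bit load followed by an insert into the upper half:
; vinsertf128 on AVX1/AVX2, vinserti32x4/vinsertf32x4 on AVX512VL.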
define <4 x i64> @splat128_mem_v4i64_from_v2i64(<2 x i64>* %ptr) {
; AVX1-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps (%rdi), %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps (%rdi), %xmm0
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat128_mem_v4i64_from_v2i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovdqa64 (%rdi), %xmm0
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %v = load <2 x i64>, <2 x i64>* %ptr
  %shuffle = shufflevector <2 x i64> %v, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x i64> %shuffle
}

define <4 x double> @splat128_mem_v4f64_from_v2f64(<2 x double>* %ptr) {
; AVX1-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vmovaps (%rdi), %xmm0
; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vmovaps (%rdi), %xmm0
; AVX2-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: splat128_mem_v4f64_from_v2f64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vmovapd (%rdi), %xmm0
; AVX512VL-NEXT:    vinsertf32x4 $1, %xmm0, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %v = load <2 x double>, <2 x double>* %ptr
  %shuffle = shufflevector <2 x double> %v, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
  ret <4 x double> %shuffle
}

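; A v4f64 shuffle followed by further sub-element shuffles through <8 x float>
; and <16 x i16> bitcasts. The whole chain is recombined into a single 64-bit
; interleave: vunpcklpd on AVX1, vpunpcklqdq on AVX2/AVX512VL.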
define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; AVX1-LABEL: bitcast_v4f64_0426:
; AVX1:       # BB#0:
; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: bitcast_v4f64_0426:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: bitcast_v4f64_0426:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
; AVX512VL-NEXT:    retq
  %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
  %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
  %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
  %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
  %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
  %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
  ret <4 x double> %bitcast64
}

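; Concatenation of the low 128 bits of %a0 with the high 128 bits of %a1. This
; is just a lane-wise blend: vblendpd on AVX1, vpblendd on AVX2/AVX512VL.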
define <4 x i64> @concat_v4i64_0167(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: concat_v4i64_0167:
; AVX1:       # BB#0:
; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_v4i64_0167:
; AVX2:       # BB#0:
; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: concat_v4i64_0167:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
; AVX512VL-NEXT:    retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1hi = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 6, i32 7>
  %shuffle64 = shufflevector <2 x i64> %a0lo, <2 x i64> %a1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ret <4 x i64> %shuffle64
}

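; Concatenation of the low 128 bits of both inputs, expressed through
; <4 x i32> bitcasts. It still lowers to a single 128-bit insert: vinsertf128
; on AVX1/AVX2, vinserti32x4 on AVX512VL.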
define <4 x i64> @concat_v4i64_0145_bc(<4 x i64> %a0, <4 x i64> %a1) {
; AVX1-LABEL: concat_v4i64_0145_bc:
; AVX1:       # BB#0:
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: concat_v4i64_0145_bc:
; AVX2:       # BB#0:
; AVX2-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: concat_v4i64_0145_bc:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vinserti32x4 $1, %xmm1, %ymm0, %ymm0
; AVX512VL-NEXT:    retq
  %a0lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 0, i32 1>
  %a1lo = shufflevector <4 x i64> %a0, <4 x i64> %a1, <2 x i32> <i32 4, i32 5>
  %bc0lo = bitcast <2 x i64> %a0lo to <4 x i32>
  %bc1lo = bitcast <2 x i64> %a1lo to <4 x i32>
  %shuffle32 = shufflevector <4 x i32> %bc0lo, <4 x i32> %bc1lo, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %shuffle64 = bitcast <8 x i32> %shuffle32 to <4 x i64>
  ret <4 x i64> %shuffle64
}

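; Splat of an i64 loaded with align 1. The underaligned load still folds into
; a memory broadcast: vbroadcastsd on AVX1/AVX2, vpbroadcastq on AVX512VL.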
define <4 x i64> @insert_dup_mem_v4i64(i64* %ptr) {
; AVX1-LABEL: insert_dup_mem_v4i64:
; AVX1:       # BB#0:
; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: insert_dup_mem_v4i64:
; AVX2:       # BB#0:
; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
; AVX2-NEXT:    retq
;
; AVX512VL-LABEL: insert_dup_mem_v4i64:
; AVX512VL:       # BB#0:
; AVX512VL-NEXT:    vpbroadcastq (%rdi), %ymm0
; AVX512VL-NEXT:    retq
  %tmp = load i64, i64* %ptr, align 1
  %tmp1 = insertelement <2 x i64> undef, i64 %tmp, i32 0
  %tmp2 = shufflevector <2 x i64> %tmp1, <2 x i64> undef, <4 x i32> zeroinitializer
  ret <4 x i64> %tmp2
}