; Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
      2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
      3 
      4 target triple = "x86_64-unknown-unknown"
      5 
      6 define <4 x double> @shuffle_v4f64_0000(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,0,0,0>: splat of element 0 of %a. AVX1 duplicates the low double (vmovddup) and mirrors it into the high lane; AVX2 does it with one vbroadcastsd.
      7 ; AVX1-LABEL: shuffle_v4f64_0000:
      8 ; AVX1:       # BB#0:
      9 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
     10 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     11 ; AVX1-NEXT:    retq
     12 ;
     13 ; AVX2-LABEL: shuffle_v4f64_0000:
     14 ; AVX2:       # BB#0:
     15 ; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
     16 ; AVX2-NEXT:    retq
     17   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     18   ret <4 x double> %shuffle
     19 }
     20 
     21 define <4 x double> @shuffle_v4f64_0001(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,0,0,1>: AVX1 builds the splat half with vmovddup and keeps %a's low 128 bits as the high lane; AVX2 uses one lane-crossing vpermpd.
     22 ; AVX1-LABEL: shuffle_v4f64_0001:
     23 ; AVX1:       # BB#0:
     24 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
     25 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     26 ; AVX1-NEXT:    retq
     27 ;
     28 ; AVX2-LABEL: shuffle_v4f64_0001:
     29 ; AVX2:       # BB#0:
     30 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
     31 ; AVX2-NEXT:    retq
     32   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
     33   ret <4 x double> %shuffle
     34 }
     35 
     36 define <4 x double> @shuffle_v4f64_0020(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,0,2,0>: AVX1 extracts the high lane and rebuilds both halves (unpcklpd + movddup + insertf128); AVX2 needs only one vpermpd.
     37 ; AVX1-LABEL: shuffle_v4f64_0020:
     38 ; AVX1:       # BB#0:
     39 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
     40 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
     41 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
     42 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     43 ; AVX1-NEXT:    retq
     44 ;
     45 ; AVX2-LABEL: shuffle_v4f64_0020:
     46 ; AVX2:       # BB#0:
     47 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,2,0]
     48 ; AVX2-NEXT:    retq
     49   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
     50   ret <4 x double> %shuffle
     51 }
     52 
     53 define <4 x double> @shuffle_v4f64_0300(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,3,0,0>: AVX1 swaps lanes (vperm2f128), permutes in-lane, then blends element 0 back in; AVX2 collapses to one vpermpd.
     54 ; AVX1-LABEL: shuffle_v4f64_0300:
     55 ; AVX1:       # BB#0:
     56 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
     57 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
     58 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
     59 ; AVX1-NEXT:    retq
     60 ;
     61 ; AVX2-LABEL: shuffle_v4f64_0300:
     62 ; AVX2:       # BB#0:
     63 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,0,0]
     64 ; AVX2-NEXT:    retq
     65   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
     66   ret <4 x double> %shuffle
     67 }
     68 
     69 define <4 x double> @shuffle_v4f64_1000(<4 x double> %a, <4 x double> %b) {
        ; Mask <1,0,0,0>: only low-lane elements are used, so AVX1 builds both 128-bit halves from xmm0 and concatenates; AVX2 uses one vpermpd.
     70 ; AVX1-LABEL: shuffle_v4f64_1000:
     71 ; AVX1:       # BB#0:
     72 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
     73 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
     74 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     75 ; AVX1-NEXT:    retq
     76 ;
     77 ; AVX2-LABEL: shuffle_v4f64_1000:
     78 ; AVX2:       # BB#0:
     79 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[1,0,0,0]
     80 ; AVX2-NEXT:    retq
     81   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
     82   ret <4 x double> %shuffle
     83 }
     84 
     85 define <4 x double> @shuffle_v4f64_2200(<4 x double> %a, <4 x double> %b) {
        ; Mask <2,2,0,0>: AVX1 swaps the 128-bit lanes then splats even elements with a 256-bit vmovddup; AVX2 uses one vpermpd.
     86 ; AVX1-LABEL: shuffle_v4f64_2200:
     87 ; AVX1:       # BB#0:
     88 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
     89 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
     90 ; AVX1-NEXT:    retq
     91 ;
     92 ; AVX2-LABEL: shuffle_v4f64_2200:
     93 ; AVX2:       # BB#0:
     94 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,2,0,0]
     95 ; AVX2-NEXT:    retq
     96   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
     97   ret <4 x double> %shuffle
     98 }
     99 
    100 define <4 x double> @shuffle_v4f64_3330(<4 x double> %a, <4 x double> %b) {
        ; Mask <3,3,3,0>: AVX1 uses lane swap + blend + in-lane vpermilpd; AVX2 collapses to one vpermpd.
    101 ; AVX1-LABEL: shuffle_v4f64_3330:
    102 ; AVX1:       # BB#0:
    103 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    104 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
    105 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
    106 ; AVX1-NEXT:    retq
    107 ;
    108 ; AVX2-LABEL: shuffle_v4f64_3330:
    109 ; AVX2:       # BB#0:
    110 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,0]
    111 ; AVX2-NEXT:    retq
    112   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
    113   ret <4 x double> %shuffle
    114 }
    115 
    116 define <4 x double> @shuffle_v4f64_3210(<4 x double> %a, <4 x double> %b) {
        ; Mask <3,2,1,0>: full reverse. AVX1 = lane swap (vperm2f128) + per-lane element swap (vpermilpd); AVX2 = one vpermpd.
    117 ; AVX1-LABEL: shuffle_v4f64_3210:
    118 ; AVX1:       # BB#0:
    119 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
    120 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    121 ; AVX1-NEXT:    retq
    122 ;
    123 ; AVX2-LABEL: shuffle_v4f64_3210:
    124 ; AVX2:       # BB#0:
    125 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,2,1,0]
    126 ; AVX2-NEXT:    retq
    127   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    128   ret <4 x double> %shuffle
    129 }
    130 
    131 define <4 x double> @shuffle_v4f64_0023(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,0,2,3>: stays within 128-bit lanes, so both AVX1 and AVX2 lower to a single vpermilpd (shared ALL prefix).
    132 ; ALL-LABEL: shuffle_v4f64_0023:
    133 ; ALL:       # BB#0:
    134 ; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,0,2,3]
    135 ; ALL-NEXT:    retq
    136   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 3>
    137   ret <4 x double> %shuffle
    138 }
    139 
    140 define <4 x double> @shuffle_v4f64_0022(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,0,2,2>: duplicate the even elements — exactly the 256-bit vmovddup pattern on both subtargets.
    141 ; ALL-LABEL: shuffle_v4f64_0022:
    142 ; ALL:       # BB#0:
    143 ; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    144 ; ALL-NEXT:    retq
    145   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
    146   ret <4 x double> %shuffle
    147 }
    148 
    149 define <4 x double> @shuffle_v4f64_1032(<4 x double> %a, <4 x double> %b) {
        ; Mask <1,0,3,2>: swap the pair inside each 128-bit lane — one in-lane vpermilpd on both subtargets.
    150 ; ALL-LABEL: shuffle_v4f64_1032:
    151 ; ALL:       # BB#0:
    152 ; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    153 ; ALL-NEXT:    retq
    154   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
    155   ret <4 x double> %shuffle
    156 }
    157 
    158 define <4 x double> @shuffle_v4f64_1133(<4 x double> %a, <4 x double> %b) {
        ; Mask <1,1,3,3>: duplicate the odd elements within each lane — a single vpermilpd on both subtargets.
    159 ; ALL-LABEL: shuffle_v4f64_1133:
    160 ; ALL:       # BB#0:
    161 ; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,3]
    162 ; ALL-NEXT:    retq
    163   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
    164   ret <4 x double> %shuffle
    165 }
    166 
    167 define <4 x double> @shuffle_v4f64_1023(<4 x double> %a, <4 x double> %b) {
        ; Mask <1,0,2,3>: swap only in the low lane, identity in the high lane — one vpermilpd on both subtargets.
    168 ; ALL-LABEL: shuffle_v4f64_1023:
    169 ; ALL:       # BB#0:
    170 ; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
    171 ; ALL-NEXT:    retq
    172   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
    173   ret <4 x double> %shuffle
    174 }
    175 
    176 define <4 x double> @shuffle_v4f64_1022(<4 x double> %a, <4 x double> %b) {
        ; Mask <1,0,2,2>: per-lane permute (swap low lane, splat element 2 in high lane) — one vpermilpd on both subtargets.
    177 ; ALL-LABEL: shuffle_v4f64_1022:
    178 ; ALL:       # BB#0:
    179 ; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,2]
    180 ; ALL-NEXT:    retq
    181   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 2, i32 2>
    182   ret <4 x double> %shuffle
    183 }
    184 
    185 define <4 x double> @shuffle_v4f64_0423(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,4,2,3>: insert b[0] into slot 1 of %a. Both splat b[0] (AVX1 vmovddup, AVX2 vbroadcastsd) then blend it in.
    186 ; AVX1-LABEL: shuffle_v4f64_0423:
    187 ; AVX1:       # BB#0:
    188 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
    189 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
    190 ; AVX1-NEXT:    retq
    191 ;
    192 ; AVX2-LABEL: shuffle_v4f64_0423:
    193 ; AVX2:       # BB#0:
    194 ; AVX2-NEXT:    vbroadcastsd %xmm1, %ymm1
    195 ; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
    196 ; AVX2-NEXT:    retq
    197   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 3>
    198   ret <4 x double> %shuffle
    199 }
    200 
    201 define <4 x double> @shuffle_v4f64_0462(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,4,6,2> (two-input, even elements only): movddup each source to its even elements, then a single blend; same code on both subtargets.
    202 ; ALL-LABEL: shuffle_v4f64_0462:
    203 ; ALL:       # BB#0:
    204 ; ALL-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
    205 ; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    206 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
    207 ; ALL-NEXT:    retq
    208   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 6, i32 2>
    209   ret <4 x double> %shuffle
    210 }
    211 
    212 define <4 x double> @shuffle_v4f64_0426(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,4,2,6>: the canonical per-lane low interleave — a single vunpcklpd of %a and %b.
    213 ; ALL-LABEL: shuffle_v4f64_0426:
    214 ; ALL:       # BB#0:
    215 ; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
    216 ; ALL-NEXT:    retq
    217   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
    218   ret <4 x double> %shuffle
    219 }
    220 
    221 define <4 x double> @shuffle_v4f64_1537(<4 x double> %a, <4 x double> %b) {
        ; Mask <1,5,3,7>: the canonical per-lane high interleave — a single vunpckhpd of %a and %b.
    222 ; ALL-LABEL: shuffle_v4f64_1537:
    223 ; ALL:       # BB#0:
    224 ; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
    225 ; ALL-NEXT:    retq
    226   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
    227   ret <4 x double> %shuffle
    228 }
    229 
    230 define <4 x double> @shuffle_v4f64_4062(<4 x double> %a, <4 x double> %b) {
        ; Mask <4,0,6,2>: low interleave with the operands commuted — vunpcklpd with %b as the first source.
    231 ; ALL-LABEL: shuffle_v4f64_4062:
    232 ; ALL:       # BB#0:
    233 ; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
    234 ; ALL-NEXT:    retq
    235   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
    236   ret <4 x double> %shuffle
    237 }
    238 
    239 define <4 x double> @shuffle_v4f64_5173(<4 x double> %a, <4 x double> %b) {
        ; Mask <5,1,7,3>: high interleave with the operands commuted — vunpckhpd with %b as the first source.
    240 ; ALL-LABEL: shuffle_v4f64_5173:
    241 ; ALL:       # BB#0:
    242 ; ALL-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[3],ymm0[3]
    243 ; ALL-NEXT:    retq
    244   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 7, i32 3>
    245   ret <4 x double> %shuffle
    246 }
    247 
    248 define <4 x double> @shuffle_v4f64_5163(<4 x double> %a, <4 x double> %b) {
        ; Mask <5,1,6,3>: per-lane two-source pick that is not a plain unpck — lowered to a single vshufpd.
    249 ; ALL-LABEL: shuffle_v4f64_5163:
    250 ; ALL:       # BB#0:
    251 ; ALL-NEXT:    vshufpd {{.*#+}} ymm0 = ymm1[1],ymm0[1],ymm1[2],ymm0[3]
    252 ; ALL-NEXT:    retq
    253   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 5, i32 1, i32 6, i32 3>
    254   ret <4 x double> %shuffle
    255 }
    256 
    257 define <4 x double> @shuffle_v4f64_0527(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,5,2,7>: every element stays in its slot, alternating sources — a pure element blend (vblendpd).
    258 ; ALL-LABEL: shuffle_v4f64_0527:
    259 ; ALL:       # BB#0:
    260 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
    261 ; ALL-NEXT:    retq
    262   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 5, i32 2, i32 7>
    263   ret <4 x double> %shuffle
    264 }
    265 
    266 define <4 x double> @shuffle_v4f64_4163(<4 x double> %a, <4 x double> %b) {
        ; Mask <4,1,6,3>: the inverse blend pattern of 0527 — one vblendpd with %b supplying the even slots.
    267 ; ALL-LABEL: shuffle_v4f64_4163:
    268 ; ALL:       # BB#0:
    269 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3]
    270 ; ALL-NEXT:    retq
    271   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 1, i32 6, i32 3>
    272   ret <4 x double> %shuffle
    273 }
    274 
    275 define <4 x double> @shuffle_v4f64_0145(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,1,4,5>: concatenation of the two low 128-bit halves — a single vinsertf128 of xmm1 into the high lane.
    276 ; ALL-LABEL: shuffle_v4f64_0145:
    277 ; ALL:       # BB#0:
    278 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    279 ; ALL-NEXT:    retq
    280   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    281   ret <4 x double> %shuffle
    282 }
    283 
    284 define <4 x double> @shuffle_v4f64_4501(<4 x double> %a, <4 x double> %b) {
        ; Mask <4,5,0,1>: same concatenation as 0145 with the sources reversed — vinsertf128 of xmm0 into %b.
    285 ; ALL-LABEL: shuffle_v4f64_4501:
    286 ; ALL:       # BB#0:
    287 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    288 ; ALL-NEXT:    retq
    289   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
    290   ret <4 x double> %shuffle
    291 }
    292 
    293 define <4 x double> @shuffle_v4f64_0167(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,1,6,7>: low lane of %a with high lane of %b — a single 128-bit-granular vblendpd.
    294 ; ALL-LABEL: shuffle_v4f64_0167:
    295 ; ALL:       # BB#0:
    296 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    297 ; ALL-NEXT:    retq
    298   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 1, i32 6, i32 7>
    299   ret <4 x double> %shuffle
    300 }
    301 
    302 define <4 x double> @shuffle_v4f64_1054(<4 x double> %a, <4 x double> %b) {
        ; Mask <1,0,5,4>: concatenate the two low halves, then swap within each lane (vinsertf128 + vpermilpd).
    303 ; ALL-LABEL: shuffle_v4f64_1054:
    304 ; ALL:       # BB#0:
    305 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    306 ; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    307 ; ALL-NEXT:    retq
    308   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
    309   ret <4 x double> %shuffle
    310 }
    311 
    312 define <4 x double> @shuffle_v4f64_3254(<4 x double> %a, <4 x double> %b) {
        ; Mask <3,2,5,4>: pick a's high lane and b's low lane (vperm2f128), then swap within each lane (vpermilpd).
    313 ; ALL-LABEL: shuffle_v4f64_3254:
    314 ; ALL:       # BB#0:
    315 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
    316 ; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    317 ; ALL-NEXT:    retq
    318   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
    319   ret <4 x double> %shuffle
    320 }
    321 
    322 define <4 x double> @shuffle_v4f64_3276(<4 x double> %a, <4 x double> %b) {
        ; Mask <3,2,7,6>: take both high lanes (vperm2f128), then swap within each lane (vpermilpd).
    323 ; ALL-LABEL: shuffle_v4f64_3276:
    324 ; ALL:       # BB#0:
    325 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    326 ; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    327 ; ALL-NEXT:    retq
    328   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
    329   ret <4 x double> %shuffle
    330 }
    331 
    332 define <4 x double> @shuffle_v4f64_1076(<4 x double> %a, <4 x double> %b) {
        ; Mask <1,0,7,6>: lane-granular blend of the two sources, then an in-lane swap (vblendpd + vpermilpd).
    333 ; ALL-LABEL: shuffle_v4f64_1076:
    334 ; ALL:       # BB#0:
    335 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    336 ; ALL-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    337 ; ALL-NEXT:    retq
    338   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
    339   ret <4 x double> %shuffle
    340 }
    341 
    342 define <4 x double> @shuffle_v4f64_0415(<4 x double> %a, <4 x double> %b) {
        ; Mask <0,4,1,5>: interleave the low halves. AVX1 uses xmm unpckl/unpckh + insertf128; AVX2 uses two vpermpd feeding a vblendpd.
    343 ; AVX1-LABEL: shuffle_v4f64_0415:
    344 ; AVX1:       # BB#0:
    345 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
    346 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    347 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    348 ; AVX1-NEXT:    retq
    349 ;
    350 ; AVX2-LABEL: shuffle_v4f64_0415:
    351 ; AVX2:       # BB#0:
    352 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,0,2,1]
    353 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
    354 ; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3]
    355 ; AVX2-NEXT:    retq
    356   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
    357   ret <4 x double> %shuffle
    358 }
    359 
    360 define <4 x double> @shuffle_v4f64_u062(<4 x double> %a, <4 x double> %b) {
        ; Mask <undef,0,6,2>: the undef element lets this fold to the same commuted vunpcklpd as mask <4,0,6,2>.
    361 ; ALL-LABEL: shuffle_v4f64_u062:
    362 ; ALL:       # BB#0:
    363 ; ALL-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
    364 ; ALL-NEXT:    retq
    365   %shuffle = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 undef, i32 0, i32 6, i32 2>
    366   ret <4 x double> %shuffle
    367 }
    368 
    369 define <4 x i64> @shuffle_v4i64_0000(<4 x i64> %a, <4 x i64> %b) {
        ; i64 splat of element 0: AVX1 (no 256-bit integer shuffles) uses the FP movddup+insertf128 sequence; AVX2 uses vbroadcastsd.
    370 ; AVX1-LABEL: shuffle_v4i64_0000:
    371 ; AVX1:       # BB#0:
    372 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    373 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    374 ; AVX1-NEXT:    retq
    375 ;
    376 ; AVX2-LABEL: shuffle_v4i64_0000:
    377 ; AVX2:       # BB#0:
    378 ; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
    379 ; AVX2-NEXT:    retq
    380   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
    381   ret <4 x i64> %shuffle
    382 }
    383 
    384 define <4 x i64> @shuffle_v4i64_0001(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <0,0,0,1>: AVX1 reuses the FP movddup+insertf128 sequence; AVX2 uses the integer-domain vpermq.
    385 ; AVX1-LABEL: shuffle_v4i64_0001:
    386 ; AVX1:       # BB#0:
    387 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm0[0,0]
    388 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    389 ; AVX1-NEXT:    retq
    390 ;
    391 ; AVX2-LABEL: shuffle_v4i64_0001:
    392 ; AVX2:       # BB#0:
    393 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,0,1]
    394 ; AVX2-NEXT:    retq
    395   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 0, i32 1>
    396   ret <4 x i64> %shuffle
    397 }
    398 
    399 define <4 x i64> @shuffle_v4i64_0020(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <0,0,2,0>: AVX1 rebuilds both halves via extract/unpck/movddup; AVX2 uses a single vpermq.
    400 ; AVX1-LABEL: shuffle_v4i64_0020:
    401 ; AVX1:       # BB#0:
    402 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    403 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm1 = xmm1[0],xmm0[0]
    404 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    405 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    406 ; AVX1-NEXT:    retq
    407 ;
    408 ; AVX2-LABEL: shuffle_v4i64_0020:
    409 ; AVX2:       # BB#0:
    410 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,2,0]
    411 ; AVX2-NEXT:    retq
    412   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
    413   ret <4 x i64> %shuffle
    414 }
    415 
    416 define <4 x i64> @shuffle_v4i64_0112(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <0,1,1,2>: AVX1 builds the high half with extract + vshufpd and reinserts it; AVX2 uses a single vpermq.
    417 ; AVX1-LABEL: shuffle_v4i64_0112:
    418 ; AVX1:       # BB#0:
    419 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
    420 ; AVX1-NEXT:    vshufpd {{.*#+}} xmm1 = xmm0[1],xmm1[0]
    421 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    422 ; AVX1-NEXT:    retq
    423 ;
    424 ; AVX2-LABEL: shuffle_v4i64_0112:
    425 ; AVX2:       # BB#0:
    426 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
    427 ; AVX2-NEXT:    retq
    428   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 1, i32 2>
    429   ret <4 x i64> %shuffle
    430 }
    431 
    432 define <4 x i64> @shuffle_v4i64_0300(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <0,3,0,0>: same FP sequence as the f64 version on AVX1; AVX2 uses integer vpermq.
    433 ; AVX1-LABEL: shuffle_v4i64_0300:
    434 ; AVX1:       # BB#0:
    435 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    436 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,1,2,2]
    437 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    438 ; AVX1-NEXT:    retq
    439 ;
    440 ; AVX2-LABEL: shuffle_v4i64_0300:
    441 ; AVX2:       # BB#0:
    442 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,3,0,0]
    443 ; AVX2-NEXT:    retq
    444   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 3, i32 0, i32 0>
    445   ret <4 x i64> %shuffle
    446 }
    447 
    448 define <4 x i64> @shuffle_v4i64_1000(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <1,0,0,0>: AVX1 assembles both halves from xmm0 (permilpd + movddup + insertf128); AVX2 uses one vpermq.
    449 ; AVX1-LABEL: shuffle_v4i64_1000:
    450 ; AVX1:       # BB#0:
    451 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm0[1,0]
    452 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    453 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    454 ; AVX1-NEXT:    retq
    455 ;
    456 ; AVX2-LABEL: shuffle_v4i64_1000:
    457 ; AVX2:       # BB#0:
    458 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,0,0,0]
    459 ; AVX2-NEXT:    retq
    460   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 0, i32 0>
    461   ret <4 x i64> %shuffle
    462 }
    463 
    464 define <4 x i64> @shuffle_v4i64_2200(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <2,2,0,0>: AVX1 swaps the lanes then splats evens with 256-bit vmovddup; AVX2 uses one vpermq.
    465 ; AVX1-LABEL: shuffle_v4i64_2200:
    466 ; AVX1:       # BB#0:
    467 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
    468 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    469 ; AVX1-NEXT:    retq
    470 ;
    471 ; AVX2-LABEL: shuffle_v4i64_2200:
    472 ; AVX2:       # BB#0:
    473 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,2,0,0]
    474 ; AVX2-NEXT:    retq
    475   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 2, i32 0, i32 0>
    476   ret <4 x i64> %shuffle
    477 }
    478 
    479 define <4 x i64> @shuffle_v4i64_3330(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <3,3,3,0>: AVX1 uses lane swap + blend + in-lane vpermilpd; AVX2 uses one vpermq.
    480 ; AVX1-LABEL: shuffle_v4i64_3330:
    481 ; AVX1:       # BB#0:
    482 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    483 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3]
    484 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,1,3,2]
    485 ; AVX1-NEXT:    retq
    486 ;
    487 ; AVX2-LABEL: shuffle_v4i64_3330:
    488 ; AVX2:       # BB#0:
    489 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,3,3,0]
    490 ; AVX2-NEXT:    retq
    491   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 3, i32 3, i32 0>
    492   ret <4 x i64> %shuffle
    493 }
    494 
    495 define <4 x i64> @shuffle_v4i64_3210(<4 x i64> %a, <4 x i64> %b) {
        ; i64 full reverse <3,2,1,0>: AVX1 = lane swap + in-lane swap; AVX2 = one vpermq.
    496 ; AVX1-LABEL: shuffle_v4i64_3210:
    497 ; AVX1:       # BB#0:
    498 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
    499 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    500 ; AVX1-NEXT:    retq
    501 ;
    502 ; AVX2-LABEL: shuffle_v4i64_3210:
    503 ; AVX2:       # BB#0:
    504 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,2,1,0]
    505 ; AVX2-NEXT:    retq
    506   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
    507   ret <4 x i64> %shuffle
    508 }
    509 
    510 define <4 x i64> @shuffle_v4i64_0124(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <0,1,2,4>: splat b[0] and blend it into slot 3. AVX1 stays in the FP domain; AVX2 uses integer vpbroadcastq + vpblendd.
    511 ; AVX1-LABEL: shuffle_v4i64_0124:
    512 ; AVX1:       # BB#0:
    513 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm1 = xmm1[0,0]
    514 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
    515 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3]
    516 ; AVX1-NEXT:    retq
    517 ;
    518 ; AVX2-LABEL: shuffle_v4i64_0124:
    519 ; AVX2:       # BB#0:
    520 ; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
    521 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3,4,5],ymm1[6,7]
    522 ; AVX2-NEXT:    retq
    523   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 2, i32 4>
    524   ret <4 x i64> %shuffle
    525 }
    526 
    527 define <4 x i64> @shuffle_v4i64_0142(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <0,1,4,2>: broadcast b's low half to both lanes, permute %a, then blend; AVX2 mirrors AVX1 in the integer domain.
    528 ; AVX1-LABEL: shuffle_v4i64_0142:
    529 ; AVX1:       # BB#0:
    530 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
    531 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[0,1,2,2]
    532 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
    533 ; AVX1-NEXT:    retq
    534 ;
    535 ; AVX2-LABEL: shuffle_v4i64_0142:
    536 ; AVX2:       # BB#0:
    537 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
    538 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,2,2]
    539 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
    540 ; AVX2-NEXT:    retq
    541   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 2>
    542   ret <4 x i64> %shuffle
    543 }
    544 
    545 define <4 x i64> @shuffle_v4i64_0412(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <0,4,1,2>: permute %a into <0,1,1,2> order, splat b[0], and blend it into slot 1; AVX2 does the same with vpermq/vpbroadcastq/vpblendd.
    546 ; AVX1-LABEL: shuffle_v4i64_0412:
    547 ; AVX1:       # BB#0:
    548 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    549 ; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
    550 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    551 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm1 = ymm1[0,0,2,2]
    552 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2,3]
    553 ; AVX1-NEXT:    retq
    554 ;
    555 ; AVX2-LABEL: shuffle_v4i64_0412:
    556 ; AVX2:       # BB#0:
    557 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,2]
    558 ; AVX2-NEXT:    vpbroadcastq %xmm1, %ymm1
    559 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
    560 ; AVX2-NEXT:    retq
    561   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 2>
    562   ret <4 x i64> %shuffle
    563 }
    564 
    565 define <4 x i64> @shuffle_v4i64_4012(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <4,0,1,2>: permute %a into <0,0,1,2> order then blend b[0] into slot 0; AVX2 uses vpermq + vpblendd.
    566 ; AVX1-LABEL: shuffle_v4i64_4012:
    567 ; AVX1:       # BB#0:
    568 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    569 ; AVX1-NEXT:    vshufpd {{.*#+}} xmm2 = xmm0[1],xmm2[0]
    570 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    571 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    572 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3]
    573 ; AVX1-NEXT:    retq
    574 ;
    575 ; AVX2-LABEL: shuffle_v4i64_4012:
    576 ; AVX2:       # BB#0:
    577 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,2]
    578 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5,6,7]
    579 ; AVX2-NEXT:    retq
    580   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 2>
    581   ret <4 x i64> %shuffle
    582 }
    583 
    584 define <4 x i64> @shuffle_v4i64_0145(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <0,1,4,5>: concatenation of low halves — both prefixes emit the same vinsertf128 (shared ALL check).
    585 ; ALL-LABEL: shuffle_v4i64_0145:
    586 ; ALL:       # BB#0:
    587 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    588 ; ALL-NEXT:    retq
    589   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 1, i32 4, i32 5>
    590   ret <4 x i64> %shuffle
    591 }
    592 
    593 define <4 x i64> @shuffle_v4i64_0451(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <0,4,5,1>: AVX1 builds each 128-bit half with xmm unpacks; AVX2 broadcasts %a's low half, permutes %b, and vpblendd's.
    594 ; AVX1-LABEL: shuffle_v4i64_0451:
    595 ; AVX1:       # BB#0:
    596 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm1[1],xmm0[1]
    597 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
    598 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    599 ; AVX1-NEXT:    retq
    600 ;
    601 ; AVX2-LABEL: shuffle_v4i64_0451:
    602 ; AVX2:       # BB#0:
    603 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
    604 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,1,3]
    605 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5],ymm0[6,7]
    606 ; AVX2-NEXT:    retq
    607   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 5, i32 1>
    608   ret <4 x i64> %shuffle
    609 }
    610 
    611 define <4 x i64> @shuffle_v4i64_4501(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <4,5,0,1>: reversed concatenation of low halves — one vinsertf128 on both prefixes (shared ALL check).
    612 ; ALL-LABEL: shuffle_v4i64_4501:
    613 ; ALL:       # BB#0:
    614 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    615 ; ALL-NEXT:    retq
    616   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 5, i32 0, i32 1>
    617   ret <4 x i64> %shuffle
    618 }
    619 
    620 define <4 x i64> @shuffle_v4i64_4015(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <4,0,1,5>: AVX1 assembles halves with commuted xmm unpacks; AVX2 broadcasts %b's low half, permutes %a, then vpblendd's.
    621 ; AVX1-LABEL: shuffle_v4i64_4015:
    622 ; AVX1:       # BB#0:
    623 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
    624 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    625 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    626 ; AVX1-NEXT:    retq
    627 ;
    628 ; AVX2-LABEL: shuffle_v4i64_4015:
    629 ; AVX2:       # BB#0:
    630 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm1, %ymm1
    631 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,0,1,3]
    632 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3,4,5],ymm1[6,7]
    633 ; AVX2-NEXT:    retq
    634   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 1, i32 5>
    635   ret <4 x i64> %shuffle
    636 }
    637 
    638 define <4 x i64> @shuffle_v4i64_2u35(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <2,undef,3,5>: AVX1 exploits the undef to work on extracted halves; AVX2 blends first then vpermq's the result.
    639 ; AVX1-LABEL: shuffle_v4i64_2u35:
    640 ; AVX1:       # BB#0:
    641 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    642 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm1 = xmm0[1],xmm1[1]
    643 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    644 ; AVX1-NEXT:    retq
    645 ;
    646 ; AVX2-LABEL: shuffle_v4i64_2u35:
    647 ; AVX2:       # BB#0:
    648 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5,6,7]
    649 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[2,1,3,1]
    650 ; AVX2-NEXT:    retq
    651   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 2, i32 undef, i32 3, i32 5>
    652   ret <4 x i64> %shuffle
    653 }
    654 
    655 define <4 x i64> @shuffle_v4i64_1251(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <1,2,5,1>: worst-case AVX1 sequence (lane swap, shufpd, permil, insert, blend); AVX2 needs only two vpermq + vpblendd.
    656 ; AVX1-LABEL: shuffle_v4i64_1251:
    657 ; AVX1:       # BB#0:
    658 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    659 ; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm0[1],ymm2[0],ymm0[2],ymm2[3]
    660 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm1 = xmm1[1,0]
    661 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
    662 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2],ymm0[3]
    663 ; AVX1-NEXT:    retq
    664 ;
    665 ; AVX2-LABEL: shuffle_v4i64_1251:
    666 ; AVX2:       # BB#0:
    667 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,1,1,3]
    668 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[1,2,2,1]
    669 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5],ymm0[6,7]
    670 ; AVX2-NEXT:    retq
    671   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 2, i32 5, i32 1>
    672   ret <4 x i64> %shuffle
    673 }
    674 
    675 define <4 x i64> @shuffle_v4i64_1054(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <1,0,5,4>: concatenate low halves then swap in-lane; AVX2 stays in the integer domain (vinserti128 + vpshufd).
    676 ; AVX1-LABEL: shuffle_v4i64_1054:
    677 ; AVX1:       # BB#0:
    678 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    679 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    680 ; AVX1-NEXT:    retq
    681 ;
    682 ; AVX2-LABEL: shuffle_v4i64_1054:
    683 ; AVX2:       # BB#0:
    684 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    685 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
    686 ; AVX2-NEXT:    retq
    687   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 5, i32 4>
    688   ret <4 x i64> %shuffle
    689 }
    690 
    691 define <4 x i64> @shuffle_v4i64_3254(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <3,2,5,4>: lane select then in-lane swap; AVX2 uses the integer forms vperm2i128 + vpshufd.
    692 ; AVX1-LABEL: shuffle_v4i64_3254:
    693 ; AVX1:       # BB#0:
    694 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
    695 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    696 ; AVX1-NEXT:    retq
    697 ;
    698 ; AVX2-LABEL: shuffle_v4i64_3254:
    699 ; AVX2:       # BB#0:
    700 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
    701 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
    702 ; AVX2-NEXT:    retq
    703   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 5, i32 4>
    704   ret <4 x i64> %shuffle
    705 }
    706 
    707 define <4 x i64> @shuffle_v4i64_3276(<4 x i64> %a, <4 x i64> %b) {
        ; i64 mask <3,2,7,6>: take both high lanes then swap in-lane; AVX2 uses vperm2i128 + vpshufd in the integer domain.
    708 ; AVX1-LABEL: shuffle_v4i64_3276:
    709 ; AVX1:       # BB#0:
    710 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    711 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
    712 ; AVX1-NEXT:    retq
    713 ;
    714 ; AVX2-LABEL: shuffle_v4i64_3276:
    715 ; AVX2:       # BB#0:
    716 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    717 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
    718 ; AVX2-NEXT:    retq
    719   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 3, i32 2, i32 7, i32 6>
    720   ret <4 x i64> %shuffle
    721 }
    722 
     723 define <4 x i64> @shuffle_v4i64_1076(<4 x i64> %a, <4 x i64> %b) {
; Mask <1,0,7,6>: low half of %a and high half of %b, each element-swapped.
; Expected: a cheap 128-bit blend (vblendpd/vpblendd) instead of a lane
; permute, then an in-lane swap (vpermilpd on AVX1, vpshufd on AVX2).
     724 ; AVX1-LABEL: shuffle_v4i64_1076:
     725 ; AVX1:       # BB#0:
     726 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
     727 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
     728 ; AVX1-NEXT:    retq
     729 ;
     730 ; AVX2-LABEL: shuffle_v4i64_1076:
     731 ; AVX2:       # BB#0:
     732 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
     733 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,3,0,1,6,7,4,5]
     734 ; AVX2-NEXT:    retq
     735   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 1, i32 0, i32 7, i32 6>
     736   ret <4 x i64> %shuffle
     737 }
    738 
     739 define <4 x i64> @shuffle_v4i64_0415(<4 x i64> %a, <4 x i64> %b) {
; Mask <0,4,1,5>: full interleave of the low halves (a0,b0,a1,b1).
; Expected on AVX1: 128-bit unpckh/unpckl pair recombined with vinsertf128.
; Expected on AVX2: two vpermq lane spreads merged with a dword blend.
     740 ; AVX1-LABEL: shuffle_v4i64_0415:
     741 ; AVX1:       # BB#0:
     742 ; AVX1-NEXT:    vunpckhpd {{.*#+}} xmm2 = xmm0[1],xmm1[1]
     743 ; AVX1-NEXT:    vunpcklpd {{.*#+}} xmm0 = xmm0[0],xmm1[0]
     744 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
     745 ; AVX1-NEXT:    retq
     746 ;
     747 ; AVX2-LABEL: shuffle_v4i64_0415:
     748 ; AVX2:       # BB#0:
     749 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[0,0,2,1]
     750 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
     751 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3],ymm0[4,5],ymm1[6,7]
     752 ; AVX2-NEXT:    retq
     753   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
     754   ret <4 x i64> %shuffle
     755 }
    756 
     757 define <4 x i64> @shuffle_v4i64_z4z6(<4 x i64> %a) {
; Result is <zero, a0, zero, a2> (first operand is zeroinitializer).
; Expected on AVX1: zero a register and unpcklpd with it. Expected on AVX2:
; recognized as a per-128-bit-lane left byte shift (vpslldq by 8 bytes).
     758 ; AVX1-LABEL: shuffle_v4i64_z4z6:
     759 ; AVX1:       # BB#0:
     760 ; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
     761 ; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
     762 ; AVX1-NEXT:    retq
     763 ;
     764 ; AVX2-LABEL: shuffle_v4i64_z4z6:
     765 ; AVX2:       # BB#0:
     766 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3,4,5,6,7],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19,20,21,22,23]
     767 ; AVX2-NEXT:    retq
     768   %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 0, i32 4, i32 0, i32 6>
     769   ret <4 x i64> %shuffle
     770 }
    771 
     772 define <4 x i64> @shuffle_v4i64_5zuz(<4 x i64> %a) {
; Result is <a1, zero, undef, zero> (first operand is zeroinitializer).
; Expected on AVX1: zero a register and unpckhpd against it. Expected on AVX2:
; recognized as a per-128-bit-lane right byte shift (vpsrldq by 8 bytes).
     773 ; AVX1-LABEL: shuffle_v4i64_5zuz:
     774 ; AVX1:       # BB#0:
     775 ; AVX1-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
     776 ; AVX1-NEXT:    vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
     777 ; AVX1-NEXT:    retq
     778 ;
     779 ; AVX2-LABEL: shuffle_v4i64_5zuz:
     780 ; AVX2:       # BB#0:
     781 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[8,9,10,11,12,13,14,15],zero,zero,zero,zero,zero,zero,zero,zero,ymm0[24,25,26,27,28,29,30,31],zero,zero,zero,zero,zero,zero,zero,zero
     782 ; AVX2-NEXT:    retq
     783   %shuffle = shufflevector <4 x i64> zeroinitializer, <4 x i64> %a, <4 x i32> <i32 5, i32 0, i32 undef, i32 0>
     784   ret <4 x i64> %shuffle
     785 }
    786 
     787 define <4 x i64> @shuffle_v4i64_40u2(<4 x i64> %a, <4 x i64> %b) {
; Mask <4,0,undef,2>: with the undef element treated as b2, this is exactly an
; even-element interleave of %b and %a, so a single unpcklpd (AVX1) or
; vpunpcklqdq (AVX2) suffices.
     788 ; AVX1-LABEL: shuffle_v4i64_40u2:
     789 ; AVX1:       # BB#0:
     790 ; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
     791 ; AVX1-NEXT:    retq
     792 ;
     793 ; AVX2-LABEL: shuffle_v4i64_40u2:
     794 ; AVX2:       # BB#0:
     795 ; AVX2-NEXT:    vpunpcklqdq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[2],ymm0[2]
     796 ; AVX2-NEXT:    retq
     797   %shuffle = shufflevector <4 x i64> %a, <4 x i64> %b, <4 x i32> <i32 4, i32 0, i32 undef, i32 2>
     798   ret <4 x i64> %shuffle
     799 }
    800 
     801 define <4 x i64> @stress_test1(<4 x i64> %a, <4 x i64> %b) {
; Stress test: a chain of four shufflevectors that the combiner must fold.
; The CHECK only requires that the function reaches retq (i.e. lowering does
; not crash or emit an unreduced mess); no specific instruction sequence is
; pinned down for either subtarget.
     802 ; ALL-LABEL: stress_test1:
     803 ; ALL:         retq
     804   %c = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 0>
     805   %d = shufflevector <4 x i64> %c, <4 x i64> undef, <4 x i32> <i32 3, i32 undef, i32 2, i32 undef>
     806   %e = shufflevector <4 x i64> %b, <4 x i64> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 undef>
     807   %f = shufflevector <4 x i64> %d, <4 x i64> %e, <4 x i32> <i32 5, i32 1, i32 1, i32 0>
     808 
     809   ret <4 x i64> %f
     810 }
    811 
     812 define <4 x i64> @insert_reg_and_zero_v4i64(i64 %a) {
; Insert a scalar i64 into element 0 with all other elements zeroed.
; Expected on both subtargets: a single vmovq, whose implicit zeroing of the
; upper bits covers the zero elements with no extra blend.
     813 ; AVX1-LABEL: insert_reg_and_zero_v4i64:
     814 ; AVX1:       # BB#0:
     815 ; AVX1-NEXT:    vmovq %rdi, %xmm0
     816 ; AVX1-NEXT:    retq
     817 ;
     818 ; AVX2-LABEL: insert_reg_and_zero_v4i64:
     819 ; AVX2:       # BB#0:
     820 ; AVX2-NEXT:    vmovq %rdi, %xmm0
     821 ; AVX2-NEXT:    retq
     822   %v = insertelement <4 x i64> undef, i64 %a, i64 0
     823   %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
     824   ret <4 x i64> %shuffle
     825 }
    826 
     827 define <4 x i64> @insert_mem_and_zero_v4i64(i64* %ptr) {
; Same as insert_reg_and_zero_v4i64 but the scalar comes from memory.
; Expected: the load folds into a single zero-extending vmovq on both
; subtargets.
     828 ; AVX1-LABEL: insert_mem_and_zero_v4i64:
     829 ; AVX1:       # BB#0:
     830 ; AVX1-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
     831 ; AVX1-NEXT:    retq
     832 ;
     833 ; AVX2-LABEL: insert_mem_and_zero_v4i64:
     834 ; AVX2:       # BB#0:
     835 ; AVX2-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
     836 ; AVX2-NEXT:    retq
     837   %a = load i64, i64* %ptr
     838   %v = insertelement <4 x i64> undef, i64 %a, i64 0
     839   %shuffle = shufflevector <4 x i64> %v, <4 x i64> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
     840   ret <4 x i64> %shuffle
     841 }
    842 
     843 define <4 x double> @insert_reg_and_zero_v4f64(double %a) {
; Insert a scalar double into element 0 with the rest zeroed. %a already
; arrives in xmm0 (note the register-liveness "kill" comment), so the expected
; lowering is just zeroing a register and blending element 0 in.
     844 ; ALL-LABEL: insert_reg_and_zero_v4f64:
     845 ; ALL:       # BB#0:
     846 ; ALL-NEXT:    # kill: XMM0<def> XMM0<kill> YMM0<def>
     847 ; ALL-NEXT:    vxorpd %ymm1, %ymm1, %ymm1
     848 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
     849 ; ALL-NEXT:    retq
     850   %v = insertelement <4 x double> undef, double %a, i32 0
     851   %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
     852   ret <4 x double> %shuffle
     853 }
    854 
     855 define <4 x double> @insert_mem_and_zero_v4f64(double* %ptr) {
; Load a double into element 0 with the rest zeroed. Expected: a single
; vmovsd load, whose zeroing of the upper elements makes any blend redundant.
     856 ; ALL-LABEL: insert_mem_and_zero_v4f64:
     857 ; ALL:       # BB#0:
     858 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
     859 ; ALL-NEXT:    retq
     860   %a = load double, double* %ptr
     861   %v = insertelement <4 x double> undef, double %a, i32 0
     862   %shuffle = shufflevector <4 x double> %v, <4 x double> zeroinitializer, <4 x i32> <i32 0, i32 5, i32 6, i32 7>
     863   ret <4 x double> %shuffle
     864 }
    865 
     866 define <4 x double> @splat_mem_v4f64(double* %ptr) {
; Splat a double loaded from memory to all four elements. Expected: the
; load + insert + splat-shuffle folds into one vbroadcastsd from memory on
; both subtargets.
     867 ; ALL-LABEL: splat_mem_v4f64:
     868 ; ALL:       # BB#0:
     869 ; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
     870 ; ALL-NEXT:    retq
     871   %a = load double, double* %ptr
     872   %v = insertelement <4 x double> undef, double %a, i32 0
     873   %shuffle = shufflevector <4 x double> %v, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     874   ret <4 x double> %shuffle
     875 }
    876 
     877 define <4 x i64> @splat_mem_v4i64(i64* %ptr) {
; Splat an i64 loaded from memory to all four elements. Both subtargets are
; expected to use the floating-point vbroadcastsd from memory (AVX1 has no
; integer 256-bit broadcast, and for AVX2 the FP form is equally good here).
     878 ; AVX1-LABEL: splat_mem_v4i64:
     879 ; AVX1:       # BB#0:
     880 ; AVX1-NEXT:    vbroadcastsd (%rdi), %ymm0
     881 ; AVX1-NEXT:    retq
     882 ;
     883 ; AVX2-LABEL: splat_mem_v4i64:
     884 ; AVX2:       # BB#0:
     885 ; AVX2-NEXT:    vbroadcastsd (%rdi), %ymm0
     886 ; AVX2-NEXT:    retq
     887   %a = load i64, i64* %ptr
     888   %v = insertelement <4 x i64> undef, i64 %a, i64 0
     889   %shuffle = shufflevector <4 x i64> %v, <4 x i64> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
     890   ret <4 x i64> %shuffle
     891 }
    892 
     893 define <4 x double> @splat_mem_v4f64_2(double* %p) {
; Variant of splat_mem_v4f64 where the splat widens a <2 x double> to
; <4 x double> via the shuffle. Expected: still a single vbroadcastsd from
; memory on both subtargets.
     894 ; ALL-LABEL: splat_mem_v4f64_2:
     895 ; ALL:       # BB#0:
     896 ; ALL-NEXT:    vbroadcastsd (%rdi), %ymm0
     897 ; ALL-NEXT:    retq
     898   %1 = load double, double* %p
     899   %2 = insertelement <2 x double> undef, double %1, i32 0
     900   %3 = shufflevector <2 x double> %2, <2 x double> undef, <4 x i32> zeroinitializer
     901   ret <4 x double> %3
     902 }
    903 
     904 define <4 x double> @splat_v4f64(<2 x double> %r) {
; Splat element 0 of a register-resident <2 x double> to all four lanes.
; Expected on AVX1 (no register-source vbroadcastsd): vmovddup + vinsertf128.
; Expected on AVX2: a single register-source vbroadcastsd.
     905 ; AVX1-LABEL: splat_v4f64:
     906 ; AVX1:       # BB#0:
     907 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
     908 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     909 ; AVX1-NEXT:    retq
     910 ;
     911 ; AVX2-LABEL: splat_v4f64:
     912 ; AVX2:       # BB#0:
     913 ; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
     914 ; AVX2-NEXT:    retq
     915   %1 = shufflevector <2 x double> %r, <2 x double> undef, <4 x i32> zeroinitializer
     916   ret <4 x double> %1
     917 }
    918 
     919 define <4 x double> @bitcast_v4f64_0426(<4 x double> %a, <4 x double> %b) {
; Shuffle combining across bitcasts: the f64 shuffle <4,0,6,2>, the per-lane
; f32 reversal, and the i16 word swap must be folded together. The f32 and i16
; stages jointly amount to swapping the two qwords in each 128-bit lane, so
; the whole chain reduces to one unpcklpd / vpunpcklqdq of %a and %b.
     920 ; AVX1-LABEL: bitcast_v4f64_0426:
     921 ; AVX1:       # BB#0:
     922 ; AVX1-NEXT:    vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
     923 ; AVX1-NEXT:    retq
     924 ;
     925 ; AVX2-LABEL: bitcast_v4f64_0426:
     926 ; AVX2:       # BB#0:
     927 ; AVX2-NEXT:    vpunpcklqdq  {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
     928 ; AVX2-NEXT:    retq
     929   %shuffle64 = shufflevector <4 x double> %a, <4 x double> %b, <4 x i32> <i32 4, i32 0, i32 6, i32 2>
     930   %bitcast32 = bitcast <4 x double> %shuffle64 to <8 x float>
     931   %shuffle32 = shufflevector <8 x float> %bitcast32, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
     932   %bitcast16 = bitcast <8 x float> %shuffle32 to <16 x i16>
     933   %shuffle16 = shufflevector <16 x i16> %bitcast16, <16 x i16> undef, <16 x i32> <i32 2, i32 3, i32 0, i32 1, i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9, i32 14, i32 15, i32 12, i32 13>
     934   %bitcast64 = bitcast <16 x i16> %shuffle16 to <4 x double>
     935   ret <4 x double> %bitcast64
     936 }
    937