Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
      3 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
      4 
      5 target triple = "x86_64-unknown-unknown"
      6 
      ; Splat: every result lane is element 0 of %a (%b is unused). The AVX1 run
      ; expects vpermilps on the low half followed by vinsertf128; the AVX2 run
      ; expects a single vbroadcastss.
      7 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
      8 ; AVX1-LABEL: shuffle_v8f32_00000000:
      9 ; AVX1:       # BB#0:
     10 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
     11 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     12 ; AVX1-NEXT:    retq
     13 ;
     14 ; AVX2-LABEL: shuffle_v8f32_00000000:
     15 ; AVX2:       # BB#0:
     16 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
     17 ; AVX2-NEXT:    retq
     18   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     19   ret <8 x float> %shuffle
     20 }
     21 
     ; Element 0 of %a in every lane except lane 6, which takes element 1. The
     ; AVX1 run builds both halves from xmm0 with vpermilps and joins them with
     ; vinsertf128; the AVX2 run expects vpermps with an index vector.
     22 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
     23 ; AVX1-LABEL: shuffle_v8f32_00000010:
     24 ; AVX1:       # BB#0:
     25 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     26 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
     27 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     28 ; AVX1-NEXT:    retq
     29 ;
     30 ; AVX2-LABEL: shuffle_v8f32_00000010:
     31 ; AVX2:       # BB#0:
     32 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
     33 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     34 ; AVX2-NEXT:    retq
     35   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
     36   ret <8 x float> %shuffle
     37 }
     38 
     ; Element 0 of %a in every lane except lane 5, which takes element 2. The
     ; AVX1 run builds both halves from xmm0 with vpermilps and joins them with
     ; vinsertf128; the AVX2 run expects vpermps with an index vector.
     39 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
     40 ; AVX1-LABEL: shuffle_v8f32_00000200:
     41 ; AVX1:       # BB#0:
     42 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     43 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
     44 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     45 ; AVX1-NEXT:    retq
     46 ;
     47 ; AVX2-LABEL: shuffle_v8f32_00000200:
     48 ; AVX2:       # BB#0:
     49 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
     50 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     51 ; AVX2-NEXT:    retq
     52   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
     53   ret <8 x float> %shuffle
     54 }
     55 
     ; Element 0 of %a in every lane except lane 4, which takes element 3. The
     ; AVX1 run builds both halves from xmm0 with vpermilps and joins them with
     ; vinsertf128; the AVX2 run expects vpermps with an index vector.
     56 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
     57 ; AVX1-LABEL: shuffle_v8f32_00003000:
     58 ; AVX1:       # BB#0:
     59 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     60 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
     61 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     62 ; AVX1-NEXT:    retq
     63 ;
     64 ; AVX2-LABEL: shuffle_v8f32_00003000:
     65 ; AVX2:       # BB#0:
     66 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
     67 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     68 ; AVX2-NEXT:    retq
     69   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
     70   ret <8 x float> %shuffle
     71 }
     72 
     ; Element 0 of %a in every lane except lane 3, which takes element 4, so a
     ; value from the upper 128-bit half must land in the lower half. The AVX1
     ; run expects vperm2f128, two vpermilps and a vblendps; the AVX2 run
     ; expects vpermps with an index vector.
     73 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
     74 ; AVX1-LABEL: shuffle_v8f32_00040000:
     75 ; AVX1:       # BB#0:
     76 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
     77 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
     78 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
     79 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
     80 ; AVX1-NEXT:    retq
     81 ;
     82 ; AVX2-LABEL: shuffle_v8f32_00040000:
     83 ; AVX2:       # BB#0:
     84 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
     85 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     86 ; AVX2-NEXT:    retq
     87   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
     88   ret <8 x float> %shuffle
     89 }
     90 
     ; Element 0 of %a in every lane except lane 2, which takes element 5 from
     ; the upper 128-bit half. The AVX1 run expects vperm2f128, vblendps and
     ; vpermilps; the AVX2 run expects vpermps with an index vector.
     91 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
     92 ; AVX1-LABEL: shuffle_v8f32_00500000:
     93 ; AVX1:       # BB#0:
     94 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
     95 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
     96 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
     97 ; AVX1-NEXT:    retq
     98 ;
     99 ; AVX2-LABEL: shuffle_v8f32_00500000:
    100 ; AVX2:       # BB#0:
    101 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
    102 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    103 ; AVX2-NEXT:    retq
    104   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    105   ret <8 x float> %shuffle
    106 }
    107 
    ; Element 0 of %a in every lane except lane 1, which takes element 6 from
    ; the upper 128-bit half. The AVX1 run expects vperm2f128, vblendpd and
    ; vpermilps; the AVX2 run expects vpermps with an index vector.
    108 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
    109 ; AVX1-LABEL: shuffle_v8f32_06000000:
    110 ; AVX1:       # BB#0:
    111 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    112 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    113 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
    114 ; AVX1-NEXT:    retq
    115 ;
    116 ; AVX2-LABEL: shuffle_v8f32_06000000:
    117 ; AVX2:       # BB#0:
    118 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
    119 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    120 ; AVX2-NEXT:    retq
    121   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    122   ret <8 x float> %shuffle
    123 }
    124 
    ; Element 0 of %a in every lane except lane 0, which takes element 7. The
    ; AVX1 run expects vperm2f128, vblendpd and vpermilps. In the AVX2 run the
    ; vpermps index vector is built with movl $7 + vmovd instead of a vmovaps.
    125 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
    126 ; AVX1-LABEL: shuffle_v8f32_70000000:
    127 ; AVX1:       # BB#0:
    128 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    129 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    130 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
    131 ; AVX1-NEXT:    retq
    132 ;
    133 ; AVX2-LABEL: shuffle_v8f32_70000000:
    134 ; AVX2:       # BB#0:
    135 ; AVX2-NEXT:    movl $7, %eax
    136 ; AVX2-NEXT:    vmovd %eax, %xmm1
    137 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    138 ; AVX2-NEXT:    retq
    139   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    140   ret <8 x float> %shuffle
    141 }
    142 
    ; Repeats the pair (0,1) across the low half and the pair (4,5) across the
    ; high half of %a. Both runs expect a single vmovddup on ymm0.
    143 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
    144 ; ALL-LABEL: shuffle_v8f32_01014545:
    145 ; ALL:       # BB#0:
    146 ; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    147 ; ALL-NEXT:    retq
    148   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    149   ret <8 x float> %shuffle
    150 }
    151 
    ; Duplicates each of elements 0..3 of %a into two adjacent lanes. The AVX1
    ; run expects 128-bit vunpcklps/vunpckhps joined by vinsertf128; the AVX2
    ; run expects vpermps with an index vector.
    152 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
    153 ; AVX1-LABEL: shuffle_v8f32_00112233:
    154 ; AVX1:       # BB#0:
    155 ; AVX1-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
    156 ; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
    157 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    158 ; AVX1-NEXT:    retq
    159 ;
    160 ; AVX2-LABEL: shuffle_v8f32_00112233:
    161 ; AVX2:       # BB#0:
    162 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
    163 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    164 ; AVX2-NEXT:    retq
    165   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
    166   ret <8 x float> %shuffle
    167 }
    168 
    ; Low half is a splat of element 0 of %a, high half a splat of element 1.
    ; The AVX1 run expects two 128-bit vpermilps joined by vinsertf128; the
    ; AVX2 run expects vpermps with an index vector.
    169 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
    170 ; AVX1-LABEL: shuffle_v8f32_00001111:
    171 ; AVX1:       # BB#0:
    172 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    173 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    174 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    175 ; AVX1-NEXT:    retq
    176 ;
    177 ; AVX2-LABEL: shuffle_v8f32_00001111:
    178 ; AVX2:       # BB#0:
    179 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
    180 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    181 ; AVX2-NEXT:    retq
    182   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
    183   ret <8 x float> %shuffle
    184 }
    185 
    ; Position-preserving blend: even lanes come from %b (mask indices 8..15
    ; select %b), odd lanes from %a. Both runs expect a single vblendps.
    186 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
    187 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
    188 ; ALL:       # BB#0:
    189 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
    190 ; ALL-NEXT:    retq
    191   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
    192   ret <8 x float> %shuffle
    193 }
    194 
    ; Alternates element 0 of %a (even lanes) with element 0 of %b (odd lanes).
    ; The AVX1 run widens per-input 128-bit shuffles with vinsertf128 and then
    ; blends; the AVX2 run expects vbroadcastss/vbroadcastsd plus vblendps.
    195 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
    196 ; AVX1-LABEL: shuffle_v8f32_08080808:
    197 ; AVX1:       # BB#0:
    198 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
    199 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
    200 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    201 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    202 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    203 ; AVX1-NEXT:    retq
    204 ;
    205 ; AVX2-LABEL: shuffle_v8f32_08080808:
    206 ; AVX2:       # BB#0:
    207 ; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
    208 ; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
    209 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    210 ; AVX2-NEXT:    retq
    211   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
    212   ret <8 x float> %shuffle
    213 }
    214 
    ; Within each 128-bit half, alternates the first element of %a's half with
    ; the first element of %b's half. Both runs expect two vshufps: one to
    ; gather the values and one to interleave them.
    215 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
    216 ; ALL-LABEL: shuffle_v8f32_08084c4c:
    217 ; ALL:       # BB#0:
    218 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
    219 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
    220 ; ALL-NEXT:    retq
    221   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
    222   ret <8 x float> %shuffle
    223 }
    224 
    ; Within each 128-bit half: the first element of %b's half twice, then the
    ; upper two elements of %a's half unchanged. Both runs expect one vshufps.
    224 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
    225 ; ALL-LABEL: shuffle_v8f32_8823cc67:
    226 ; ALL:       # BB#0:
    227 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
    228 ; ALL-NEXT:    retq
    229   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
    230   ret <8 x float> %shuffle
    231 }
    233 
    ; Within each 128-bit half: the low pair of %b swapped, then the high pair
    ; of %a swapped. Both runs expect one vshufps.
    233 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
    234 ; ALL-LABEL: shuffle_v8f32_9832dc76:
    235 ; ALL:       # BB#0:
    236 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
    237 ; ALL-NEXT:    retq
    238   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
    239   ret <8 x float> %shuffle
    240 }
    242 
    ; Within each 128-bit half: the low pair of %b swapped, then the low pair
    ; of %a swapped. Both runs expect one vshufps.
    242 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
    243 ; ALL-LABEL: shuffle_v8f32_9810dc54:
    244 ; ALL:       # BB#0:
    245 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
    246 ; ALL-NEXT:    retq
    247   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
    248   ret <8 x float> %shuffle
    249 }
    251 
    ; Interleaves the low two elements of each 128-bit half of %a with the
    ; matching elements of %b. Both runs expect one 256-bit vunpcklps.
    251 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
    252 ; ALL-LABEL: shuffle_v8f32_08194c5d:
    253 ; ALL:       # BB#0:
    254 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
    255 ; ALL-NEXT:    retq
    256   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
    257   ret <8 x float> %shuffle
    258 }
    260 
    ; Interleaves the high two elements of each 128-bit half of %a with the
    ; matching elements of %b. Both runs expect one 256-bit vunpckhps.
    260 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
    261 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
    262 ; ALL:       # BB#0:
    263 ; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
    264 ; ALL-NEXT:    retq
    265   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
    266   ret <8 x float> %shuffle
    267 }
    269 
    ; Interleaves elements 0..3 of %a with elements 0..3 of %b across the whole
    ; 256-bit result, so only the low halves of the inputs are read. The AVX1
    ; run expects 128-bit unpacks joined by vinsertf128; the AVX2 run expects
    ; one vpermps per input followed by vblendps.
    269 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
    270 ; AVX1-LABEL: shuffle_v8f32_08192a3b:
    271 ; AVX1:       # BB#0:
    272 ; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    273 ; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    274 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    275 ; AVX1-NEXT:    retq
    276 ;
    277 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
    278 ; AVX2:       # BB#0:
    279 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
    280 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    281 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
    282 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    283 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    284 ; AVX2-NEXT:    retq
    285   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    286   ret <8 x float> %shuffle
    287 }
    289 
    ; Result is a[0], b[0], b[1], b[1], a[1], b[2], b[3], b[3]; only the low
    ; halves of both inputs are read. The AVX1 run builds each result half
    ; with 128-bit shuffles and joins them with vinsertf128; the AVX2 run
    ; expects one vpermps per input followed by vblendps.
    290 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
    291 ; AVX1-LABEL: shuffle_v8f32_08991abb:
    292 ; AVX1:       # BB#0:
    293 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
    294 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
    295 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
    296 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
    297 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    298 ; AVX1-NEXT:    retq
    299 ;
    300 ; AVX2-LABEL: shuffle_v8f32_08991abb:
    301 ; AVX2:       # BB#0:
    302 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
    303 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    304 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
    305 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    306 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    307 ; AVX2-NEXT:    retq
    308   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
    309   ret <8 x float> %shuffle
    310 }
    311 
    ; Even lanes take a[0..3] in order; odd lanes keep the element of %b that
    ; is already in that position. Only %a needs a permute (128-bit shuffles
    ; plus vinsertf128 on AVX1, vpermps on AVX2) before the final vblendps.
    312 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
    313 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
    314 ; AVX1:       # BB#0:
    315 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
    316 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
    317 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    318 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    319 ; AVX1-NEXT:    retq
    320 ;
    321 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
    322 ; AVX2:       # BB#0:
    323 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
    324 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    325 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    326 ; AVX2-NEXT:    retq
    327   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
    328   ret <8 x float> %shuffle
    329 }
    330 
    ; Lanes 0 and 4 take a[0] and a[1]; every other lane keeps the element of
    ; %b already in that position. The AVX1 run moves a[1] into place with
    ; vmovshdup + vinsertf128, the AVX2 run with vpermps; both then blend.
    331 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
    332 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
    333 ; AVX1:       # BB#0:
    334 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
    335 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    336 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    337 ; AVX1-NEXT:    retq
    338 ;
    339 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
    340 ; AVX2:       # BB#0:
    341 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
    342 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    343 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    344 ; AVX2-NEXT:    retq
    345   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
    346   ret <8 x float> %shuffle
    347 }
    348 
    ; Single-input permute that stays within each 128-bit half and applies the
    ; same pattern (0,0,0,1) to both halves. Both runs expect one vpermilps.
    349 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
    350 ; ALL-LABEL: shuffle_v8f32_00014445:
    351 ; ALL:       # BB#0:
    352 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
    353 ; ALL-NEXT:    retq
    354   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
    355   ret <8 x float> %shuffle
    356 }
    357 
    ; Single-input permute that stays within each 128-bit half and applies the
    ; same pattern (0,0,2,0) to both halves. Both runs expect one vpermilps.
    358 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
    359 ; ALL-LABEL: shuffle_v8f32_00204464:
    360 ; ALL:       # BB#0:
    361 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
    362 ; ALL-NEXT:    retq
    363   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
    364   ret <8 x float> %shuffle
    365 }
    366 
    ; Single-input permute that stays within each 128-bit half and applies the
    ; same pattern (0,3,0,0) to both halves. Both runs expect one vpermilps.
    367 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
    368 ; ALL-LABEL: shuffle_v8f32_03004744:
    369 ; ALL:       # BB#0:
    370 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
    371 ; ALL-NEXT:    retq
    372   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
    373   ret <8 x float> %shuffle
    374 }
    375 
    ; Single-input permute that stays within each 128-bit half and applies the
    ; same pattern (1,0,0,0) to both halves. Both runs expect one vpermilps.
    376 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
    377 ; ALL-LABEL: shuffle_v8f32_10005444:
    378 ; ALL:       # BB#0:
    379 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
    380 ; ALL-NEXT:    retq
    381   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
    382   ret <8 x float> %shuffle
    383 }
    384 
    ; Single-input permute that stays within each 128-bit half and applies the
    ; same pattern (2,2,0,0) to both halves. Both runs expect one vpermilps.
    385 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
    386 ; ALL-LABEL: shuffle_v8f32_22006644:
    387 ; ALL:       # BB#0:
    388 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
    389 ; ALL-NEXT:    retq
    390   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
    391   ret <8 x float> %shuffle
    392 }
    393 
    ; Single-input permute that stays within each 128-bit half and applies the
    ; same pattern (3,3,3,0) to both halves. Both runs expect one vpermilps.
    394 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
    395 ; ALL-LABEL: shuffle_v8f32_33307774:
    396 ; ALL:       # BB#0:
    397 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
    398 ; ALL-NEXT:    retq
    399   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
    400   ret <8 x float> %shuffle
    401 }
    402 
    ; Reverses the four elements inside each 128-bit half of %a (the halves
    ; themselves stay in place). Both runs expect one vpermilps.
    403 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
    404 ; ALL-LABEL: shuffle_v8f32_32107654:
    405 ; ALL:       # BB#0:
    406 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    407 ; ALL-NEXT:    retq
    408   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    409   ret <8 x float> %shuffle
    410 }
    411 
    ; Single-input permute that stays within each 128-bit half and applies the
    ; same pattern (0,0,2,3) to both halves. Both runs expect one vpermilps.
    412 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
    413 ; ALL-LABEL: shuffle_v8f32_00234467:
    414 ; ALL:       # BB#0:
    415 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
    416 ; ALL-NEXT:    retq
    417   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
    418   ret <8 x float> %shuffle
    419 }
    420 
    ; Duplicates each even-indexed element of %a into the following odd lane.
    ; Both runs expect the dedicated vmovsldup instead of a generic permute.
    421 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
    422 ; ALL-LABEL: shuffle_v8f32_00224466:
    423 ; ALL:       # BB#0:
    424 ; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
    425 ; ALL-NEXT:    retq
    426   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
    427   ret <8 x float> %shuffle
    428 }
    429 
    ; Swaps the two elements of every adjacent pair of %a (same pattern
    ; 1,0,3,2 in both 128-bit halves). Both runs expect one vpermilps.
    430 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
    431 ; ALL-LABEL: shuffle_v8f32_10325476:
    432 ; ALL:       # BB#0:
    433 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
    434 ; ALL-NEXT:    retq
    435   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
    436   ret <8 x float> %shuffle
    437 }
    438 
    ; Duplicates each odd-indexed element of %a into the preceding even lane.
    ; Both runs expect the dedicated vmovshdup instead of a generic permute.
    439 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
    440 ; ALL-LABEL: shuffle_v8f32_11335577:
    441 ; ALL:       # BB#0:
    442 ; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
    443 ; ALL-NEXT:    retq
    444   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
    445   ret <8 x float> %shuffle
    446 }
    447 
    ; Single-input permute that stays within each 128-bit half and applies the
    ; same pattern (1,0,2,3) to both halves. Both runs expect one vpermilps.
    448 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
    449 ; ALL-LABEL: shuffle_v8f32_10235467:
    450 ; ALL:       # BB#0:
    451 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
    452 ; ALL-NEXT:    retq
    453   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
    454   ret <8 x float> %shuffle
    455 }
    456 
    ; Single-input permute that stays within each 128-bit half and applies the
    ; same pattern (1,0,2,2) to both halves. Both runs expect one vpermilps.
    457 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
    458 ; ALL-LABEL: shuffle_v8f32_10225466:
    459 ; ALL:       # BB#0:
    460 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
    461 ; ALL-NEXT:    retq
    462   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
    463   ret <8 x float> %shuffle
    464 }
    465 
    ; Single-input permute that stays within each 128-bit half but uses a
    ; different pattern per half (0,0,0,1 low; 1,0,0,0 high). Both runs still
    ; expect one vpermilps.
    466 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
    467 ; ALL-LABEL: shuffle_v8f32_00015444:
    468 ; ALL:       # BB#0:
    469 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
    470 ; ALL-NEXT:    retq
    471   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
    472   ret <8 x float> %shuffle
    473 }
    474 
    ; Single-input permute that stays within each 128-bit half but uses a
    ; different pattern per half (0,0,2,0 low; 0,2,0,0 high). Both runs still
    ; expect one vpermilps.
    475 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
    476 ; ALL-LABEL: shuffle_v8f32_00204644:
    477 ; ALL:       # BB#0:
    478 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
    479 ; ALL-NEXT:    retq
    480   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
    481   ret <8 x float> %shuffle
    482 }
    483 
    ; Single-input permute that stays within each 128-bit half but uses a
    ; different pattern per half (0,3,0,0 low; 0,0,3,0 high). Both runs still
    ; expect one vpermilps.
    484 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
    485 ; ALL-LABEL: shuffle_v8f32_03004474:
    486 ; ALL:       # BB#0:
    487 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
    488 ; ALL-NEXT:    retq
    489   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
    490   ret <8 x float> %shuffle
    491 }
    492 
    ; Single-input permute that stays within each 128-bit half but uses a
    ; different pattern per half (1,0,0,0 low; splat of element 4 high). Both
    ; runs still expect one vpermilps.
    493 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
    494 ; ALL-LABEL: shuffle_v8f32_10004444:
    495 ; ALL:       # BB#0:
    496 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
    497 ; ALL-NEXT:    retq
    498   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
    499   ret <8 x float> %shuffle
    500 }
    501 
    ; Single-input permute that stays within each 128-bit half but uses a
    ; different pattern per half (2,2,0,0 low; 2,0,0,2 high). Both runs still
    ; expect one vpermilps.
    502 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
    503 ; ALL-LABEL: shuffle_v8f32_22006446:
    504 ; ALL:       # BB#0:
    505 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
    506 ; ALL-NEXT:    retq
    507   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
    508   ret <8 x float> %shuffle
    509 }
    510 
    ; Single-input permute that stays within each 128-bit half but uses a
    ; different pattern per half (3,3,3,0 low; 3,0,3,0 high). Both runs still
    ; expect one vpermilps.
    511 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
    512 ; ALL-LABEL: shuffle_v8f32_33307474:
    513 ; ALL:       # BB#0:
    514 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
    515 ; ALL-NEXT:    retq
    516   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
    517   ret <8 x float> %shuffle
    518 }
    519 
    ; Reverses the low 128-bit half of %a and leaves the high half untouched.
    ; Both runs expect one vpermilps.
    520 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
    521 ; ALL-LABEL: shuffle_v8f32_32104567:
    522 ; ALL:       # BB#0:
    523 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
    524 ; ALL-NEXT:    retq
    525   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
    526   ret <8 x float> %shuffle
    527 }
    528 
    ; Single-input permute that stays within each 128-bit half but uses a
    ; different pattern per half (0,0,2,3 low; 2,3,0,0 high). Both runs still
    ; expect one vpermilps.
    529 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
    530 ; ALL-LABEL: shuffle_v8f32_00236744:
    531 ; ALL:       # BB#0:
    532 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
    533 ; ALL-NEXT:    retq
    534   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
    535   ret <8 x float> %shuffle
    536 }
    537 
    ; Single-input permute that stays within each 128-bit half but uses a
    ; different pattern per half (0,0,2,2 low; 2,2,0,0 high). Both runs still
    ; expect one vpermilps.
    538 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
    539 ; ALL-LABEL: shuffle_v8f32_00226644:
    540 ; ALL:       # BB#0:
    541 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
    542 ; ALL-NEXT:    retq
    543   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
    544   ret <8 x float> %shuffle
    545 }
    546 
    ; Swaps adjacent pairs in the low 128-bit half of %a and leaves the high
    ; half untouched. Both runs expect one vpermilps.
    547 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
    548 ; ALL-LABEL: shuffle_v8f32_10324567:
    549 ; ALL:       # BB#0:
    550 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
    551 ; ALL-NEXT:    retq
    552   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
    553   ret <8 x float> %shuffle
    554 }
    555 
    ; Duplicates the odd elements in the low 128-bit half of %a (1,1,3,3) and
    ; leaves the high half untouched. Both runs expect one vpermilps.
    556 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
    557 ; ALL-LABEL: shuffle_v8f32_11334567:
    558 ; ALL:       # BB#0:
    559 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
    560 ; ALL-NEXT:    retq
    561   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
    562   ret <8 x float> %shuffle
    563 }
    564 
    ; Leaves the low 128-bit half of %a untouched and swaps the first two
    ; elements of the high half. Both runs expect one vpermilps.
    565 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
    566 ; ALL-LABEL: shuffle_v8f32_01235467:
    567 ; ALL:       # BB#0:
    568 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
    569 ; ALL-NEXT:    retq
    570   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
    571   ret <8 x float> %shuffle
    572 }
    573 
    ; Leaves the low 128-bit half of %a untouched and permutes the high half
    ; to 5,4,6,6. Both runs expect one vpermilps.
    574 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
    575 ; ALL-LABEL: shuffle_v8f32_01235466:
    576 ; ALL:       # BB#0:
    577 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
    578 ; ALL-NEXT:    retq
    579   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
    580   ret <8 x float> %shuffle
    581 }
    582 
    ; In-half permute of %a with undef lanes 3 and 5 ('u' in the name). Both
    ; runs expect one vpermilps whose decoded mask keeps those lanes as 'u'.
    583 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
    584 ; ALL-LABEL: shuffle_v8f32_002u6u44:
    585 ; ALL:       # BB#0:
    586 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
    587 ; ALL-NEXT:    retq
    588   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
    589   ret <8 x float> %shuffle
    590 }
    591 
    ; In-half permute of %a with undef lanes 2, 3, 6 and 7. Both runs expect
    ; one vpermilps whose decoded mask keeps those lanes as 'u'.
    592 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
    593 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
    594 ; ALL:       # BB#0:
    595 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
    596 ; ALL-NEXT:    retq
    597   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
    598   ret <8 x float> %shuffle
    599 }
    600 
    ; Swaps adjacent pairs in the low half of %a, keeps elements 4 and 5 in
    ; place and leaves lanes 6 and 7 undef. Both runs expect one vpermilps.
    601 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
    602 ; ALL-LABEL: shuffle_v8f32_103245uu:
    603 ; ALL:       # BB#0:
    604 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
    605 ; ALL-NEXT:    retq
    606   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
    607   ret <8 x float> %shuffle
    608 }
    609 
    ; Duplicates the odd elements in the low half of %a, leaves lanes 4 and 5
    ; undef and keeps elements 6 and 7 in place. Both runs expect one
    ; vpermilps.
    610 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
    611 ; ALL-LABEL: shuffle_v8f32_1133uu67:
    612 ; ALL:       # BB#0:
    613 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
    614 ; ALL-NEXT:    retq
    615   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
    616   ret <8 x float> %shuffle
    617 }
    618 
    ; Mostly-undef in-half permute of %a: only lanes 0, 3, 4 and 5 are defined
    ; (elements 0, 3, 5, 4). Both runs expect one vpermilps.
    619 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
    620 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
    621 ; ALL:       # BB#0:
    622 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
    623 ; ALL-NEXT:    retq
    624   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
    625   ret <8 x float> %shuffle
    626 }
    627 
    ; Mostly-undef in-half permute of %a: only lane 3 (element 3) and lanes 6
    ; and 7 (both element 6) are defined. Both runs expect one vpermilps.
    628 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
    629 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
    630 ; ALL:       # BB#0:
    631 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
    632 ; ALL-NEXT:    retq
    633   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
    634   ret <8 x float> %shuffle
    635 }
    636 
    ; Irregular two-input shuffle that crosses 128-bit halves for both inputs:
    ; result is b[4], a[3], a[4], b[0], b[4], b[5], b[2], a[0]. The AVX1 run
    ; expects a vperm2f128-based sequence per input and a final vblendps; the
    ; AVX2 run expects one vpermps per input followed by the same vblendps.
    637 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
    638 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
    639 ; AVX1:       # BB#0:
    640 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    641 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
    642 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
    643 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
    644 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
    645 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
    646 ; AVX1-NEXT:    retq
    647 ;
    648 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
    649 ; AVX2:       # BB#0:
    650 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
    651 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    652 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
    653 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    654 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
    655 ; AVX2-NEXT:    retq
    656   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
    657   ret <8 x float> %shuffle
    658 }
    659 
    660 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
    661 ; AVX1-LABEL: shuffle_v8f32_f511235a:
    662 ; AVX1:       # BB#0:
    663 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    664 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
    665 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
    666 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
    667 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
    668 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
    669 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
    670 ; AVX1-NEXT:    retq
    671 ;
    672 ; AVX2-LABEL: shuffle_v8f32_f511235a:
    673 ; AVX2:       # BB#0:
    674 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
    675 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    676 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
    677 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    678 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
    679 ; AVX2-NEXT:    retq
    680   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
    681   ret <8 x float> %shuffle
    682 }
    683 
    684 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
    685 ; AVX1-LABEL: shuffle_v8f32_32103210:
    686 ; AVX1:       # BB#0:
    687 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    688 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    689 ; AVX1-NEXT:    retq
    690 ;
    691 ; AVX2-LABEL: shuffle_v8f32_32103210:
    692 ; AVX2:       # BB#0:
    693 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
    694 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    695 ; AVX2-NEXT:    retq
    696   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
    697   ret <8 x float> %shuffle
    698 }
    699 
    700 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
    701 ; AVX1-LABEL: shuffle_v8f32_76547654:
    702 ; AVX1:       # BB#0:
    703 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    704 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    705 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    706 ; AVX1-NEXT:    retq
    707 ;
    708 ; AVX2-LABEL: shuffle_v8f32_76547654:
    709 ; AVX2:       # BB#0:
    710 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
    711 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    712 ; AVX2-NEXT:    retq
    713   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
    714   ret <8 x float> %shuffle
    715 }
    716 
    717 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
    718 ; AVX1-LABEL: shuffle_v8f32_76543210:
    719 ; AVX1:       # BB#0:
    720 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
    721 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    722 ; AVX1-NEXT:    retq
    723 ;
    724 ; AVX2-LABEL: shuffle_v8f32_76543210:
    725 ; AVX2:       # BB#0:
    726 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
    727 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    728 ; AVX2-NEXT:    retq
    729   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
    730   ret <8 x float> %shuffle
    731 }
    732 
    733 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
    734 ; ALL-LABEL: shuffle_v8f32_3210ba98:
    735 ; ALL:       # BB#0:
    736 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    737 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    738 ; ALL-NEXT:    retq
    739   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8> ; result = a[3..0] followed by b[3..0] (indices 8-15 select from %b)
    740   ret <8 x float> %shuffle
    741 }
    742 
    743 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
    744 ; ALL-LABEL: shuffle_v8f32_3210fedc:
    745 ; ALL:       # BB#0:
    746 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    747 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    748 ; ALL-NEXT:    retq
    749   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12> ; result = a[3..0] followed by b[7..4] (indices 8-15 select from %b)
    750   ret <8 x float> %shuffle
    751 }
    752 
    753 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
    754 ; ALL-LABEL: shuffle_v8f32_7654fedc:
    755 ; ALL:       # BB#0:
    756 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    757 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    758 ; ALL-NEXT:    retq
    759   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12> ; result = a[7..4] followed by b[7..4] (indices 8-15 select from %b)
    760   ret <8 x float> %shuffle
    761 }
    762 
    763 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
    764 ; ALL-LABEL: shuffle_v8f32_fedc7654:
    765 ; ALL:       # BB#0:
    766 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
    767 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    768 ; ALL-NEXT:    retq
    769   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4> ; result = b[7..4] followed by a[7..4] (indices 8-15 select from %b)
    770   ret <8 x float> %shuffle
    771 }
    772 
    773 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
    774 ; AVX1-LABEL: PR21138:
    775 ; AVX1:       # BB#0:
    776 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    777 ; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
    778 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
    779 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    780 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
    781 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    782 ; AVX1-NEXT:    retq
    783 ;
    784 ; AVX2-LABEL: PR21138:
    785 ; AVX2:       # BB#0:
    786 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7>
    787 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    788 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
    789 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    790 ; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    791 ; AVX2-NEXT:    retq
    792   %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15> ; odd-indexed elements of %truc, then odd-indexed elements of %tchose; NOTE(review): name presumably refers to bug report 21138 - confirm
    793   ret <8 x float> %shuffle
    794 }
    795 
    796 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
    797 ; ALL-LABEL: shuffle_v8f32_ba987654:
    798 ; ALL:       # BB#0:
    799 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
    800 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    801 ; ALL-NEXT:    retq
    802   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4> ; result = b[3..0] followed by a[7..4] (indices 8-15 select from %b)
    803   ret <8 x float> %shuffle
    804 }
    805 
    806 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
    807 ; ALL-LABEL: shuffle_v8f32_ba983210:
    808 ; ALL:       # BB#0:
    809 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    810 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    811 ; ALL-NEXT:    retq
    812   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0> ; result = b[3..0] followed by a[3..0], as the name encodes (indices 8-15 select from %b)
    813   ret <8 x float> %shuffle
    814 }
    815 
    816 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
    817 ; ALL-LABEL: shuffle_v8f32_80u1c4u5:
    818 ; ALL:       # BB#0:
    819 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
    820 ; ALL-NEXT:    retq
    821   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5> ; elements 2 and 6 are undef ('u' in the name), so the expected single unpack-low of %b,%a is free to put b[1] and b[5] there
    822   ret <8 x float> %shuffle
    823 }
    824 
    825 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
    826 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
    827 ; ALL:       # BB#0:
    828 ; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
    829 ; ALL-NEXT:    retq
    830   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7> ; only element 2 is undef ('u' in the name); the expected single unpack-high of %b,%a puts b[3] there
    831   ret <8 x float> %shuffle
    832 }
    833 
    834 define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
    835 ; AVX1-LABEL: shuffle_v8f32_uuuu1111:
    836 ; AVX1:       # BB#0:
    837 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    838 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    839 ; AVX1-NEXT:    retq
    840 ;
    841 ; AVX2-LABEL: shuffle_v8f32_uuuu1111:
    842 ; AVX2:       # BB#0:
    843 ; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
    844 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    845 ; AVX2-NEXT:    retq
    846   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
    847   ret <8 x float> %shuffle
    848 }
    849 
    850 define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
    851 ; AVX1-LABEL: shuffle_v8f32_44444444:
    852 ; AVX1:       # BB#0:
    853 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    854 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    855 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    856 ; AVX1-NEXT:    retq
    857 ;
    858 ; AVX2-LABEL: shuffle_v8f32_44444444:
    859 ; AVX2:       # BB#0:
    860 ; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
    861 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    862 ; AVX2-NEXT:    retq
    863   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    864   ret <8 x float> %shuffle
    865 }
    866 
    867 define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
    868 ; AVX1-LABEL: shuffle_v8f32_5555uuuu:
    869 ; AVX1:       # BB#0:
    870 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    871 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    872 ; AVX1-NEXT:    retq
    873 ;
    874 ; AVX2-LABEL: shuffle_v8f32_5555uuuu:
    875 ; AVX2:       # BB#0:
    876 ; AVX2-NEXT:    vbroadcastss {{.*}}(%rip), %ymm1
    877 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    878 ; AVX2-NEXT:    retq
    879   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
    880   ret <8 x float> %shuffle
    881 }
    882 
    883 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
    884 ; AVX1-LABEL: shuffle_v8i32_00000000:
    885 ; AVX1:       # BB#0:
    886 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    887 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    888 ; AVX1-NEXT:    retq
    889 ;
    890 ; AVX2-LABEL: shuffle_v8i32_00000000:
    891 ; AVX2:       # BB#0:
    892 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
    893 ; AVX2-NEXT:    retq
    894   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    895   ret <8 x i32> %shuffle
    896 }
    897 
    898 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
    899 ; AVX1-LABEL: shuffle_v8i32_00000010:
    900 ; AVX1:       # BB#0:
    901 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    902 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
    903 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    904 ; AVX1-NEXT:    retq
    905 ;
    906 ; AVX2-LABEL: shuffle_v8i32_00000010:
    907 ; AVX2:       # BB#0:
    908 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
    909 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    910 ; AVX2-NEXT:    retq
    911   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
    912   ret <8 x i32> %shuffle
    913 }
    914 
    915 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
    916 ; AVX1-LABEL: shuffle_v8i32_00000200:
    917 ; AVX1:       # BB#0:
    918 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    919 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
    920 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    921 ; AVX1-NEXT:    retq
    922 ;
    923 ; AVX2-LABEL: shuffle_v8i32_00000200:
    924 ; AVX2:       # BB#0:
    925 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
    926 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    927 ; AVX2-NEXT:    retq
    928   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
    929   ret <8 x i32> %shuffle
    930 }
    931 
    932 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
    933 ; AVX1-LABEL: shuffle_v8i32_00003000:
    934 ; AVX1:       # BB#0:
    935 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    936 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
    937 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    938 ; AVX1-NEXT:    retq
    939 ;
    940 ; AVX2-LABEL: shuffle_v8i32_00003000:
    941 ; AVX2:       # BB#0:
    942 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
    943 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    944 ; AVX2-NEXT:    retq
    945   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
    946   ret <8 x i32> %shuffle
    947 }
    948 
    949 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
    950 ; AVX1-LABEL: shuffle_v8i32_00040000:
    951 ; AVX1:       # BB#0:
    952 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    953 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
    954 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
    955 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
    956 ; AVX1-NEXT:    retq
    957 ;
    958 ; AVX2-LABEL: shuffle_v8i32_00040000:
    959 ; AVX2:       # BB#0:
    960 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
    961 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    962 ; AVX2-NEXT:    retq
    963   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
    964   ret <8 x i32> %shuffle
    965 }
    966 
    967 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
    968 ; AVX1-LABEL: shuffle_v8i32_00500000:
    969 ; AVX1:       # BB#0:
    970 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    971 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    972 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
    973 ; AVX1-NEXT:    retq
    974 ;
    975 ; AVX2-LABEL: shuffle_v8i32_00500000:
    976 ; AVX2:       # BB#0:
    977 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
    978 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    979 ; AVX2-NEXT:    retq
    980   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    981   ret <8 x i32> %shuffle
    982 }
    983 
    984 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
    985 ; AVX1-LABEL: shuffle_v8i32_06000000:
    986 ; AVX1:       # BB#0:
    987 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    988 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    989 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
    990 ; AVX1-NEXT:    retq
    991 ;
    992 ; AVX2-LABEL: shuffle_v8i32_06000000:
    993 ; AVX2:       # BB#0:
    994 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
    995 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    996 ; AVX2-NEXT:    retq
    997   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    998   ret <8 x i32> %shuffle
    999 }
   1000 
   1001 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
   1002 ; AVX1-LABEL: shuffle_v8i32_70000000:
   1003 ; AVX1:       # BB#0:
   1004 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
   1005 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
   1006 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
   1007 ; AVX1-NEXT:    retq
   1008 ;
   1009 ; AVX2-LABEL: shuffle_v8i32_70000000:
   1010 ; AVX2:       # BB#0:
   1011 ; AVX2-NEXT:    movl $7, %eax
   1012 ; AVX2-NEXT:    vmovd %eax, %xmm1
   1013 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1014 ; AVX2-NEXT:    retq
   1015   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1016   ret <8 x i32> %shuffle
   1017 }
   1018 
   1019 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
   1020 ; AVX1-LABEL: shuffle_v8i32_01014545:
   1021 ; AVX1:       # BB#0:
   1022 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
   1023 ; AVX1-NEXT:    retq
   1024 ;
   1025 ; AVX2-LABEL: shuffle_v8i32_01014545:
   1026 ; AVX2:       # BB#0:
   1027 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
   1028 ; AVX2-NEXT:    retq
   1029   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
   1030   ret <8 x i32> %shuffle
   1031 }
   1032 
   1033 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
   1034 ; AVX1-LABEL: shuffle_v8i32_00112233:
   1035 ; AVX1:       # BB#0:
   1036 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
   1037 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1038 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1039 ; AVX1-NEXT:    retq
   1040 ;
   1041 ; AVX2-LABEL: shuffle_v8i32_00112233:
   1042 ; AVX2:       # BB#0:
   1043 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
   1044 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1045 ; AVX2-NEXT:    retq
   1046   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
   1047   ret <8 x i32> %shuffle
   1048 }
   1049 
   1050 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
   1051 ; AVX1-LABEL: shuffle_v8i32_00001111:
   1052 ; AVX1:       # BB#0:
   1053 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
   1054 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1055 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1056 ; AVX1-NEXT:    retq
   1057 ;
   1058 ; AVX2-LABEL: shuffle_v8i32_00001111:
   1059 ; AVX2:       # BB#0:
   1060 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
   1061 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1062 ; AVX2-NEXT:    retq
   1063   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
   1064   ret <8 x i32> %shuffle
   1065 }
   1066 
   1067 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
   1068 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
   1069 ; AVX1:       # BB#0:
   1070 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
   1071 ; AVX1-NEXT:    retq
   1072 ;
   1073 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
   1074 ; AVX2:       # BB#0:
   1075 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
   1076 ; AVX2-NEXT:    retq
   1077   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
   1078   ret <8 x i32> %shuffle
   1079 }
   1080 
   1081 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
   1082 ; AVX1-LABEL: shuffle_v8i32_08080808:
   1083 ; AVX1:       # BB#0:
   1084 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
   1085 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
   1086 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
   1087 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1088 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1089 ; AVX1-NEXT:    retq
   1090 ;
   1091 ; AVX2-LABEL: shuffle_v8i32_08080808:
   1092 ; AVX2:       # BB#0:
   1093 ; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
   1094 ; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
   1095 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1096 ; AVX2-NEXT:    retq
   1097   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
   1098   ret <8 x i32> %shuffle
   1099 }
   1100 
   1101 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
   1102 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
   1103 ; AVX1:       # BB#0:
   1104 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
   1105 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   1106 ; AVX1-NEXT:    retq
   1107 ;
   1108 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
   1109 ; AVX2:       # BB#0:
   1110 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
   1111 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
   1112 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1113 ; AVX2-NEXT:    retq
   1114   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
   1115   ret <8 x i32> %shuffle
   1116 }
   1117 
   1118 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
   1119 ; AVX1-LABEL: shuffle_v8i32_8823cc67:
   1120 ; AVX1:       # BB#0:
   1121 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
   1122 ; AVX1-NEXT:    retq
   1123 ;
   1124 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
   1125 ; AVX2:       # BB#0:
   1126 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
   1127 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1128 ; AVX2-NEXT:    retq
   1129   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
   1130   ret <8 x i32> %shuffle
   1131 }
   1132 
   1133 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
   1134 ; AVX1-LABEL: shuffle_v8i32_9832dc76:
   1135 ; AVX1:       # BB#0:
   1136 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
   1137 ; AVX1-NEXT:    retq
   1138 ;
   1139 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
   1140 ; AVX2:       # BB#0:
   1141 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1142 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1143 ; AVX2-NEXT:    retq
   1144   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
   1145   ret <8 x i32> %shuffle
   1146 }
   1147 
   1148 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
   1149 ; AVX1-LABEL: shuffle_v8i32_9810dc54:
   1150 ; AVX1:       # BB#0:
   1151 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
   1152 ; AVX1-NEXT:    retq
   1153 ;
   1154 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
   1155 ; AVX2:       # BB#0:
   1156 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
   1157 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
   1158 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1159 ; AVX2-NEXT:    retq
   1160   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
   1161   ret <8 x i32> %shuffle
   1162 }
   1163 
   1164 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
   1165 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
   1166 ; AVX1:       # BB#0:
   1167 ; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
   1168 ; AVX1-NEXT:    retq
   1169 ;
   1170 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
   1171 ; AVX2:       # BB#0:
   1172 ; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
   1173 ; AVX2-NEXT:    retq
   1174   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
   1175   ret <8 x i32> %shuffle
   1176 }
   1177 
   1178 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
   1179 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
   1180 ; AVX1:       # BB#0:
   1181 ; AVX1-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
   1182 ; AVX1-NEXT:    retq
   1183 ;
   1184 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
   1185 ; AVX2:       # BB#0:
   1186 ; AVX2-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
   1187 ; AVX2-NEXT:    retq
   1188   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
   1189   ret <8 x i32> %shuffle
   1190 }
   1191 
   1192 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
   1193 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
   1194 ; AVX1:       # BB#0:
   1195 ; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1196 ; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1197 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1198 ; AVX1-NEXT:    retq
   1199 ;
   1200 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
   1201 ; AVX2:       # BB#0:
   1202 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
   1203 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   1204 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1205 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1206 ; AVX2-NEXT:    retq
   1207   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1208   ret <8 x i32> %shuffle
   1209 }
   1210 
   1211 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
   1212 ; AVX1-LABEL: shuffle_v8i32_08991abb:
   1213 ; AVX1:       # BB#0:
   1214 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
   1215 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
   1216 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
   1217 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
   1218 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   1219 ; AVX1-NEXT:    retq
   1220 ;
   1221 ; AVX2-LABEL: shuffle_v8i32_08991abb:
   1222 ; AVX2:       # BB#0:
   1223 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
   1224 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
   1225 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
   1226 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   1227 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1228 ; AVX2-NEXT:    retq
   1229   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
   1230   ret <8 x i32> %shuffle
   1231 }
   1232 
   1233 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
   1234 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
   1235 ; AVX1:       # BB#0:
   1236 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
   1237 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
   1238 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   1239 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1240 ; AVX1-NEXT:    retq
   1241 ;
   1242 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
   1243 ; AVX2:       # BB#0:
   1244 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1245 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1246 ; AVX2-NEXT:    retq
   1247   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   1248   ret <8 x i32> %shuffle
   1249 }
   1250 
   1251 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
   1252 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
   1253 ; AVX1:       # BB#0:
   1254 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
   1255 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1256 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1257 ; AVX1-NEXT:    retq
   1258 ;
   1259 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
   1260 ; AVX2:       # BB#0:
   1261 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
   1262 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
   1263 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1264 ; AVX2-NEXT:    retq
   1265   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
   1266   ret <8 x i32> %shuffle
   1267 }
   1268 
   1269 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
   1270 ; AVX1-LABEL: shuffle_v8i32_00014445:
   1271 ; AVX1:       # BB#0:
   1272 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
   1273 ; AVX1-NEXT:    retq
   1274 ;
   1275 ; AVX2-LABEL: shuffle_v8i32_00014445:
   1276 ; AVX2:       # BB#0:
   1277 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
   1278 ; AVX2-NEXT:    retq
   1279   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
   1280   ret <8 x i32> %shuffle
   1281 }
   1282 
   1283 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the pattern 0,0,2,0 is applied
        ; identically inside each 128-bit half.
   1284 ; AVX1-LABEL: shuffle_v8i32_00204464:
   1285 ; AVX1:       # BB#0:
   1286 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
   1287 ; AVX1-NEXT:    retq
   1288 ;
   1289 ; AVX2-LABEL: shuffle_v8i32_00204464:
   1290 ; AVX2:       # BB#0:
   1291 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
   1292 ; AVX2-NEXT:    retq
   1293   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
   1294   ret <8 x i32> %shuffle
   1295 }
   1296 
   1297 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the pattern 0,3,0,0 is applied
        ; identically inside each 128-bit half.
   1298 ; AVX1-LABEL: shuffle_v8i32_03004744:
   1299 ; AVX1:       # BB#0:
   1300 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
   1301 ; AVX1-NEXT:    retq
   1302 ;
   1303 ; AVX2-LABEL: shuffle_v8i32_03004744:
   1304 ; AVX2:       # BB#0:
   1305 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
   1306 ; AVX2-NEXT:    retq
   1307   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
   1308   ret <8 x i32> %shuffle
   1309 }
   1310 
   1311 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the pattern 1,0,0,0 is applied
        ; identically inside each 128-bit half.
   1312 ; AVX1-LABEL: shuffle_v8i32_10005444:
   1313 ; AVX1:       # BB#0:
   1314 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
   1315 ; AVX1-NEXT:    retq
   1316 ;
   1317 ; AVX2-LABEL: shuffle_v8i32_10005444:
   1318 ; AVX2:       # BB#0:
   1319 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
   1320 ; AVX2-NEXT:    retq
   1321   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
   1322   ret <8 x i32> %shuffle
   1323 }
   1324 
   1325 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the pattern 2,2,0,0 is applied
        ; identically inside each 128-bit half.
   1326 ; AVX1-LABEL: shuffle_v8i32_22006644:
   1327 ; AVX1:       # BB#0:
   1328 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
   1329 ; AVX1-NEXT:    retq
   1330 ;
   1331 ; AVX2-LABEL: shuffle_v8i32_22006644:
   1332 ; AVX2:       # BB#0:
   1333 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
   1334 ; AVX2-NEXT:    retq
   1335   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
   1336   ret <8 x i32> %shuffle
   1337 }
   1338 
   1339 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the pattern 3,3,3,0 is applied
        ; identically inside each 128-bit half.
   1340 ; AVX1-LABEL: shuffle_v8i32_33307774:
   1341 ; AVX1:       # BB#0:
   1342 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
   1343 ; AVX1-NEXT:    retq
   1344 ;
   1345 ; AVX2-LABEL: shuffle_v8i32_33307774:
   1346 ; AVX2:       # BB#0:
   1347 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
   1348 ; AVX2-NEXT:    retq
   1349   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
   1350   ret <8 x i32> %shuffle
   1351 }
   1352 
   1353 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): reverses the four elements
        ; inside each 128-bit half (pattern 3,2,1,0 in both halves).
   1354 ; AVX1-LABEL: shuffle_v8i32_32107654:
   1355 ; AVX1:       # BB#0:
   1356 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1357 ; AVX1-NEXT:    retq
   1358 ;
   1359 ; AVX2-LABEL: shuffle_v8i32_32107654:
   1360 ; AVX2:       # BB#0:
   1361 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1362 ; AVX2-NEXT:    retq
   1363   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   1364   ret <8 x i32> %shuffle
   1365 }
   1366 
   1367 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the pattern 0,0,2,3 is applied
        ; identically inside each 128-bit half.
   1368 ; AVX1-LABEL: shuffle_v8i32_00234467:
   1369 ; AVX1:       # BB#0:
   1370 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
   1371 ; AVX1-NEXT:    retq
   1372 ;
   1373 ; AVX2-LABEL: shuffle_v8i32_00234467:
   1374 ; AVX2:       # BB#0:
   1375 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
   1376 ; AVX2-NEXT:    retq
   1377   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
   1378   ret <8 x i32> %shuffle
   1379 }
   1380 
   1381 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): every even-numbered element is
        ; duplicated into the following odd lane. The AVX1 checks expect vmovsldup.
   1382 ; AVX1-LABEL: shuffle_v8i32_00224466:
   1383 ; AVX1:       # BB#0:
   1384 ; AVX1-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
   1385 ; AVX1-NEXT:    retq
   1386 ;
   1387 ; AVX2-LABEL: shuffle_v8i32_00224466:
   1388 ; AVX2:       # BB#0:
   1389 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
   1390 ; AVX2-NEXT:    retq
   1391   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   1392   ret <8 x i32> %shuffle
   1393 }
   1394 
   1395 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): swaps the two elements of every
        ; adjacent even/odd pair.
   1396 ; AVX1-LABEL: shuffle_v8i32_10325476:
   1397 ; AVX1:       # BB#0:
   1398 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1399 ; AVX1-NEXT:    retq
   1400 ;
   1401 ; AVX2-LABEL: shuffle_v8i32_10325476:
   1402 ; AVX2:       # BB#0:
   1403 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1404 ; AVX2-NEXT:    retq
   1405   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   1406   ret <8 x i32> %shuffle
   1407 }
   1408 
   1409 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): every odd-numbered element is
        ; duplicated into the preceding even lane. The AVX1 checks expect vmovshdup.
   1410 ; AVX1-LABEL: shuffle_v8i32_11335577:
   1411 ; AVX1:       # BB#0:
   1412 ; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
   1413 ; AVX1-NEXT:    retq
   1414 ;
   1415 ; AVX2-LABEL: shuffle_v8i32_11335577:
   1416 ; AVX2:       # BB#0:
   1417 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
   1418 ; AVX2-NEXT:    retq
   1419   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
   1420   ret <8 x i32> %shuffle
   1421 }
   1422 
   1423 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the pattern 1,0,2,3 is applied
        ; identically inside each 128-bit half.
   1424 ; AVX1-LABEL: shuffle_v8i32_10235467:
   1425 ; AVX1:       # BB#0:
   1426 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
   1427 ; AVX1-NEXT:    retq
   1428 ;
   1429 ; AVX2-LABEL: shuffle_v8i32_10235467:
   1430 ; AVX2:       # BB#0:
   1431 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
   1432 ; AVX2-NEXT:    retq
   1433   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1434   ret <8 x i32> %shuffle
   1435 }
   1436 
   1437 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the pattern 1,0,2,2 is applied
        ; identically inside each 128-bit half.
   1438 ; AVX1-LABEL: shuffle_v8i32_10225466:
   1439 ; AVX1:       # BB#0:
   1440 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
   1441 ; AVX1-NEXT:    retq
   1442 ;
   1443 ; AVX2-LABEL: shuffle_v8i32_10225466:
   1444 ; AVX2:       # BB#0:
   1445 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
   1446 ; AVX2-NEXT:    retq
   1447   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
   1448   ret <8 x i32> %shuffle
   1449 }
   1450 
   1451 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): elements stay inside their own
        ; 128-bit half, but the halves use different patterns (0,0,0,1 low versus
        ; 1,0,0,0 high, relative to each half).
   1452 ; AVX1-LABEL: shuffle_v8i32_00015444:
   1453 ; AVX1:       # BB#0:
   1454 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
   1455 ; AVX1-NEXT:    retq
   1456 ;
   1457 ; AVX2-LABEL: shuffle_v8i32_00015444:
   1458 ; AVX2:       # BB#0:
   1459 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
   1460 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1461 ; AVX2-NEXT:    retq
   1462   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
   1463   ret <8 x i32> %shuffle
   1464 }
   1465 
   1466 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): elements stay inside their own
        ; 128-bit half, but the halves use different patterns (0,0,2,0 low versus
        ; 0,2,0,0 high, relative to each half).
   1467 ; AVX1-LABEL: shuffle_v8i32_00204644:
   1468 ; AVX1:       # BB#0:
   1469 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
   1470 ; AVX1-NEXT:    retq
   1471 ;
   1472 ; AVX2-LABEL: shuffle_v8i32_00204644:
   1473 ; AVX2:       # BB#0:
   1474 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
   1475 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1476 ; AVX2-NEXT:    retq
   1477   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
   1478   ret <8 x i32> %shuffle
   1479 }
   1480 
   1481 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): elements stay inside their own
        ; 128-bit half, but the halves use different patterns (0,3,0,0 low versus
        ; 0,0,3,0 high, relative to each half).
   1482 ; AVX1-LABEL: shuffle_v8i32_03004474:
   1483 ; AVX1:       # BB#0:
   1484 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
   1485 ; AVX1-NEXT:    retq
   1486 ;
   1487 ; AVX2-LABEL: shuffle_v8i32_03004474:
   1488 ; AVX2:       # BB#0:
   1489 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
   1490 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1491 ; AVX2-NEXT:    retq
   1492   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
   1493   ret <8 x i32> %shuffle
   1494 }
   1495 
   1496 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): low half uses pattern 1,0,0,0;
        ; high half is a splat of %a[4]. No element crosses a 128-bit half.
   1497 ; AVX1-LABEL: shuffle_v8i32_10004444:
   1498 ; AVX1:       # BB#0:
   1499 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
   1500 ; AVX1-NEXT:    retq
   1501 ;
   1502 ; AVX2-LABEL: shuffle_v8i32_10004444:
   1503 ; AVX2:       # BB#0:
   1504 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
   1505 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1506 ; AVX2-NEXT:    retq
   1507   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   1508   ret <8 x i32> %shuffle
   1509 }
   1510 
   1511 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): elements stay inside their own
        ; 128-bit half, but the halves use different patterns (2,2,0,0 low versus
        ; 2,0,0,2 high, relative to each half).
   1512 ; AVX1-LABEL: shuffle_v8i32_22006446:
   1513 ; AVX1:       # BB#0:
   1514 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
   1515 ; AVX1-NEXT:    retq
   1516 ;
   1517 ; AVX2-LABEL: shuffle_v8i32_22006446:
   1518 ; AVX2:       # BB#0:
   1519 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
   1520 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1521 ; AVX2-NEXT:    retq
   1522   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
   1523   ret <8 x i32> %shuffle
   1524 }
   1525 
   1526 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): elements stay inside their own
        ; 128-bit half, but the halves use different patterns (3,3,3,0 low versus
        ; 3,0,3,0 high, relative to each half).
   1527 ; AVX1-LABEL: shuffle_v8i32_33307474:
   1528 ; AVX1:       # BB#0:
   1529 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
   1530 ; AVX1-NEXT:    retq
   1531 ;
   1532 ; AVX2-LABEL: shuffle_v8i32_33307474:
   1533 ; AVX2:       # BB#0:
   1534 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
   1535 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1536 ; AVX2-NEXT:    retq
   1537   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
   1538   ret <8 x i32> %shuffle
   1539 }
   1540 
   1541 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the low four elements are
        ; reversed while the high four elements are passed through unchanged.
   1542 ; AVX1-LABEL: shuffle_v8i32_32104567:
   1543 ; AVX1:       # BB#0:
   1544 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
   1545 ; AVX1-NEXT:    retq
   1546 ;
   1547 ; AVX2-LABEL: shuffle_v8i32_32104567:
   1548 ; AVX2:       # BB#0:
   1549 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
   1550 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1551 ; AVX2-NEXT:    retq
   1552   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   1553   ret <8 x i32> %shuffle
   1554 }
   1555 
   1556 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): elements stay inside their own
        ; 128-bit half, but the halves use different patterns (0,0,2,3 low versus
        ; 2,3,0,0 high, relative to each half).
   1557 ; AVX1-LABEL: shuffle_v8i32_00236744:
   1558 ; AVX1:       # BB#0:
   1559 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
   1560 ; AVX1-NEXT:    retq
   1561 ;
   1562 ; AVX2-LABEL: shuffle_v8i32_00236744:
   1563 ; AVX2:       # BB#0:
   1564 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
   1565 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1566 ; AVX2-NEXT:    retq
   1567   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
   1568   ret <8 x i32> %shuffle
   1569 }
   1570 
   1571 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): elements stay inside their own
        ; 128-bit half, but the halves use different patterns (0,0,2,2 low versus
        ; 2,2,0,0 high, relative to each half).
   1572 ; AVX1-LABEL: shuffle_v8i32_00226644:
   1573 ; AVX1:       # BB#0:
   1574 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
   1575 ; AVX1-NEXT:    retq
   1576 ;
   1577 ; AVX2-LABEL: shuffle_v8i32_00226644:
   1578 ; AVX2:       # BB#0:
   1579 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
   1580 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1581 ; AVX2-NEXT:    retq
   1582   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
   1583   ret <8 x i32> %shuffle
   1584 }
   1585 
   1586 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): adjacent pairs are swapped in
        ; the low half (1,0,3,2); the high half is passed through unchanged.
   1587 ; AVX1-LABEL: shuffle_v8i32_10324567:
   1588 ; AVX1:       # BB#0:
   1589 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
   1590 ; AVX1-NEXT:    retq
   1591 ;
   1592 ; AVX2-LABEL: shuffle_v8i32_10324567:
   1593 ; AVX2:       # BB#0:
   1594 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
   1595 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1596 ; AVX2-NEXT:    retq
   1597   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
   1598   ret <8 x i32> %shuffle
   1599 }
   1600 
   1601 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): odd elements are duplicated in
        ; the low half (1,1,3,3); the high half is passed through unchanged.
   1602 ; AVX1-LABEL: shuffle_v8i32_11334567:
   1603 ; AVX1:       # BB#0:
   1604 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
   1605 ; AVX1-NEXT:    retq
   1606 ;
   1607 ; AVX2-LABEL: shuffle_v8i32_11334567:
   1608 ; AVX2:       # BB#0:
   1609 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
   1610 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1611 ; AVX2-NEXT:    retq
   1612   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
   1613   ret <8 x i32> %shuffle
   1614 }
   1615 
   1616 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the low half is passed through
        ; unchanged; in the high half only elements 4 and 5 are swapped.
   1617 ; AVX1-LABEL: shuffle_v8i32_01235467:
   1618 ; AVX1:       # BB#0:
   1619 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
   1620 ; AVX1-NEXT:    retq
   1621 ;
   1622 ; AVX2-LABEL: shuffle_v8i32_01235467:
   1623 ; AVX2:       # BB#0:
   1624 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
   1625 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1626 ; AVX2-NEXT:    retq
   1627   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1628   ret <8 x i32> %shuffle
   1629 }
   1630 
   1631 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): the low half is passed through
        ; unchanged; the high half uses pattern 1,0,2,2 relative to that half.
   1632 ; AVX1-LABEL: shuffle_v8i32_01235466:
   1633 ; AVX1:       # BB#0:
   1634 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
   1635 ; AVX1-NEXT:    retq
   1636 ;
   1637 ; AVX2-LABEL: shuffle_v8i32_01235466:
   1638 ; AVX2:       # BB#0:
   1639 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
   1640 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1641 ; AVX2-NEXT:    retq
   1642   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
   1643   ret <8 x i32> %shuffle
   1644 }
   1645 
   1646 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused) with result lanes 3 and 5 undef;
        ; the defined lanes stay inside their own 128-bit half, and the two halves
        ; use different patterns.
   1647 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
   1648 ; AVX1:       # BB#0:
   1649 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
   1650 ; AVX1-NEXT:    retq
   1651 ;
   1652 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
   1653 ; AVX2:       # BB#0:
   1654 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
   1655 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1656 ; AVX2-NEXT:    retq
   1657   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
   1658   ret <8 x i32> %shuffle
   1659 }
   1660 
   1661 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): lanes 0-1 take %a[0], lanes 4-5
        ; take %a[6]; lanes 2, 3, 6 and 7 are undef.
   1662 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
   1663 ; AVX1:       # BB#0:
   1664 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
   1665 ; AVX1-NEXT:    retq
   1666 ;
   1667 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
   1668 ; AVX2:       # BB#0:
   1669 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
   1670 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1671 ; AVX2-NEXT:    retq
   1672   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
   1673   ret <8 x i32> %shuffle
   1674 }
   1675 
   1676 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): adjacent pairs swapped in the
        ; low half, lanes 4-5 passed through, lanes 6-7 undef.
   1677 ; AVX1-LABEL: shuffle_v8i32_103245uu:
   1678 ; AVX1:       # BB#0:
   1679 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
   1680 ; AVX1-NEXT:    retq
   1681 ;
   1682 ; AVX2-LABEL: shuffle_v8i32_103245uu:
   1683 ; AVX2:       # BB#0:
   1684 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
   1685 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1686 ; AVX2-NEXT:    retq
   1687   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
   1688   ret <8 x i32> %shuffle
   1689 }
   1690 
   1691 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): odd elements duplicated in the
        ; low half, lanes 4-5 undef, lanes 6-7 passed through.
   1692 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
   1693 ; AVX1:       # BB#0:
   1694 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
   1695 ; AVX1-NEXT:    retq
   1696 ;
   1697 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
   1698 ; AVX2:       # BB#0:
   1699 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
   1700 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1701 ; AVX2-NEXT:    retq
   1702   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
   1703   ret <8 x i32> %shuffle
   1704 }
   1705 
   1706 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): lanes 0 and 3 are passed
        ; through, lanes 4-5 take %a[5] and %a[4]; lanes 1, 2, 6 and 7 are undef.
   1707 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
   1708 ; AVX1:       # BB#0:
   1709 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
   1710 ; AVX1-NEXT:    retq
   1711 ;
   1712 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
   1713 ; AVX2:       # BB#0:
   1714 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
   1715 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1716 ; AVX2-NEXT:    retq
   1717   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
   1718   ret <8 x i32> %shuffle
   1719 }
   1720 
   1721 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): lane 3 is passed through and
        ; lanes 6-7 both take %a[6]; lanes 0, 1, 2, 4 and 5 are undef.
   1722 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
   1723 ; AVX1:       # BB#0:
   1724 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
   1725 ; AVX1-NEXT:    retq
   1726 ;
   1727 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
   1728 ; AVX2:       # BB#0:
   1729 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
   1730 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1731 ; AVX2-NEXT:    retq
   1732   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
   1733   ret <8 x i32> %shuffle
   1734 }
   1735 
   1736 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle that moves elements across the 128-bit halves: lanes 0,
        ; 5 and 7 take %a[6], %a[7] and %a[5]; lanes 1, 2, 3, 4 and 6 take %b[4],
        ; %b[2], %b[2], %b[0] and %b[6]. Both expected sequences permute each input
        ; separately and finish with a blend.
   1737 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
   1738 ; AVX1:       # BB#0:
   1739 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
   1740 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
   1741 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1742 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
   1743 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1744 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
   1745 ; AVX1-NEXT:    retq
   1746 ;
   1747 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
   1748 ; AVX2:       # BB#0:
   1749 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
   1750 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   1751 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
   1752 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
   1753 ; AVX2-NEXT:    retq
   1754   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
   1755   ret <8 x i32> %shuffle
   1756 }
   1757 
   1758 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): both halves of the result hold
        ; the low four elements of %a in reversed order.
   1759 ; AVX1-LABEL: shuffle_v8i32_32103210:
   1760 ; AVX1:       # BB#0:
   1761 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1762 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1763 ; AVX1-NEXT:    retq
   1764 ;
   1765 ; AVX2-LABEL: shuffle_v8i32_32103210:
   1766 ; AVX2:       # BB#0:
   1767 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
   1768 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1769 ; AVX2-NEXT:    retq
   1770   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
   1771   ret <8 x i32> %shuffle
   1772 }
   1773 
   1774 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): both halves of the result hold
        ; the high four elements of %a in reversed order.
   1775 ; AVX1-LABEL: shuffle_v8i32_76547654:
   1776 ; AVX1:       # BB#0:
   1777 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1778 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1779 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1780 ; AVX1-NEXT:    retq
   1781 ;
   1782 ; AVX2-LABEL: shuffle_v8i32_76547654:
   1783 ; AVX2:       # BB#0:
   1784 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
   1785 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1786 ; AVX2-NEXT:    retq
   1787   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
   1788   ret <8 x i32> %shuffle
   1789 }
   1790 
   1791 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): full reversal of the eight
        ; elements, so the two 128-bit halves trade places.
   1792 ; AVX1-LABEL: shuffle_v8i32_76543210:
   1793 ; AVX1:       # BB#0:
   1794 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
   1795 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1796 ; AVX1-NEXT:    retq
   1797 ;
   1798 ; AVX2-LABEL: shuffle_v8i32_76543210:
   1799 ; AVX2:       # BB#0:
   1800 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
   1801 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1802 ; AVX2-NEXT:    retq
   1803   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   1804   ret <8 x i32> %shuffle
   1805 }
   1806 
   1807 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle: the low half is the reversed low four elements of %a,
        ; the high half is the reversed low four elements of %b (indices 11..8).
   1808 ; AVX1-LABEL: shuffle_v8i32_3210ba98:
   1809 ; AVX1:       # BB#0:
   1810 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1811 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1812 ; AVX1-NEXT:    retq
   1813 ;
   1814 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
   1815 ; AVX2:       # BB#0:
   1816 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1817 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1818 ; AVX2-NEXT:    retq
   1819   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
   1820   ret <8 x i32> %shuffle
   1821 }
   1822 
   1823 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle: the low half is the reversed low four elements of %a,
        ; the high half is the reversed high four elements of %b (indices 15..12).
   1824 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
   1825 ; AVX1:       # BB#0:
   1826 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
   1827 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1828 ; AVX1-NEXT:    retq
   1829 ;
   1830 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
   1831 ; AVX2:       # BB#0:
   1832 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   1833 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1834 ; AVX2-NEXT:    retq
   1835   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
   1836   ret <8 x i32> %shuffle
   1837 }
   1838 
   1839 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle: the low half is the reversed high four elements of %a,
        ; the high half is the reversed high four elements of %b (indices 15..12).
   1840 ; AVX1-LABEL: shuffle_v8i32_7654fedc:
   1841 ; AVX1:       # BB#0:
   1842 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1843 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1844 ; AVX1-NEXT:    retq
   1845 ;
   1846 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
   1847 ; AVX2:       # BB#0:
   1848 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1849 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1850 ; AVX2-NEXT:    retq
   1851   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
   1852   ret <8 x i32> %shuffle
   1853 }
   1854 
   1855 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle: the low half is the reversed high four elements of %b
        ; (indices 15..12), the high half is the reversed high four elements of %a.
   1856 ; AVX1-LABEL: shuffle_v8i32_fedc7654:
   1857 ; AVX1:       # BB#0:
   1858 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
   1859 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1860 ; AVX1-NEXT:    retq
   1861 ;
   1862 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
   1863 ; AVX2:       # BB#0:
   1864 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
   1865 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1866 ; AVX2-NEXT:    retq
   1867   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
   1868   ret <8 x i32> %shuffle
   1869 }
   1870 
   1871 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle: the low half is the reversed low four elements of %b
        ; (indices 11..8), the high half is the reversed high four elements of %a.
   1872 ; AVX1-LABEL: shuffle_v8i32_ba987654:
   1873 ; AVX1:       # BB#0:
   1874 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
   1875 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1876 ; AVX1-NEXT:    retq
   1877 ;
   1878 ; AVX2-LABEL: shuffle_v8i32_ba987654:
   1879 ; AVX2:       # BB#0:
   1880 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
   1881 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1882 ; AVX2-NEXT:    retq
   1883   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
   1884   ret <8 x i32> %shuffle
   1885 }
   1886 
   1887 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle: the low half is the reversed low four elements of %b
        ; (indices 11..8), the high half is the reversed low four elements of %a
        ; (indices 3..0). Expected lowering places the low 128 bits of %a above the
        ; low 128 bits of %b and then reverses each half in place.
   1888 ; AVX1-LABEL: shuffle_v8i32_ba983210:
   1889 ; AVX1:       # BB#0:
   1890 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1891 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1892 ; AVX1-NEXT:    retq
   1893 ;
   1894 ; AVX2-LABEL: shuffle_v8i32_ba983210:
   1895 ; AVX2:       # BB#0:
   1896 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
   1897 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1898 ; AVX2-NEXT:    retq
   1899   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
   1900   ret <8 x i32> %shuffle
   1901 }
   1902 
   1903 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
        ; Shuffle of an all-zero vector (first operand) with %a: lanes 0 and 4 are
        ; zero, lanes 3 and 7 take %a[0] and %a[4], the remaining lanes are undef.
        ; The AVX2 checks expect a single per-half byte shift (vpslldq).
   1904 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
   1905 ; AVX1:       # BB#0:
   1906 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1907 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
   1908 ; AVX1-NEXT:    retq
   1909 ;
   1910 ; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
   1911 ; AVX2:       # BB#0:
   1912 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
   1913 ; AVX2-NEXT:    retq
   1914   %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
   1915   ret <8 x i32> %shuffle
   1916 }
   1917 
   1918 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
        ; Shuffle of an all-zero vector (first operand) with %a: lanes 0 and 2 take
        ; %a[1] and %a[3], lanes 4-6 take %a[5..7], lanes 3 and 7 are zero, and
        ; lane 1 is undef. The AVX2 checks expect a single per-half byte shift
        ; (vpsrldq).
   1919 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
   1920 ; AVX1:       # BB#0:
   1921 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1922 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
   1923 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   1924 ; AVX1-NEXT:    retq
   1925 ;
   1926 ; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
   1927 ; AVX2:       # BB#0:
   1928 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
   1929 ; AVX2-NEXT:    retq
   1930   %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
   1931   ret <8 x i32> %shuffle
   1932 }
   1933 
   1934 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle: lanes 0, 1 and 3 take %b[0], %a[0] and %a[1]; lanes 4
        ; and 5 take %b[4] and %a[4]; lanes 2, 6 and 7 are undef. Both expected
        ; sequences are a single low unpack of %b with %a.
        ; NOTE(review): the fifth digit of the function name is 'b' (index 11), but
        ; the mask uses index 12, which is what the checked unpack pattern needs;
        ; the name looks like it should read 80u1c4uu. Confirm before renaming.
   1935 ; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
   1936 ; AVX1:       # BB#0:
   1937 ; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
   1938 ; AVX1-NEXT:    retq
   1939 ;
   1940 ; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
   1941 ; AVX2:       # BB#0:
   1942 ; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
   1943 ; AVX2-NEXT:    retq
   1944   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
   1945   ret <8 x i32> %shuffle
   1946 }
   1947 
   1948 define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): lanes 4-7 all take %a[1], an
        ; element from the low half; lanes 0-3 are undef.
   1949 ; AVX1-LABEL: shuffle_v8i32_uuuu1111:
   1950 ; AVX1:       # BB#0:
   1951 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1952 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1953 ; AVX1-NEXT:    retq
   1954 ;
   1955 ; AVX2-LABEL: shuffle_v8i32_uuuu1111:
   1956 ; AVX2:       # BB#0:
   1957 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
   1958 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1959 ; AVX2-NEXT:    retq
   1960   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
   1961   ret <8 x i32> %shuffle
   1962 }
   1963 
   1964 define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): splats %a[4], the first element
        ; of the high half, to every result lane.
   1965 ; AVX1-LABEL: shuffle_v8i32_44444444:
   1966 ; AVX1:       # BB#0:
   1967 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1968 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1969 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1970 ; AVX1-NEXT:    retq
   1971 ;
   1972 ; AVX2-LABEL: shuffle_v8i32_44444444:
   1973 ; AVX2:       # BB#0:
   1974 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
   1975 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1976 ; AVX2-NEXT:    retq
   1977   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   1978   ret <8 x i32> %shuffle
   1979 }
   1980 
   1981 define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a (%b is unused): lanes 0-3 all take %a[5], an
        ; element from the high half; lanes 4-7 are undef.
   1982 ; AVX1-LABEL: shuffle_v8i32_5555uuuu:
   1983 ; AVX1:       # BB#0:
   1984 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1985 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1986 ; AVX1-NEXT:    retq
   1987 ;
   1988 ; AVX2-LABEL: shuffle_v8i32_5555uuuu:
   1989 ; AVX2:       # BB#0:
   1990 ; AVX2-NEXT:    vpbroadcastd {{.*}}(%rip), %ymm1
   1991 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1992 ; AVX2-NEXT:    retq
   1993   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
   1994   ret <8 x i32> %shuffle
   1995 }
   1996 
   1997 define <8 x float> @splat_mem_v8f32_2(float* %p) {
        ; Loads one float from %p, inserts it into lane 0 of an undef <4 x float>, then splats
        ; lane 0 to eight lanes. Both runs expect a single vbroadcastss from memory.
   1998 ; ALL-LABEL: splat_mem_v8f32_2:
   1999 ; ALL:       # BB#0:
   2000 ; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
   2001 ; ALL-NEXT:    retq
   2002   %1 = load float, float* %p
   2003   %2 = insertelement <4 x float> undef, float %1, i32 0
   2004   %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
   2005   ret <8 x float> %3
   2006 }
   2007 
   2008 define <8 x float> @splat_v8f32(<4 x float> %r) {
        ; Splats lane 0 of the <4 x float> argument %r into an <8 x float> result.
        ; The AVX1 run expects vpermilps + vinsertf128; the AVX2 run expects a register-source
        ; vbroadcastss.
   2009 ; AVX1-LABEL: splat_v8f32:
   2010 ; AVX1:       # BB#0:
   2011 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2012 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   2013 ; AVX1-NEXT:    retq
   2014 ;
   2015 ; AVX2-LABEL: splat_v8f32:
   2016 ; AVX2:       # BB#0:
   2017 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
   2018 ; AVX2-NEXT:    retq
   2019   %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
   2020   ret <8 x float> %1
   2021 }
   2022 
   2023 ;
   2024 ; Shuffle to logical bit shifts
   2025 ;
   2026 
   2027 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
        ; The second shuffle operand is zeroinitializer, so mask index 8 yields zero.
        ; Result lanes are: zero, a[0], undef, a[2], zero, undef, zero, a[6].
        ; The AVX2 run expects this to become one 64-bit left shift by 32 (vpsllq); the AVX1 run
        ; expects vxorps followed by two vshufps.
   2028 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
   2029 ; AVX1:       # BB#0:
   2030 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   2031 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
   2032 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
   2033 ; AVX1-NEXT:    retq
   2034 ;
   2035 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
   2036 ; AVX2:       # BB#0:
   2037 ; AVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
   2038 ; AVX2-NEXT:    retq
   2039   %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
   2040   ret <8 x i32> %shuffle
   2041 }
   2042 
   2043 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
        ; The second shuffle operand is zeroinitializer, so mask index 8 yields zero.
        ; Result lanes are: a[1], undef, a[3], zero, a[5], zero, undef, undef.
        ; The AVX2 run expects this to become one 64-bit right shift by 32 (vpsrlq); the AVX1 run
        ; expects vxorps followed by two vshufps.
   2044 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
   2045 ; AVX1:       # BB#0:
   2046 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   2047 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   2048 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   2049 ; AVX1-NEXT:    retq
   2050 ;
   2051 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
   2052 ; AVX2:       # BB#0:
   2053 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
   2054 ; AVX2-NEXT:    retq
   2055   %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
   2056   ret <8 x i32> %shuffle
   2057 }
   2058 
   2059 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
        ; Mask indices 8-15 select from %b. Result lanes are b[3], a[0], a[1], a[2], b[7], a[4], a[5], a[6].
        ; Each group of four lanes is the last element of %b's group followed by the first three of
        ; %a's group. The AVX2 run expects a single vpalignr; the AVX1 run expects two vshufps.
   2060 ; AVX1-LABEL: shuffle_v8i32_B012F456:
   2061 ; AVX1:       # BB#0:
   2062 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
   2063 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
   2064 ; AVX1-NEXT:    retq
   2065 ;
   2066 ; AVX2-LABEL: shuffle_v8i32_B012F456:
   2067 ; AVX2:       # BB#0:
   2068 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
   2069 ; AVX2-NEXT:    retq
   2070   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
   2071   ret <8 x i32> %shuffle
   2072 }
   2073 
   2074 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
        ; Mask indices 8-15 select from %b. Result lanes are a[1], a[2], a[3], b[0], a[5], a[6], a[7], b[4].
        ; Each group of four lanes is the last three elements of %a's group followed by the first of
        ; %b's group. The AVX2 run expects a single vpalignr; the AVX1 run expects two vshufps.
   2075 ; AVX1-LABEL: shuffle_v8i32_1238567C:
   2076 ; AVX1:       # BB#0:
   2077 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
   2078 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   2079 ; AVX1-NEXT:    retq
   2080 ;
   2081 ; AVX2-LABEL: shuffle_v8i32_1238567C:
   2082 ; AVX2:       # BB#0:
   2083 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
   2084 ; AVX2-NEXT:    retq
   2085   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
   2086   ret <8 x i32> %shuffle
   2087 }
   2088 
   2089 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
        ; Mask indices 8-15 select from %b. Result lanes are b[1], b[2], b[3], a[0], b[5], b[6], b[7], a[4].
        ; Each group of four lanes is the last three elements of %b's group followed by the first of
        ; %a's group. The AVX2 run expects a single vpalignr; the AVX1 run expects two vshufps.
   2090 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
   2091 ; AVX1:       # BB#0:
   2092 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
   2093 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
   2094 ; AVX1-NEXT:    retq
   2095 ;
   2096 ; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
   2097 ; AVX2:       # BB#0:
   2098 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
   2099 ; AVX2-NEXT:    retq
   2100   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
   2101   ret <8 x i32> %shuffle
   2102 }
   2103 
   2104 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
        ; Mask indices 8-15 select from %b. Result lanes are a[3], b[0], b[1], b[2], a[7], b[4], b[5], b[6].
        ; Each group of four lanes is the last element of %a's group followed by the first three of
        ; %b's group. The AVX2 run expects a single vpalignr; the AVX1 run expects two vshufps.
   2105 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
   2106 ; AVX1:       # BB#0:
   2107 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
   2108 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
   2109 ; AVX1-NEXT:    retq
   2110 ;
   2111 ; AVX2-LABEL: shuffle_v8i32_389A7CDE:
   2112 ; AVX2:       # BB#0:
   2113 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
   2114 ; AVX2-NEXT:    retq
   2115   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
   2116   ret <8 x i32> %shuffle
   2117 }
   2118 
   2119 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input permutation of %a (the mask never references %b): within each group of four
        ; lanes the order becomes 3,0,1,2. The AVX1 run expects one vpermilps; the AVX2 run expects
        ; one vpshufd.
   2120 ; AVX1-LABEL: shuffle_v8i32_30127456:
   2121 ; AVX1:       # BB#0:
   2122 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
   2123 ; AVX1-NEXT:    retq
   2124 ;
   2125 ; AVX2-LABEL: shuffle_v8i32_30127456:
   2126 ; AVX2:       # BB#0:
   2127 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
   2128 ; AVX2-NEXT:    retq
   2129   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
   2130   ret <8 x i32> %shuffle
   2131 }
   2132 
   2133 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input permutation of %a (the mask never references %b): within each group of four
        ; lanes the order becomes 1,2,3,0. The AVX1 run expects one vpermilps; the AVX2 run expects
        ; one vpshufd.
   2134 ; AVX1-LABEL: shuffle_v8i32_12305674:
   2135 ; AVX1:       # BB#0:
   2136 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
   2137 ; AVX1-NEXT:    retq
   2138 ;
   2139 ; AVX2-LABEL: shuffle_v8i32_12305674:
   2140 ; AVX2:       # BB#0:
   2141 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
   2142 ; AVX2-NEXT:    retq
   2143   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
   2144   ret <8 x i32> %shuffle
   2145 }
   2146 
   2147 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
        ; Loads a <2 x float> from each pointer, widens each to eight lanes with undef padding, then
        ; combines them: lanes 0-1 come from the %tmp64 load, lanes 2-3 from the %tmp65 load, and
        ; lanes 4-7 are undef. Both runs expect a vmovq load followed by vmovhpd from (%rsi).
   2148 ; ALL-LABEL: concat_v2f32_1:
   2149 ; ALL:       # BB#0: # %entry
   2150 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   2151 ; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
   2152 ; ALL-NEXT:    retq
   2153 entry:
   2154   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2155   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   2156   %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   2157   %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   2158   %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
   2159   ret <8 x float> %tmp76
   2160 }
   2161 
   2162 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
        ; Loads a <2 x float> from each pointer and concatenates them with one shufflevector that
        ; widens directly to eight lanes: lanes 0-1 come from the %tmp64 load, lanes 2-3 from the
        ; %tmp65 load, and lanes 4-7 are undef. Both runs expect vmovq followed by vmovhpd from (%rsi).
   2163 ; ALL-LABEL: concat_v2f32_2:
   2164 ; ALL:       # BB#0: # %entry
   2165 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   2166 ; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
   2167 ; ALL-NEXT:    retq
   2168 entry:
   2169   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2170   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   2171   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   2172   ret <8 x float> %tmp76
   2173 }
   2174 
   2175 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
        ; Loads a <2 x float> from each pointer, concatenates them into a <4 x float> (the %tmp64
        ; load first), then widens that to eight lanes with lanes 4-7 undef. Both runs expect vmovq
        ; followed by vmovhpd from (%rsi).
   2176 ; ALL-LABEL: concat_v2f32_3:
   2177 ; ALL:       # BB#0: # %entry
   2178 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   2179 ; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
   2180 ; ALL-NEXT:    retq
   2181 entry:
   2182   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2183   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   2184   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2185   %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   2186   ret <8 x float> %res
   2187 }
   2188 
   2189 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
        ; Loads one i32 into lane 0 and takes lanes 1-7 from zeroinitializer (mask indices 9-15).
        ; Both runs expect a single vmovd load from memory and no separate zeroing instruction.
   2190 ; ALL-LABEL: insert_mem_and_zero_v8i32:
   2191 ; ALL:       # BB#0:
   2192 ; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2193 ; ALL-NEXT:    retq
   2194   %a = load i32, i32* %ptr
   2195   %v = insertelement <8 x i32> undef, i32 %a, i32 0
   2196   %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2197   ret <8 x i32> %shuffle
   2198 }
   2199 
   2200 define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
        ; Extracts elements 0-3 of %a and elements 4-7 of %b as <4 x i32> values, then concatenates
        ; them into one <8 x i32>. The AVX1 run expects a single vblendpd; the AVX2 run expects a
        ; single vpblendd.
   2201 ; AVX1-LABEL: concat_v8i32_0123CDEF:
   2202 ; AVX1:       # BB#0:
   2203 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
   2204 ; AVX1-NEXT:    retq
   2205 ;
   2206 ; AVX2-LABEL: concat_v8i32_0123CDEF:
   2207 ; AVX2:       # BB#0:
   2208 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   2209 ; AVX2-NEXT:    retq
   2210   %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2211   %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2212   %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2213   ret <8 x i32> %shuf
   2214 }
   2215 
   2216 define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
        ; Extracts elements 4-7 of %a0 and elements 4-7 of %a1 (mask indices 12-15 of the two-input
        ; shuffle), bitcasts both to <2 x i64>, concatenates them as <4 x i64>, and bitcasts the
        ; result back to <8 x i32>. Both runs expect a single vperm2f128.
   2217 ; ALL-LABEL: concat_v8i32_4567CDEF_bc:
   2218 ; ALL:       # BB#0:
   2219 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   2220 ; ALL-NEXT:    retq
   2221   %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2222   %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
   2223   %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
   2224   %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
   2225   %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2226   %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
   2227   ret <8 x i32> %shuffle32
   2228 }
   2229 
   2230 define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
        ; Float variant that goes through mixed integer types: %f0 is viewed as <4 x i64> and %f1 as
        ; <8 x i32>, the upper half of each is extracted, both halves are brought to <2 x i64>,
        ; concatenated as <4 x i64>, and bitcast back to <8 x float>. Note that %bc0hi is a
        ; same-type bitcast (<2 x i64> to <2 x i64>). Both runs expect a single vperm2f128.
   2231 ; ALL-LABEL: concat_v8f32_4567CDEF_bc:
   2232 ; ALL:       # BB#0:
   2233 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   2234 ; ALL-NEXT:    retq
   2235   %a0 = bitcast <8 x float> %f0 to <4 x i64>
   2236   %a1 = bitcast <8 x float> %f1 to <8 x i32>
   2237   %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
   2238   %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2239   %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
   2240   %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
   2241   %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2242   %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
   2243   ret <8 x float> %shuffle32
   2244 }
   2245 
   2246 define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
        ; Loads one i32, inserts it into lane 0 of a zero <4 x i32>, then splats lane 0 to eight
        ; lanes. Both runs expect a single vbroadcastss from memory.
   2247 ; ALL-LABEL: insert_dup_mem_v8i32:
   2248 ; ALL:       # BB#0:
   2249 ; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
   2250 ; ALL-NEXT:    retq
   2251   %tmp = load i32, i32* %ptr, align 4
   2252   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   2253   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
   2254   ret <8 x i32> %tmp2
   2255 }
   2256