; (code-browser navigation bar captured with this file; not part of the test) Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
      2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
        ; The two lines above compile this file with llc for -mcpu=x86-64, once with +avx and once
        ; with +avx2, and pipe the assembly into FileCheck. Each invocation enables one check prefix
        ; shared by both configurations plus one prefix specific to its own feature set.
      3 
      4 target triple = "x86_64-unknown-unknown"
        ; Module-level triple used by every function below (x86_64, vendor and OS left unknown).
      5 
      6 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
        ; Splat test: every result lane reads element 0 of %a; the mask never references %b.
        ; Expected with +avx: a 128-bit vpermilps followed by vinsertf128; with +avx2: one vbroadcastss.
      7 ; AVX1-LABEL: shuffle_v8f32_00000000:
      8 ; AVX1:       # BB#0:
      9 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
     10 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     11 ; AVX1-NEXT:    retq
     12 ;
     13 ; AVX2-LABEL: shuffle_v8f32_00000000:
     14 ; AVX2:       # BB#0:
     15 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
     16 ; AVX2-NEXT:    retq
     17   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     18   ret <8 x float> %shuffle
     19 }
     20 
     21 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
        ; Element 0 of %a in every lane except lane 6, which reads element 1 of %a (%b is unused).
        ; Expected with +avx: two 128-bit vpermilps joined by vinsertf128; with +avx2: vpermps
        ; driven by a constant index vector.
     22 ; AVX1-LABEL: shuffle_v8f32_00000010:
     23 ; AVX1:       # BB#0:
     24 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     25 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
     26 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     27 ; AVX1-NEXT:    retq
     28 ;
     29 ; AVX2-LABEL: shuffle_v8f32_00000010:
     30 ; AVX2:       # BB#0:
     31 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
     32 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     33 ; AVX2-NEXT:    retq
     34   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
     35   ret <8 x float> %shuffle
     36 }
     37 
     38 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
        ; Element 0 of %a in every lane except lane 5, which reads element 2 of %a (%b is unused).
        ; Expected with +avx: two 128-bit vpermilps joined by vinsertf128; with +avx2: vpermps
        ; driven by a constant index vector.
     39 ; AVX1-LABEL: shuffle_v8f32_00000200:
     40 ; AVX1:       # BB#0:
     41 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     42 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
     43 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     44 ; AVX1-NEXT:    retq
     45 ;
     46 ; AVX2-LABEL: shuffle_v8f32_00000200:
     47 ; AVX2:       # BB#0:
     48 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
     49 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     50 ; AVX2-NEXT:    retq
     51   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
     52   ret <8 x float> %shuffle
     53 }
     54 
     55 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
        ; Element 0 of %a in every lane except lane 4, which reads element 3 of %a (%b is unused).
        ; Expected with +avx: two 128-bit vpermilps joined by vinsertf128; with +avx2: vpermps
        ; driven by a constant index vector.
     56 ; AVX1-LABEL: shuffle_v8f32_00003000:
     57 ; AVX1:       # BB#0:
     58 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     59 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
     60 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     61 ; AVX1-NEXT:    retq
     62 ;
     63 ; AVX2-LABEL: shuffle_v8f32_00003000:
     64 ; AVX2:       # BB#0:
     65 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
     66 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     67 ; AVX2-NEXT:    retq
     68   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
     69   ret <8 x float> %shuffle
     70 }
     71 
     72 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
        ; Element 0 of %a in every lane except lane 3, which reads element 4 of %a, i.e. a value
        ; from the other 128-bit half (%b is unused).
        ; Expected with +avx: vperm2f128, two 256-bit vpermilps and a vblendps; with +avx2: vpermps
        ; driven by a constant index vector.
     73 ; AVX1-LABEL: shuffle_v8f32_00040000:
     74 ; AVX1:       # BB#0:
     75 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
     76 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
     77 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
     78 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
     79 ; AVX1-NEXT:    retq
     80 ;
     81 ; AVX2-LABEL: shuffle_v8f32_00040000:
     82 ; AVX2:       # BB#0:
     83 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
     84 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     85 ; AVX2-NEXT:    retq
     86   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
     87   ret <8 x float> %shuffle
     88 }
     89 
     90 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
        ; Element 0 of %a in every lane except lane 2, which reads element 5 of %a (%b is unused).
        ; Expected with +avx: vperm2f128, vblendps, then a 256-bit vpermilps; with +avx2: vpermps
        ; driven by a constant index vector.
     91 ; AVX1-LABEL: shuffle_v8f32_00500000:
     92 ; AVX1:       # BB#0:
     93 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
     94 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
     95 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
     96 ; AVX1-NEXT:    retq
     97 ;
     98 ; AVX2-LABEL: shuffle_v8f32_00500000:
     99 ; AVX2:       # BB#0:
    100 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
    101 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    102 ; AVX2-NEXT:    retq
    103   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    104   ret <8 x float> %shuffle
    105 }
    106 
    107 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
        ; Element 0 of %a in every lane except lane 1, which reads element 6 of %a (%b is unused).
        ; Expected with +avx: vperm2f128, vblendpd, then a 256-bit vpermilps; with +avx2: vpermps
        ; driven by a constant index vector.
    108 ; AVX1-LABEL: shuffle_v8f32_06000000:
    109 ; AVX1:       # BB#0:
    110 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    111 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    112 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
    113 ; AVX1-NEXT:    retq
    114 ;
    115 ; AVX2-LABEL: shuffle_v8f32_06000000:
    116 ; AVX2:       # BB#0:
    117 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
    118 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    119 ; AVX2-NEXT:    retq
    120   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    121   ret <8 x float> %shuffle
    122 }
    123 
    124 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
        ; Lane 0 reads element 7 of %a; every other lane reads element 0 of %a (%b is unused).
        ; Expected with +avx: vperm2f128, vblendpd, then a 256-bit vpermilps. With +avx2 the index
        ; operand for vpermps is built with movl $7 + vmovd instead of the vmovaps that the
        ; neighbouring single-odd-lane tests expect.
    125 ; AVX1-LABEL: shuffle_v8f32_70000000:
    126 ; AVX1:       # BB#0:
    127 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    128 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    129 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
    130 ; AVX1-NEXT:    retq
    131 ;
    132 ; AVX2-LABEL: shuffle_v8f32_70000000:
    133 ; AVX2:       # BB#0:
    134 ; AVX2-NEXT:    movl $7, %eax
    135 ; AVX2-NEXT:    vmovd %eax, %xmm1
    136 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    137 ; AVX2-NEXT:    retq
    138   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    139   ret <8 x float> %shuffle
    140 }
    141 
    142 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
        ; Duplicates the first float pair of each 128-bit half of %a (0,1,0,1 and 4,5,4,5);
        ; %b is unused. Both run configurations expect a single 256-bit vmovddup.
    143 ; ALL-LABEL: shuffle_v8f32_01014545:
    144 ; ALL:       # BB#0:
    145 ; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    146 ; ALL-NEXT:    retq
    147   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    148   ret <8 x float> %shuffle
    149 }
    150 
    151 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
        ; Each of elements 0..3 of %a is written to two adjacent result lanes (%b is unused).
        ; Expected with +avx: 128-bit vunpcklps and vunpckhps joined by vinsertf128; with +avx2:
        ; vpermps driven by a constant index vector.
    152 ; AVX1-LABEL: shuffle_v8f32_00112233:
    153 ; AVX1:       # BB#0:
    154 ; AVX1-NEXT:    vunpcklps {{.*#+}} xmm1 = xmm0[0,0,1,1]
    155 ; AVX1-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2,2,3,3]
    156 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    157 ; AVX1-NEXT:    retq
    158 ;
    159 ; AVX2-LABEL: shuffle_v8f32_00112233:
    160 ; AVX2:       # BB#0:
    161 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
    162 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    163 ; AVX2-NEXT:    retq
    164   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
    165   ret <8 x float> %shuffle
    166 }
    167 
    168 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
        ; Low four lanes read element 0 of %a, high four lanes read element 1 (%b is unused).
        ; Expected with +avx: two 128-bit vpermilps joined by vinsertf128; with +avx2: vpermps
        ; driven by a constant index vector.
    169 ; AVX1-LABEL: shuffle_v8f32_00001111:
    170 ; AVX1:       # BB#0:
    171 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    172 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    173 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    174 ; AVX1-NEXT:    retq
    175 ;
    176 ; AVX2-LABEL: shuffle_v8f32_00001111:
    177 ; AVX2:       # BB#0:
    178 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
    179 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    180 ; AVX2-NEXT:    retq
    181   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
    182   ret <8 x float> %shuffle
    183 }
    184 
    185 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
        ; Two-input shuffle with no element movement: even lanes take %b (mask indices 8,10,12,14),
        ; odd lanes take %a (1,3,5,7), each from its own position.
        ; Both run configurations expect a single vblendps.
    186 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
    187 ; ALL:       # BB#0:
    188 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
    189 ; ALL-NEXT:    retq
    190   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
    191   ret <8 x float> %shuffle
    192 }
    193 
    194 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
        ; Alternates element 0 of %a (even lanes) with element 0 of %b (odd lanes, mask index 8).
        ; Expected with +avx: vpermilps + vinsertf128 for one input, vmovddup + vinsertf128 for the
        ; other, then vblendps; with +avx2: vbroadcastss, vbroadcastsd, then vblendps.
    195 ; AVX1-LABEL: shuffle_v8f32_08080808:
    196 ; AVX1:       # BB#0:
    197 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
    198 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
    199 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
    200 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    201 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    202 ; AVX1-NEXT:    retq
    203 ;
    204 ; AVX2-LABEL: shuffle_v8f32_08080808:
    205 ; AVX2:       # BB#0:
    206 ; AVX2-NEXT:    vbroadcastss %xmm1, %ymm1
    207 ; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
    208 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    209 ; AVX2-NEXT:    retq
    210   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
    211   ret <8 x float> %shuffle
    212 }
    213 
    214 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
        ; Within each 128-bit half, alternates the first element of that half of %a with the first
        ; element of the same half of %b (0,8,0,8 and 4,12,4,12).
        ; Both run configurations expect two vshufps instructions.
    215 ; ALL-LABEL: shuffle_v8f32_08084c4c:
    216 ; ALL:       # BB#0:
    217 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
    218 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
    219 ; ALL-NEXT:    retq
    220   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
    221   ret <8 x float> %shuffle
    222 }
    223 
    224 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
        ; Per 128-bit half: the first element of %b's half twice (8,8 / 12,12), then the last two
        ; elements of %a's half in place (2,3 / 6,7).
        ; Both run configurations expect a single vshufps.
    225 ; ALL-LABEL: shuffle_v8f32_8823cc67:
    226 ; ALL:       # BB#0:
    227 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
    228 ; ALL-NEXT:    retq
    229   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
    230   ret <8 x float> %shuffle
    231 }
    232 
    233 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
        ; Per 128-bit half: elements 1,0 of %b's half (9,8 / 13,12) followed by elements 3,2 of
        ; %a's half (3,2 / 7,6).
        ; Both run configurations expect a single vshufps.
    234 ; ALL-LABEL: shuffle_v8f32_9832dc76:
    235 ; ALL:       # BB#0:
    236 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
    237 ; ALL-NEXT:    retq
    238   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
    239   ret <8 x float> %shuffle
    240 }
    241 
    242 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
        ; Per 128-bit half: elements 1,0 of %b's half (9,8 / 13,12) followed by elements 1,0 of
        ; %a's half (1,0 / 5,4).
        ; Both run configurations expect a single vshufps.
    243 ; ALL-LABEL: shuffle_v8f32_9810dc54:
    244 ; ALL:       # BB#0:
    245 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
    246 ; ALL-NEXT:    retq
    247   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
    248   ret <8 x float> %shuffle
    249 }
    250 
    251 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
        ; Interleaves the first two elements of each 128-bit half of %a with the matching
        ; elements of %b (0,8,1,9 and 4,12,5,13).
        ; Both run configurations expect a single 256-bit vunpcklps.
    252 ; ALL-LABEL: shuffle_v8f32_08194c5d:
    253 ; ALL:       # BB#0:
    254 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
    255 ; ALL-NEXT:    retq
    256   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
    257   ret <8 x float> %shuffle
    258 }
    259 
    260 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
        ; Interleaves the last two elements of each 128-bit half of %a with the matching
        ; elements of %b (2,10,3,11 and 6,14,7,15).
        ; Both run configurations expect a single 256-bit vunpckhps.
    261 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
    262 ; ALL:       # BB#0:
    263 ; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
    264 ; ALL-NEXT:    retq
    265   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
    266   ret <8 x float> %shuffle
    267 }
    268 
    269 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
        ; Interleaves elements 0..3 of %a with elements 0..3 of %b across the full 256-bit result,
        ; so the high result half is filled from the low halves of the inputs.
        ; Expected with +avx: 128-bit vunpckhps and vunpcklps joined by vinsertf128; with +avx2:
        ; one vpermps per input (index vectors with undef lanes) followed by vblendps.
    270 ; AVX1-LABEL: shuffle_v8f32_08192a3b:
    271 ; AVX1:       # BB#0:
    272 ; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    273 ; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    274 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    275 ; AVX1-NEXT:    retq
    276 ;
    277 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
    278 ; AVX2:       # BB#0:
    279 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
    280 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    281 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
    282 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    283 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    284 ; AVX2-NEXT:    retq
    285   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    286   ret <8 x float> %shuffle
    287 }
    288 
    289 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
        ; Lanes 0 and 4 read elements 0 and 1 of %a; the remaining lanes read elements 0,1,1 and
        ; 2,3,3 of %b (mask indices 8,9,9 and 10,11,11).
        ; Expected with +avx: two vshufps, vblendpd, vpermilps (all 128-bit) and vinsertf128;
        ; with +avx2: one vpermps per input followed by vblendps.
    290 ; AVX1-LABEL: shuffle_v8f32_08991abb:
    291 ; AVX1:       # BB#0:
    292 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
    293 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
    294 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
    295 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
    296 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    297 ; AVX1-NEXT:    retq
    298 ;
    299 ; AVX2-LABEL: shuffle_v8f32_08991abb:
    300 ; AVX2:       # BB#0:
    301 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
    302 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    303 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
    304 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    305 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    306 ; AVX2-NEXT:    retq
    307   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
    308   ret <8 x float> %shuffle
    309 }
    310 
    311 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
        ; Even lanes read elements 0..3 of %a; odd lanes keep %b's own odd elements in place
        ; (mask indices 9,11,13,15).
        ; Expected with +avx: two 128-bit vpermilps, vinsertf128, then vblendps; with +avx2:
        ; one vpermps on %a's register followed by vblendps.
    312 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
    313 ; AVX1:       # BB#0:
    314 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
    315 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
    316 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    317 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    318 ; AVX1-NEXT:    retq
    319 ;
    320 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
    321 ; AVX2:       # BB#0:
    322 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
    323 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    324 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    325 ; AVX2-NEXT:    retq
    326   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
    327   ret <8 x float> %shuffle
    328 }
    329 
    330 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
        ; Lanes 0 and 4 read elements 0 and 1 of %a; every other lane keeps %b's element in place
        ; (mask indices 9,10,11 and 13,14,15).
        ; Expected with +avx: vmovshdup, vinsertf128, then vblendps; with +avx2: one vpermps on
        ; %a's register followed by vblendps.
    331 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
    332 ; AVX1:       # BB#0:
    333 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
    334 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    335 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    336 ; AVX1-NEXT:    retq
    337 ;
    338 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
    339 ; AVX2:       # BB#0:
    340 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
    341 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    342 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    343 ; AVX2-NEXT:    retq
    344   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
    345   ret <8 x float> %shuffle
    346 }
    347 
    348 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced); both 128-bit halves apply the same
        ; in-half pattern [0,0,0,1]. Both run configurations expect one vpermilps.
    349 ; ALL-LABEL: shuffle_v8f32_00014445:
    350 ; ALL:       # BB#0:
    351 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
    352 ; ALL-NEXT:    retq
    353   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
    354   ret <8 x float> %shuffle
    355 }
    356 
    357 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced); both 128-bit halves apply the same
        ; in-half pattern [0,0,2,0]. Both run configurations expect one vpermilps.
    358 ; ALL-LABEL: shuffle_v8f32_00204464:
    359 ; ALL:       # BB#0:
    360 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
    361 ; ALL-NEXT:    retq
    362   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
    363   ret <8 x float> %shuffle
    364 }
    365 
    366 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced); both 128-bit halves apply the same
        ; in-half pattern [0,3,0,0]. Both run configurations expect one vpermilps.
    367 ; ALL-LABEL: shuffle_v8f32_03004744:
    368 ; ALL:       # BB#0:
    369 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
    370 ; ALL-NEXT:    retq
    371   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
    372   ret <8 x float> %shuffle
    373 }
    374 
    375 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced); both 128-bit halves apply the same
        ; in-half pattern [1,0,0,0]. Both run configurations expect one vpermilps.
    376 ; ALL-LABEL: shuffle_v8f32_10005444:
    377 ; ALL:       # BB#0:
    378 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
    379 ; ALL-NEXT:    retq
    380   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
    381   ret <8 x float> %shuffle
    382 }
    383 
    384 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced); both 128-bit halves apply the same
        ; in-half pattern [2,2,0,0]. Both run configurations expect one vpermilps.
    385 ; ALL-LABEL: shuffle_v8f32_22006644:
    386 ; ALL:       # BB#0:
    387 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
    388 ; ALL-NEXT:    retq
    389   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
    390   ret <8 x float> %shuffle
    391 }
    392 
    393 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced); both 128-bit halves apply the same
        ; in-half pattern [3,3,3,0]. Both run configurations expect one vpermilps.
    394 ; ALL-LABEL: shuffle_v8f32_33307774:
    395 ; ALL:       # BB#0:
    396 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
    397 ; ALL-NEXT:    retq
    398   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
    399   ret <8 x float> %shuffle
    400 }
    401 
    402 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced) that reverses the four elements inside
        ; each 128-bit half (in-half pattern [3,2,1,0]). Both run configurations expect one vpermilps.
    403 ; ALL-LABEL: shuffle_v8f32_32107654:
    404 ; ALL:       # BB#0:
    405 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    406 ; ALL-NEXT:    retq
    407   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    408   ret <8 x float> %shuffle
    409 }
    410 
    411 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced); both 128-bit halves apply the same
        ; in-half pattern [0,0,2,3]. Both run configurations expect one vpermilps.
    412 ; ALL-LABEL: shuffle_v8f32_00234467:
    413 ; ALL:       # BB#0:
    414 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
    415 ; ALL-NEXT:    retq
    416   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
    417   ret <8 x float> %shuffle
    418 }
    419 
    420 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced) that duplicates each even-indexed element
        ; into the following odd lane. Both run configurations expect one vmovsldup rather than
        ; the vpermilps expected by the neighbouring in-lane tests.
    421 ; ALL-LABEL: shuffle_v8f32_00224466:
    422 ; ALL:       # BB#0:
    423 ; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
    424 ; ALL-NEXT:    retq
    425   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
    426   ret <8 x float> %shuffle
    427 }
    428 
    429 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced) that swaps the two elements of every
        ; adjacent pair (in-half pattern [1,0,3,2]). Both run configurations expect one vpermilps.
    430 ; ALL-LABEL: shuffle_v8f32_10325476:
    431 ; ALL:       # BB#0:
    432 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
    433 ; ALL-NEXT:    retq
    434   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
    435   ret <8 x float> %shuffle
    436 }
    437 
    438 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced) that duplicates each odd-indexed element
        ; into the preceding even lane. Both run configurations expect one vmovshdup rather than
        ; the vpermilps expected by the neighbouring in-lane tests.
    439 ; ALL-LABEL: shuffle_v8f32_11335577:
    440 ; ALL:       # BB#0:
    441 ; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
    442 ; ALL-NEXT:    retq
    443   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
    444   ret <8 x float> %shuffle
    445 }
    446 
    447 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced); both 128-bit halves apply the same
        ; in-half pattern [1,0,2,3]. Both run configurations expect one vpermilps.
    448 ; ALL-LABEL: shuffle_v8f32_10235467:
    449 ; ALL:       # BB#0:
    450 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
    451 ; ALL-NEXT:    retq
    452   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
    453   ret <8 x float> %shuffle
    454 }
    455 
    456 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced); both 128-bit halves apply the same
        ; in-half pattern [1,0,2,2]. Both run configurations expect one vpermilps.
    457 ; ALL-LABEL: shuffle_v8f32_10225466:
    458 ; ALL:       # BB#0:
    459 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
    460 ; ALL-NEXT:    retq
    461   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
    462   ret <8 x float> %shuffle
    463 }
    464 
    465 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced). Each 128-bit half reads only from itself,
        ; but the halves use different in-half patterns: low [0,0,0,1], high [1,0,0,0].
        ; Both run configurations expect one vpermilps.
    466 ; ALL-LABEL: shuffle_v8f32_00015444:
    467 ; ALL:       # BB#0:
    468 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
    469 ; ALL-NEXT:    retq
    470   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
    471   ret <8 x float> %shuffle
    472 }
    473 
    474 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced). Each 128-bit half reads only from itself,
        ; but the halves use different in-half patterns: low [0,0,2,0], high [0,2,0,0].
        ; Both run configurations expect one vpermilps.
    475 ; ALL-LABEL: shuffle_v8f32_00204644:
    476 ; ALL:       # BB#0:
    477 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
    478 ; ALL-NEXT:    retq
    479   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
    480   ret <8 x float> %shuffle
    481 }
    482 
    483 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced). Each 128-bit half reads only from itself,
        ; but the halves use different in-half patterns: low [0,3,0,0], high [0,0,3,0].
        ; Both run configurations expect one vpermilps.
    484 ; ALL-LABEL: shuffle_v8f32_03004474:
    485 ; ALL:       # BB#0:
    486 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
    487 ; ALL-NEXT:    retq
    488   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
    489   ret <8 x float> %shuffle
    490 }
    491 
    492 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced). Each 128-bit half reads only from itself,
        ; but the halves use different in-half patterns: low [1,0,0,0], high [0,0,0,0].
        ; Both run configurations expect one vpermilps.
    493 ; ALL-LABEL: shuffle_v8f32_10004444:
    494 ; ALL:       # BB#0:
    495 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
    496 ; ALL-NEXT:    retq
    497   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
    498   ret <8 x float> %shuffle
    499 }
    500 
    501 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced). Each 128-bit half reads only from itself,
        ; but the halves use different in-half patterns: low [2,2,0,0], high [2,0,0,2].
        ; Both run configurations expect one vpermilps.
    502 ; ALL-LABEL: shuffle_v8f32_22006446:
    503 ; ALL:       # BB#0:
    504 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
    505 ; ALL-NEXT:    retq
    506   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
    507   ret <8 x float> %shuffle
    508 }
    509 
    510 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced). Each 128-bit half reads only from itself,
        ; but the halves use different in-half patterns: low [3,3,3,0], high [3,0,3,0].
        ; Both run configurations expect one vpermilps.
    511 ; ALL-LABEL: shuffle_v8f32_33307474:
    512 ; ALL:       # BB#0:
    513 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
    514 ; ALL-NEXT:    retq
    515   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
    516   ret <8 x float> %shuffle
    517 }
    518 
    519 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced): the low 128-bit half is reversed
        ; ([3,2,1,0]) while the high half is left in place (4,5,6,7).
        ; Both run configurations expect one vpermilps.
    520 ; ALL-LABEL: shuffle_v8f32_32104567:
    521 ; ALL:       # BB#0:
    522 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
    523 ; ALL-NEXT:    retq
    524   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
    525   ret <8 x float> %shuffle
    526 }
    527 
    528 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced). Each 128-bit half reads only from itself,
        ; but the halves use different in-half patterns: low [0,0,2,3], high [2,3,0,0].
        ; Both run configurations expect one vpermilps.
    529 ; ALL-LABEL: shuffle_v8f32_00236744:
    530 ; ALL:       # BB#0:
    531 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
    532 ; ALL-NEXT:    retq
    533   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
    534   ret <8 x float> %shuffle
    535 }
    536 
    537 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced). Each 128-bit half reads only from itself,
        ; but the halves use different in-half patterns: low [0,0,2,2], high [2,2,0,0].
        ; Both run configurations expect one vpermilps.
    538 ; ALL-LABEL: shuffle_v8f32_00226644:
    539 ; ALL:       # BB#0:
    540 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
    541 ; ALL-NEXT:    retq
    542   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
    543   ret <8 x float> %shuffle
    544 }
    545 
    546 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced): adjacent pairs are swapped in the low
        ; 128-bit half ([1,0,3,2]) while the high half is left in place (4,5,6,7).
        ; Both run configurations expect one vpermilps.
    547 ; ALL-LABEL: shuffle_v8f32_10324567:
    548 ; ALL:       # BB#0:
    549 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
    550 ; ALL-NEXT:    retq
    551   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
    552   ret <8 x float> %shuffle
    553 }
    554 
    555 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced): the low 128-bit half uses [1,1,3,3]
        ; while the high half is left in place (4,5,6,7).
        ; Both run configurations expect one vpermilps.
    556 ; ALL-LABEL: shuffle_v8f32_11334567:
    557 ; ALL:       # BB#0:
    558 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
    559 ; ALL-NEXT:    retq
    560   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
    561   ret <8 x float> %shuffle
    562 }
    563 
    564 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced): the low 128-bit half is left in place
        ; (0,1,2,3) while the high half swaps its first two elements (5,4,6,7).
        ; Both run configurations expect one vpermilps.
    565 ; ALL-LABEL: shuffle_v8f32_01235467:
    566 ; ALL:       # BB#0:
    567 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
    568 ; ALL-NEXT:    retq
    569   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
    570   ret <8 x float> %shuffle
    571 }
    572 
    573 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
        ; Single-input shuffle (only %a is referenced): the low 128-bit half is left in place
        ; (0,1,2,3) while the high half uses the in-half pattern [1,0,2,2] (5,4,6,6).
        ; Both run configurations expect one vpermilps.
    574 ; ALL-LABEL: shuffle_v8f32_01235466:
    575 ; ALL:       # BB#0:
    576 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
    577 ; ALL-NEXT:    retq
    578   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
    579   ret <8 x float> %shuffle
    580 }
    581 
    582 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
        ; In-lane single-input shuffle with undef mask entries in lanes 3 and 5; the defined
        ; lanes read 0,0,2 (low half) and 6,4,4 (high half) from %a.
        ; Both run configurations expect one vpermilps whose printed mask keeps the undef lanes as u.
    583 ; ALL-LABEL: shuffle_v8f32_002u6u44:
    584 ; ALL:       # BB#0:
    585 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
    586 ; ALL-NEXT:    retq
    587   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
    588   ret <8 x float> %shuffle
    589 }
    590 
    591 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
        ; In-lane single-input shuffle with undef mask entries in lanes 2,3,6,7; lanes 0,1 read
        ; element 0 of %a and lanes 4,5 read element 6.
        ; Both run configurations expect one vpermilps whose printed mask keeps the undef lanes as u.
    592 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
    593 ; ALL:       # BB#0:
    594 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
    595 ; ALL-NEXT:    retq
    596   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
    597   ret <8 x float> %shuffle
    598 }
    599 
    600 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
        ; In-lane single-input shuffle with undef mask entries in lanes 6 and 7; the low half
        ; swaps adjacent pairs (1,0,3,2) and lanes 4,5 stay in place.
        ; Both run configurations expect one vpermilps whose printed mask keeps the undef lanes as u.
    601 ; ALL-LABEL: shuffle_v8f32_103245uu:
    602 ; ALL:       # BB#0:
    603 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
    604 ; ALL-NEXT:    retq
    605   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
    606   ret <8 x float> %shuffle
    607 }
    608 
    609 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
        ; In-lane single-input shuffle with undef mask entries in lanes 4 and 5; the low half
        ; reads 1,1,3,3 from %a and lanes 6,7 stay in place.
        ; Both run configurations expect one vpermilps whose printed mask keeps the undef lanes as u.
    610 ; ALL-LABEL: shuffle_v8f32_1133uu67:
    611 ; ALL:       # BB#0:
    612 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
    613 ; ALL-NEXT:    retq
    614   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
    615   ret <8 x float> %shuffle
    616 }
    617 
    618 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
        ; In-lane single-input shuffle with undef mask entries in lanes 1,2,6,7; lanes 0 and 3
        ; stay in place and lanes 4,5 read elements 5,4 of %a.
        ; Both run configurations expect one vpermilps whose printed mask keeps the undef lanes as u.
    619 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
    620 ; ALL:       # BB#0:
    621 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
    622 ; ALL-NEXT:    retq
    623   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
    624   ret <8 x float> %shuffle
    625 }
    626 
    627 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
        ; In-lane single-input shuffle where only lanes 3, 6 and 7 are defined (reading elements
        ; 3, 6 and 6 of %a); all other mask entries are undef.
        ; Both run configurations expect one vpermilps whose printed mask keeps the undef lanes as u.
    628 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
    629 ; ALL:       # BB#0:
    630 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
    631 ; ALL-NEXT:    retq
    632   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
    633   ret <8 x float> %shuffle
    634 }
    635 
    636 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
        ; Irregular two-input shuffle: lanes 1,2,7 read elements 3,4,0 of %a and lanes 0,3,4,5,6
        ; read elements 4,0,4,5,2 of %b (mask indices 12,8,12,13,10). Both inputs supply values
        ; that cross the 128-bit half boundary.
        ; Expected with +avx: vperm2f128 + vshufps for one input, vperm2f128 + vpermilps + vblendpd
        ; for the other, then a final vblendps; with +avx2: one vpermps per input, then vblendps.
    637 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
    638 ; AVX1:       # BB#0:
    639 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    640 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
    641 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
    642 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
    643 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
    644 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
    645 ; AVX1-NEXT:    retq
    646 ;
    647 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
    648 ; AVX2:       # BB#0:
    649 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,3,4,u,u,u,u,0>
    650 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    651 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
    652 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    653 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
    654 ; AVX2-NEXT:    retq
    655   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
    656   ret <8 x float> %shuffle
    657 }
    658 
    659 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
        ; Two-input mask <15,5,1,1,2,3,5,10> (hex digits in the name): only result
        ; elements 0 and 7 come from %b; the middle six come from %a, several of them
        ; crossing 128-bit halves.
        ; The AVX1 run expects half swaps (vperm2f128) plus in-half permutes per input
        ; before the final vblendps; the AVX2 run expects one constant-index vpermps
        ; per input followed by the same vblendps.
    660 ; AVX1-LABEL: shuffle_v8f32_f511235a:
    661 ; AVX1:       # BB#0:
    662 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    663 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
    664 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
    665 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
    666 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
    667 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
    668 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
    669 ; AVX1-NEXT:    retq
    670 ;
    671 ; AVX2-LABEL: shuffle_v8f32_f511235a:
    672 ; AVX2:       # BB#0:
    673 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <7,u,u,u,u,u,u,2>
    674 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    675 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
    676 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    677 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
    678 ; AVX2-NEXT:    retq
    679   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
    680   ret <8 x float> %shuffle
    681 }
    682 
    683 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
        ; Reverses the low four elements of %a and repeats them in both halves.
        ; The AVX1 run expects a 128-bit vpermilps followed by vinsertf128 to duplicate
        ; the result; the AVX2 run expects one vpermps with a constant index vector.
    684 ; AVX1-LABEL: shuffle_v8f32_32103210:
    685 ; AVX1:       # BB#0:
    686 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    687 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    688 ; AVX1-NEXT:    retq
    689 ;
    690 ; AVX2-LABEL: shuffle_v8f32_32103210:
    691 ; AVX2:       # BB#0:
    692 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
    693 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    694 ; AVX2-NEXT:    retq
    695   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
    696   ret <8 x float> %shuffle
    697 }
    698 
    699 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
        ; Reverses the high four elements of %a and repeats them in both halves.
        ; The AVX1 run expects the upper half to be extracted first (vextractf128),
        ; reversed, then duplicated with vinsertf128; the AVX2 run expects one vpermps.
    700 ; AVX1-LABEL: shuffle_v8f32_76547654:
    701 ; AVX1:       # BB#0:
    702 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
    703 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    704 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    705 ; AVX1-NEXT:    retq
    706 ;
    707 ; AVX2-LABEL: shuffle_v8f32_76547654:
    708 ; AVX2:       # BB#0:
    709 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
    710 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    711 ; AVX2-NEXT:    retq
    712   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
    713   ret <8 x float> %shuffle
    714 }
    715 
    716 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
        ; Full reversal of the eight elements of %a.
        ; The AVX1 run expects a half swap (vperm2f128) followed by an in-half reversal
        ; (vpermilps); the AVX2 run expects one vpermps with a constant index vector.
    717 ; AVX1-LABEL: shuffle_v8f32_76543210:
    718 ; AVX1:       # BB#0:
    719 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
    720 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    721 ; AVX1-NEXT:    retq
    722 ;
    723 ; AVX2-LABEL: shuffle_v8f32_76543210:
    724 ; AVX2:       # BB#0:
    725 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
    726 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    727 ; AVX2-NEXT:    retq
    728   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
    729   ret <8 x float> %shuffle
    730 }
    731 
    732 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
        ; Result = reversed low half of %a followed by reversed low half of %b.
        ; Both runs expect vinsertf128 to place xmm1 (%b's low half) above %a's low
        ; half, then a single vpermilps that reverses within each 128-bit half.
    733 ; ALL-LABEL: shuffle_v8f32_3210ba98:
    734 ; ALL:       # BB#0:
    735 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    736 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    737 ; ALL-NEXT:    retq
    738   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
    739   ret <8 x float> %shuffle
    740 }
    741 
    742 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
        ; Result = reversed low half of %a followed by reversed high half of %b.
        ; Neither half changes position, so both runs expect a vblendpd (low half of
        ; ymm0, high half of ymm1) and then one in-half reversing vpermilps.
    743 ; ALL-LABEL: shuffle_v8f32_3210fedc:
    744 ; ALL:       # BB#0:
    745 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    746 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    747 ; ALL-NEXT:    retq
    748   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
    749   ret <8 x float> %shuffle
    750 }
    751 
    752 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
        ; Result = reversed high half of %a followed by reversed high half of %b.
        ; Both runs expect vperm2f128 to gather the two high halves (ymm0 first), then
        ; one in-half reversing vpermilps.
    753 ; ALL-LABEL: shuffle_v8f32_7654fedc:
    754 ; ALL:       # BB#0:
    755 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    756 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    757 ; ALL-NEXT:    retq
    758   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
    759   ret <8 x float> %shuffle
    760 }
    761 
    762 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
        ; Result = reversed high half of %b followed by reversed high half of %a.
        ; Same shape as the 7654fedc test with the inputs swapped: vperm2f128 gathers
        ; the two high halves (ymm1 first), then one in-half reversing vpermilps.
    763 ; ALL-LABEL: shuffle_v8f32_fedc7654:
    764 ; ALL:       # BB#0:
    765 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
    766 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    767 ; ALL-NEXT:    retq
    768   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
    769   ret <8 x float> %shuffle
    770 }
    771 
    772 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
        ; Regression test named after PR21138: selects the odd-indexed elements of
        ; %truc (1,3,5,7) for the low half and the odd-indexed elements of %tchose
        ; (9,11,13,15) for the high half.
        ; The AVX1 run expects a vextractf128 + 128-bit vshufps per input, with a
        ; vinsertf128 and final vblendpd to assemble the halves; the AVX2 run expects
        ; one constant-index vpermps per input followed by the same vblendpd.
    773 ; AVX1-LABEL: PR21138:
    774 ; AVX1:       # BB#0:
    775 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    776 ; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
    777 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
    778 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    779 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
    780 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    781 ; AVX1-NEXT:    retq
    782 ;
    783 ; AVX2-LABEL: PR21138:
    784 ; AVX2:       # BB#0:
    785 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,u,u,u,1,3,5,7>
    786 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    787 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <1,3,5,7,u,u,u,u>
    788 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    789 ; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    790 ; AVX2-NEXT:    retq
    791   %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    792   ret <8 x float> %shuffle
    793 }
    794 
    795 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
        ; Result = reversed low half of %b followed by reversed high half of %a.
        ; Neither half changes position, so both runs expect a vblendpd (low half of
        ; ymm1, high half of ymm0) and then one in-half reversing vpermilps.
    796 ; ALL-LABEL: shuffle_v8f32_ba987654:
    797 ; ALL:       # BB#0:
    798 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
    799 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    800 ; ALL-NEXT:    retq
    801   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
    802   ret <8 x float> %shuffle
    803 }
    804 
    805 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
        ; Result = reversed low half of %b followed by reversed low half of %a, i.e.
        ; mask <11,10,9,8,3,2,1,0> as spelled (in hex) by the function name.
        ; Mirror image of the 3210ba98 test: vinsertf128 places xmm0 (%a's low half)
        ; above %b's low half, then one vpermilps reverses within each 128-bit half.
        ; NOTE(review): expected assembly derived by symmetry with 3210ba98 - confirm
        ; by running llc with -mattr=+avx and -mattr=+avx2.
    806 ; ALL-LABEL: shuffle_v8f32_ba983210:
    807 ; ALL:       # BB#0:
    808 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    809 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    810 ; ALL-NEXT:    retq
    811   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
    812   ret <8 x float> %shuffle
    813 }
    814 
    815 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
        ; Interleaves the first two elements of each 128-bit half, taking the %b
        ; element before the %a element; result elements 2 and 6 are undef.
        ; Both runs expect a single vunpcklps with ymm1 (%b) as the first operand.
    816 ; ALL-LABEL: shuffle_v8f32_80u1c4u5:
    817 ; ALL:       # BB#0:
    818 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
    819 ; ALL-NEXT:    retq
    820   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
    821   ret <8 x float> %shuffle
    822 }
    823 
    824 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
        ; Interleaves the last two elements of each 128-bit half, taking the %b
        ; element before the %a element; result element 2 is undef.
        ; Both runs expect a single vunpckhps with ymm1 (%b) as the first operand.
    825 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
    826 ; ALL:       # BB#0:
    827 ; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
    828 ; ALL-NEXT:    retq
    829   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
    830   ret <8 x float> %shuffle
    831 }
    832 
    833 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
        ; Integer splat of %a element 0 across all eight elements.
        ; The AVX1 run expects a 128-bit vpermilps splat duplicated with vinsertf128;
        ; the AVX2 run expects a single broadcast (recorded here as vbroadcastss even
        ; though the vector type is integer).
    834 ; AVX1-LABEL: shuffle_v8i32_00000000:
    835 ; AVX1:       # BB#0:
    836 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    837 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    838 ; AVX1-NEXT:    retq
    839 ;
    840 ; AVX2-LABEL: shuffle_v8i32_00000000:
    841 ; AVX2:       # BB#0:
    842 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
    843 ; AVX2-NEXT:    retq
    844   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    845   ret <8 x i32> %shuffle
    846 }
    847 
    848 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
        ; Splat of %a element 0, except result element 6 which takes %a element 1.
        ; The AVX1 run expects two 128-bit vpermilps (one per result half) joined by
        ; vinsertf128; the AVX2 run expects vpermd with a constant index vector.
    849 ; AVX1-LABEL: shuffle_v8i32_00000010:
    850 ; AVX1:       # BB#0:
    851 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    852 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
    853 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    854 ; AVX1-NEXT:    retq
    855 ;
    856 ; AVX2-LABEL: shuffle_v8i32_00000010:
    857 ; AVX2:       # BB#0:
    858 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
    859 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    860 ; AVX2-NEXT:    retq
    861   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
    862   ret <8 x i32> %shuffle
    863 }
    864 
    865 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
        ; Splat of %a element 0, except result element 5 which takes %a element 2.
        ; The AVX1 run expects two 128-bit vpermilps (one per result half) joined by
        ; vinsertf128; the AVX2 run expects vpermd with a constant index vector.
    866 ; AVX1-LABEL: shuffle_v8i32_00000200:
    867 ; AVX1:       # BB#0:
    868 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    869 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
    870 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    871 ; AVX1-NEXT:    retq
    872 ;
    873 ; AVX2-LABEL: shuffle_v8i32_00000200:
    874 ; AVX2:       # BB#0:
    875 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
    876 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    877 ; AVX2-NEXT:    retq
    878   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
    879   ret <8 x i32> %shuffle
    880 }
    881 
    882 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
        ; Splat of %a element 0, except result element 4 which takes %a element 3.
        ; The AVX1 run expects two 128-bit vpermilps (one per result half) joined by
        ; vinsertf128; the AVX2 run expects vpermd with a constant index vector.
    883 ; AVX1-LABEL: shuffle_v8i32_00003000:
    884 ; AVX1:       # BB#0:
    885 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    886 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
    887 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    888 ; AVX1-NEXT:    retq
    889 ;
    890 ; AVX2-LABEL: shuffle_v8i32_00003000:
    891 ; AVX2:       # BB#0:
    892 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
    893 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    894 ; AVX2-NEXT:    retq
    895   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
    896   ret <8 x i32> %shuffle
    897 }
    898 
    899 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
        ; Splat of %a element 0, except result element 3 which takes %a element 4,
        ; i.e. an element from the upper half lands in the lower half.
        ; The AVX1 run expects a half swap (vperm2f128), two in-half vpermilps and a
        ; vblendps; the AVX2 run expects vpermd with a constant index vector.
    900 ; AVX1-LABEL: shuffle_v8i32_00040000:
    901 ; AVX1:       # BB#0:
    902 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    903 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,0,0,4,4,4,4]
    904 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,3,4,4,4,7]
    905 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2],ymm1[3,4,5,6,7]
    906 ; AVX1-NEXT:    retq
    907 ;
    908 ; AVX2-LABEL: shuffle_v8i32_00040000:
    909 ; AVX2:       # BB#0:
    910 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
    911 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    912 ; AVX2-NEXT:    retq
    913   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
    914   ret <8 x i32> %shuffle
    915 }
    916 
    917 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
        ; Splat of %a element 0, except result element 2 which takes %a element 5
        ; (an upper-half element landing in the lower half).
        ; The AVX1 run expects a half swap (vperm2f128), a vblendps with the original
        ; and one in-half vpermilps; the AVX2 run expects a constant-index vpermd.
    918 ; AVX1-LABEL: shuffle_v8i32_00500000:
    919 ; AVX1:       # BB#0:
    920 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    921 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    922 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
    923 ; AVX1-NEXT:    retq
    924 ;
    925 ; AVX2-LABEL: shuffle_v8i32_00500000:
    926 ; AVX2:       # BB#0:
    927 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
    928 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    929 ; AVX2-NEXT:    retq
    930   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    931   ret <8 x i32> %shuffle
    932 }
    933 
    934 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
        ; Splat of %a element 0, except result element 1 which takes %a element 6
        ; (an upper-half element landing in the lower half).
        ; The AVX1 run expects a half swap (vperm2f128), a vblendpd with the original
        ; and one in-half vpermilps; the AVX2 run expects a constant-index vpermd.
    935 ; AVX1-LABEL: shuffle_v8i32_06000000:
    936 ; AVX1:       # BB#0:
    937 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    938 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    939 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
    940 ; AVX1-NEXT:    retq
    941 ;
    942 ; AVX2-LABEL: shuffle_v8i32_06000000:
    943 ; AVX2:       # BB#0:
    944 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
    945 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    946 ; AVX2-NEXT:    retq
    947   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    948   ret <8 x i32> %shuffle
    949 }
    950 
    951 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
        ; Splat of %a element 0, except result element 0 which takes %a element 7.
        ; The AVX1 run expects a half swap (vperm2f128), a vblendpd with the original
        ; and one in-half vpermilps. The AVX2 run expects vpermd whose index vector is
        ; built in registers (movl $7 + vmovd) rather than loaded with vmovdqa as in
        ; the neighbouring tests.
    952 ; AVX1-LABEL: shuffle_v8i32_70000000:
    953 ; AVX1:       # BB#0:
    954 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    955 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    956 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
    957 ; AVX1-NEXT:    retq
    958 ;
    959 ; AVX2-LABEL: shuffle_v8i32_70000000:
    960 ; AVX2:       # BB#0:
    961 ; AVX2-NEXT:    movl $7, %eax
    962 ; AVX2-NEXT:    vmovd %eax, %xmm1
    963 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    964 ; AVX2-NEXT:    retq
    965   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    966   ret <8 x i32> %shuffle
    967 }
    968 
    969 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
        ; Repeats the first element pair of each 128-bit half of %a (0,1 and 4,5).
        ; The AVX1 run expects a single vmovddup (printed with 64-bit element indices);
        ; the AVX2 run expects a single vpshufd.
    970 ; AVX1-LABEL: shuffle_v8i32_01014545:
    971 ; AVX1:       # BB#0:
    972 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    973 ; AVX1-NEXT:    retq
    974 ;
    975 ; AVX2-LABEL: shuffle_v8i32_01014545:
    976 ; AVX2:       # BB#0:
    977 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
    978 ; AVX2-NEXT:    retq
    979   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    980   ret <8 x i32> %shuffle
    981 }
    982 
    983 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
        ; Duplicates each of the low four elements of %a, spreading them over all
        ; eight result elements.
        ; The AVX1 run expects two 128-bit vpermilps (one per result half) joined by
        ; vinsertf128; the AVX2 run expects vpermd with a constant index vector.
    984 ; AVX1-LABEL: shuffle_v8i32_00112233:
    985 ; AVX1:       # BB#0:
    986 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
    987 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
    988 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    989 ; AVX1-NEXT:    retq
    990 ;
    991 ; AVX2-LABEL: shuffle_v8i32_00112233:
    992 ; AVX2:       # BB#0:
    993 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
    994 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    995 ; AVX2-NEXT:    retq
    996   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
    997   ret <8 x i32> %shuffle
    998 }
    999 
   1000 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
        ; Low result half is a splat of %a element 0, high result half a splat of
        ; %a element 1.
        ; The AVX1 run expects two 128-bit vpermilps splats joined by vinsertf128;
        ; the AVX2 run expects vpermd with a constant index vector.
   1001 ; AVX1-LABEL: shuffle_v8i32_00001111:
   1002 ; AVX1:       # BB#0:
   1003 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
   1004 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1005 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1006 ; AVX1-NEXT:    retq
   1007 ;
   1008 ; AVX2-LABEL: shuffle_v8i32_00001111:
   1009 ; AVX2:       # BB#0:
   1010 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
   1011 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1012 ; AVX2-NEXT:    retq
   1013   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
   1014   ret <8 x i32> %shuffle
   1015 }
   1016 
   1017 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
        ; Pure element-wise blend: even result elements come from %b, odd ones from
        ; %a, and no element changes position.
        ; The AVX1 run expects a single vblendps; the AVX2 run expects the integer
        ; form vpblendd with the same element selection.
   1018 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
   1019 ; AVX1:       # BB#0:
   1020 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
   1021 ; AVX1-NEXT:    retq
   1022 ;
   1023 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
   1024 ; AVX2:       # BB#0:
   1025 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
   1026 ; AVX2-NEXT:    retq
   1027   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
   1028   ret <8 x i32> %shuffle
   1029 }
   1030 
   1031 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
        ; Alternates %a element 0 (even result slots) with %b element 0 (odd slots).
        ; The AVX1 run expects each input to be shuffled at 128 bits, widened with
        ; vinsertf128 and then combined by vblendps; the AVX2 run expects a
        ; vpbroadcastd of %b, a vpbroadcastq of %a and a vpblendd.
   1032 ; AVX1-LABEL: shuffle_v8i32_08080808:
   1033 ; AVX1:       # BB#0:
   1034 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm1[0,0,2,0]
   1035 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm1, %ymm1
   1036 ; AVX1-NEXT:    vmovddup {{.*#+}} xmm0 = xmm0[0,0]
   1037 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1038 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1039 ; AVX1-NEXT:    retq
   1040 ;
   1041 ; AVX2-LABEL: shuffle_v8i32_08080808:
   1042 ; AVX2:       # BB#0:
   1043 ; AVX2-NEXT:    vpbroadcastd %xmm1, %ymm1
   1044 ; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
   1045 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1046 ; AVX2-NEXT:    retq
   1047   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
   1048   ret <8 x i32> %shuffle
   1049 }
   1050 
   1051 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
        ; Within each 128-bit half, alternates the first element of %a with the first
        ; element of %b from that same half (0,8,0,8 / 4,12,4,12).
        ; The AVX1 run expects two vshufps (a two-input gather, then an in-half
        ; reorder); the AVX2 run expects one vpshufd per input followed by vpblendd.
   1052 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
   1053 ; AVX1:       # BB#0:
   1054 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
   1055 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   1056 ; AVX1-NEXT:    retq
   1057 ;
   1058 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
   1059 ; AVX2:       # BB#0:
   1060 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
   1061 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
   1062 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1063 ; AVX2-NEXT:    retq
   1064   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
   1065   ret <8 x i32> %shuffle
   1066 }
   1067 
   1068 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
        ; Within each 128-bit half: the first element of %b twice, then the last two
        ; elements of %a unchanged.
        ; The AVX1 run expects a single two-input vshufps; the AVX2 run expects a
        ; vpshufd on %b followed by a vpblendd.
   1069 ; AVX1-LABEL: shuffle_v8i32_8823cc67:
   1070 ; AVX1:       # BB#0:
   1071 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
   1072 ; AVX1-NEXT:    retq
   1073 ;
   1074 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
   1075 ; AVX2:       # BB#0:
   1076 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
   1077 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1078 ; AVX2-NEXT:    retq
   1079   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
   1080   ret <8 x i32> %shuffle
   1081 }
   1082 
   1083 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
        ; Within each 128-bit half: the first pair of %b swapped, then the last pair
        ; of %a swapped.
        ; The AVX1 run expects a single two-input vshufps; the AVX2 run expects a
        ; vpblendd first and then one vpshufd that swaps every adjacent pair.
   1084 ; AVX1-LABEL: shuffle_v8i32_9832dc76:
   1085 ; AVX1:       # BB#0:
   1086 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
   1087 ; AVX1-NEXT:    retq
   1088 ;
   1089 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
   1090 ; AVX2:       # BB#0:
   1091 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1092 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1093 ; AVX2-NEXT:    retq
   1094   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
   1095   ret <8 x i32> %shuffle
   1096 }
   1097 
   1098 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
        ; Within each 128-bit half: the first pair of %b swapped, then the first pair
        ; of %a swapped.
        ; The AVX1 run expects a single two-input vshufps; the AVX2 run expects one
        ; vpshufd per input followed by a vpblendd.
   1099 ; AVX1-LABEL: shuffle_v8i32_9810dc54:
   1100 ; AVX1:       # BB#0:
   1101 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
   1102 ; AVX1-NEXT:    retq
   1103 ;
   1104 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
   1105 ; AVX2:       # BB#0:
   1106 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
   1107 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
   1108 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1109 ; AVX2-NEXT:    retq
   1110   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
   1111   ret <8 x i32> %shuffle
   1112 }
   1113 
   1114 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
        ; Interleaves the first two elements of each 128-bit half of %a and %b
        ; (%a element first).
        ; The AVX1 run expects a single vunpcklps; the AVX2 run expects the integer
        ; form vpunpckldq with the same element pattern.
   1115 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
   1116 ; AVX1:       # BB#0:
   1117 ; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
   1118 ; AVX1-NEXT:    retq
   1119 ;
   1120 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
   1121 ; AVX2:       # BB#0:
   1122 ; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
   1123 ; AVX2-NEXT:    retq
   1124   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
   1125   ret <8 x i32> %shuffle
   1126 }
   1127 
   1128 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
        ; Interleaves the last two elements of each 128-bit half of %a and %b
        ; (%a element first).
        ; The AVX1 run expects a single vunpckhps; the AVX2 run expects the integer
        ; form vpunpckhdq with the same element pattern.
   1129 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
   1130 ; AVX1:       # BB#0:
   1131 ; AVX1-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
   1132 ; AVX1-NEXT:    retq
   1133 ;
   1134 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
   1135 ; AVX2:       # BB#0:
   1136 ; AVX2-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
   1137 ; AVX2-NEXT:    retq
   1138   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
   1139   ret <8 x i32> %shuffle
   1140 }
   1141 
   1142 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
        ; Interleaves the low four elements of %a with the low four elements of %b
        ; across the whole result, so source elements 2-3 move into the upper half.
        ; The AVX1 run expects 128-bit vunpckhps/vunpcklps joined by vinsertf128; the
        ; AVX2 run expects a constant-index vpermd on %b, a vpmovzxdq on %a and a
        ; vpblendd to merge them.
   1143 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
   1144 ; AVX1:       # BB#0:
   1145 ; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1146 ; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1147 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1148 ; AVX1-NEXT:    retq
   1149 ;
   1150 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
   1151 ; AVX2:       # BB#0:
   1152 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
   1153 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   1154 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1155 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1156 ; AVX2-NEXT:    retq
   1157   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1158   ret <8 x i32> %shuffle
   1159 }
   1160 
   1161 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,8,9,9,1,10,11,11>: result elements 0 and 4 take %a elements 0 and 1;
        ; the remaining six come from the low four elements of %b.
        ; The AVX1 run expects the two result halves to be built with 128-bit
        ; shuffles/blends and joined by vinsertf128; the AVX2 run expects one
        ; constant-index vpermd per input followed by a vpblendd.
   1162 ; AVX1-LABEL: shuffle_v8i32_08991abb:
   1163 ; AVX1:       # BB#0:
   1164 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
   1165 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
   1166 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
   1167 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
   1168 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   1169 ; AVX1-NEXT:    retq
   1170 ;
   1171 ; AVX2-LABEL: shuffle_v8i32_08991abb:
   1172 ; AVX2:       # BB#0:
   1173 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
   1174 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
   1175 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
   1176 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   1177 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1178 ; AVX2-NEXT:    retq
   1179   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
   1180   ret <8 x i32> %shuffle
   1181 }
   1182 
   1183 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
        ; Even result elements are the low four elements of %a spread out (0,1,2,3);
        ; odd result elements are the odd elements of %b left in place.
        ; The AVX1 run expects two 128-bit vpermilps joined by vinsertf128 and then a
        ; vblendps with %b; the AVX2 run expects vpmovzxdq on %a followed by vpblendd.
   1184 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
   1185 ; AVX1:       # BB#0:
   1186 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
   1187 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
   1188 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   1189 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1190 ; AVX1-NEXT:    retq
   1191 ;
   1192 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
   1193 ; AVX2:       # BB#0:
   1194 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1195 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1196 ; AVX2-NEXT:    retq
   1197   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   1198   ret <8 x i32> %shuffle
   1199 }
   1200 
   1201 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
        ; %b is kept in place except for result elements 0 and 4, which take %a
        ; elements 0 and 1 respectively.
        ; The AVX1 run expects vmovshdup + vinsertf128 to position the %a elements and
        ; then a vblendps; the AVX2 run expects a constant-index vpermd on %a followed
        ; by a vpblendd.
   1202 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
   1203 ; AVX1:       # BB#0:
   1204 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
   1205 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1206 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1207 ; AVX1-NEXT:    retq
   1208 ;
   1209 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
   1210 ; AVX2:       # BB#0:
   1211 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
   1212 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
   1213 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1214 ; AVX2-NEXT:    retq
   1215   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
   1216   ret <8 x i32> %shuffle
   1217 }
   1218 
   1219 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input mask applying the pattern [0,0,0,1] inside each 128-bit half.
        ; The AVX1 run expects one vpermilps; the AVX2 run expects one vpshufd with
        ; the same index list.
   1220 ; AVX1-LABEL: shuffle_v8i32_00014445:
   1221 ; AVX1:       # BB#0:
   1222 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
   1223 ; AVX1-NEXT:    retq
   1224 ;
   1225 ; AVX2-LABEL: shuffle_v8i32_00014445:
   1226 ; AVX2:       # BB#0:
   1227 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
   1228 ; AVX2-NEXT:    retq
   1229   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
   1230   ret <8 x i32> %shuffle
   1231 }
   1232 
   1233 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input mask applying the pattern [0,0,2,0] inside each 128-bit half.
        ; The AVX1 run expects one vpermilps; the AVX2 run expects one vpshufd with
        ; the same index list.
   1234 ; AVX1-LABEL: shuffle_v8i32_00204464:
   1235 ; AVX1:       # BB#0:
   1236 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
   1237 ; AVX1-NEXT:    retq
   1238 ;
   1239 ; AVX2-LABEL: shuffle_v8i32_00204464:
   1240 ; AVX2:       # BB#0:
   1241 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
   1242 ; AVX2-NEXT:    retq
   1243   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
   1244   ret <8 x i32> %shuffle
   1245 }
   1246 
   1247 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input mask applying the pattern [0,3,0,0] inside each 128-bit half.
        ; The AVX1 run expects one vpermilps; the AVX2 run expects one vpshufd with
        ; the same index list.
   1248 ; AVX1-LABEL: shuffle_v8i32_03004744:
   1249 ; AVX1:       # BB#0:
   1250 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
   1251 ; AVX1-NEXT:    retq
   1252 ;
   1253 ; AVX2-LABEL: shuffle_v8i32_03004744:
   1254 ; AVX2:       # BB#0:
   1255 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
   1256 ; AVX2-NEXT:    retq
   1257   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
   1258   ret <8 x i32> %shuffle
   1259 }
   1260 
   1261 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input mask applying the pattern [1,0,0,0] inside each 128-bit half.
        ; The AVX1 run expects one vpermilps; the AVX2 run expects one vpshufd with
        ; the same index list.
   1262 ; AVX1-LABEL: shuffle_v8i32_10005444:
   1263 ; AVX1:       # BB#0:
   1264 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
   1265 ; AVX1-NEXT:    retq
   1266 ;
   1267 ; AVX2-LABEL: shuffle_v8i32_10005444:
   1268 ; AVX2:       # BB#0:
   1269 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
   1270 ; AVX2-NEXT:    retq
   1271   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
   1272   ret <8 x i32> %shuffle
   1273 }
   1274 
   1275 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input mask applying the pattern [2,2,0,0] inside each 128-bit half.
        ; The AVX1 run expects one vpermilps; the AVX2 run expects one vpshufd with
        ; the same index list.
   1276 ; AVX1-LABEL: shuffle_v8i32_22006644:
   1277 ; AVX1:       # BB#0:
   1278 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
   1279 ; AVX1-NEXT:    retq
   1280 ;
   1281 ; AVX2-LABEL: shuffle_v8i32_22006644:
   1282 ; AVX2:       # BB#0:
   1283 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
   1284 ; AVX2-NEXT:    retq
   1285   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
   1286   ret <8 x i32> %shuffle
   1287 }
   1288 
   1289 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input mask applying the pattern [3,3,3,0] inside each 128-bit half.
        ; The AVX1 run expects one vpermilps; the AVX2 run expects one vpshufd with
        ; the same index list.
   1290 ; AVX1-LABEL: shuffle_v8i32_33307774:
   1291 ; AVX1:       # BB#0:
   1292 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
   1293 ; AVX1-NEXT:    retq
   1294 ;
   1295 ; AVX2-LABEL: shuffle_v8i32_33307774:
   1296 ; AVX2:       # BB#0:
   1297 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
   1298 ; AVX2-NEXT:    retq
   1299   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
   1300   ret <8 x i32> %shuffle
   1301 }
   1302 
   1303 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) { ; Reverses the elements inside each 128-bit lane: one vpermilps (AVX1) / vpshufd (AVX2) expected.
   1304 ; AVX1-LABEL: shuffle_v8i32_32107654:
   1305 ; AVX1:       # BB#0:
   1306 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1307 ; AVX1-NEXT:    retq
   1308 ;
   1309 ; AVX2-LABEL: shuffle_v8i32_32107654:
   1310 ; AVX2:       # BB#0:
   1311 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1312 ; AVX2-NEXT:    retq
   1313   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   1314   ret <8 x i32> %shuffle
   1315 }
   1316 
   1317 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask, same pattern [0,0,2,3] in both 128-bit lanes: one vpermilps (AVX1) / vpshufd (AVX2) expected.
   1318 ; AVX1-LABEL: shuffle_v8i32_00234467:
   1319 ; AVX1:       # BB#0:
   1320 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
   1321 ; AVX1-NEXT:    retq
   1322 ;
   1323 ; AVX2-LABEL: shuffle_v8i32_00234467:
   1324 ; AVX2:       # BB#0:
   1325 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
   1326 ; AVX2-NEXT:    retq
   1327   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
   1328   ret <8 x i32> %shuffle
   1329 }
   1330 
   1331 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) { ; Duplicates the even element of every pair: vmovsldup expected on AVX1, vpshufd on AVX2.
   1332 ; AVX1-LABEL: shuffle_v8i32_00224466:
   1333 ; AVX1:       # BB#0:
   1334 ; AVX1-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
   1335 ; AVX1-NEXT:    retq
   1336 ;
   1337 ; AVX2-LABEL: shuffle_v8i32_00224466:
   1338 ; AVX2:       # BB#0:
   1339 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
   1340 ; AVX2-NEXT:    retq
   1341   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   1342   ret <8 x i32> %shuffle
   1343 }
   1344 
   1345 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) { ; Swaps the two elements of every adjacent pair: one vpermilps (AVX1) / vpshufd (AVX2) expected.
   1346 ; AVX1-LABEL: shuffle_v8i32_10325476:
   1347 ; AVX1:       # BB#0:
   1348 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1349 ; AVX1-NEXT:    retq
   1350 ;
   1351 ; AVX2-LABEL: shuffle_v8i32_10325476:
   1352 ; AVX2:       # BB#0:
   1353 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1354 ; AVX2-NEXT:    retq
   1355   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   1356   ret <8 x i32> %shuffle
   1357 }
   1358 
   1359 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) { ; Duplicates the odd element of every pair: vmovshdup expected on AVX1, vpshufd on AVX2.
   1360 ; AVX1-LABEL: shuffle_v8i32_11335577:
   1361 ; AVX1:       # BB#0:
   1362 ; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
   1363 ; AVX1-NEXT:    retq
   1364 ;
   1365 ; AVX2-LABEL: shuffle_v8i32_11335577:
   1366 ; AVX2:       # BB#0:
   1367 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
   1368 ; AVX2-NEXT:    retq
   1369   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
   1370   ret <8 x i32> %shuffle
   1371 }
   1372 
   1373 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask, same pattern [1,0,2,3] in both 128-bit lanes: one vpermilps (AVX1) / vpshufd (AVX2) expected.
   1374 ; AVX1-LABEL: shuffle_v8i32_10235467:
   1375 ; AVX1:       # BB#0:
   1376 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
   1377 ; AVX1-NEXT:    retq
   1378 ;
   1379 ; AVX2-LABEL: shuffle_v8i32_10235467:
   1380 ; AVX2:       # BB#0:
   1381 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
   1382 ; AVX2-NEXT:    retq
   1383   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1384   ret <8 x i32> %shuffle
   1385 }
   1386 
   1387 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask, same pattern [1,0,2,2] in both 128-bit lanes: one vpermilps (AVX1) / vpshufd (AVX2) expected.
   1388 ; AVX1-LABEL: shuffle_v8i32_10225466:
   1389 ; AVX1:       # BB#0:
   1390 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
   1391 ; AVX1-NEXT:    retq
   1392 ;
   1393 ; AVX2-LABEL: shuffle_v8i32_10225466:
   1394 ; AVX2:       # BB#0:
   1395 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
   1396 ; AVX2-NEXT:    retq
   1397   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
   1398   ret <8 x i32> %shuffle
   1399 }
   1400 
   1401 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask whose two 128-bit lanes use different patterns: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1402 ; AVX1-LABEL: shuffle_v8i32_00015444:
   1403 ; AVX1:       # BB#0:
   1404 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
   1405 ; AVX1-NEXT:    retq
   1406 ;
   1407 ; AVX2-LABEL: shuffle_v8i32_00015444:
   1408 ; AVX2:       # BB#0:
   1409 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
   1410 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1411 ; AVX2-NEXT:    retq
   1412   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
   1413   ret <8 x i32> %shuffle
   1414 }
   1415 
   1416 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask whose two 128-bit lanes use different patterns: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1417 ; AVX1-LABEL: shuffle_v8i32_00204644:
   1418 ; AVX1:       # BB#0:
   1419 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
   1420 ; AVX1-NEXT:    retq
   1421 ;
   1422 ; AVX2-LABEL: shuffle_v8i32_00204644:
   1423 ; AVX2:       # BB#0:
   1424 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
   1425 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1426 ; AVX2-NEXT:    retq
   1427   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
   1428   ret <8 x i32> %shuffle
   1429 }
   1430 
   1431 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask whose two 128-bit lanes use different patterns: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1432 ; AVX1-LABEL: shuffle_v8i32_03004474:
   1433 ; AVX1:       # BB#0:
   1434 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
   1435 ; AVX1-NEXT:    retq
   1436 ;
   1437 ; AVX2-LABEL: shuffle_v8i32_03004474:
   1438 ; AVX2:       # BB#0:
   1439 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
   1440 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1441 ; AVX2-NEXT:    retq
   1442   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
   1443   ret <8 x i32> %shuffle
   1444 }
   1445 
   1446 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask whose two 128-bit lanes use different patterns: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1447 ; AVX1-LABEL: shuffle_v8i32_10004444:
   1448 ; AVX1:       # BB#0:
   1449 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
   1450 ; AVX1-NEXT:    retq
   1451 ;
   1452 ; AVX2-LABEL: shuffle_v8i32_10004444:
   1453 ; AVX2:       # BB#0:
   1454 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
   1455 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1456 ; AVX2-NEXT:    retq
   1457   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   1458   ret <8 x i32> %shuffle
   1459 }
   1460 
   1461 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask whose two 128-bit lanes use different patterns: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1462 ; AVX1-LABEL: shuffle_v8i32_22006446:
   1463 ; AVX1:       # BB#0:
   1464 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
   1465 ; AVX1-NEXT:    retq
   1466 ;
   1467 ; AVX2-LABEL: shuffle_v8i32_22006446:
   1468 ; AVX2:       # BB#0:
   1469 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
   1470 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1471 ; AVX2-NEXT:    retq
   1472   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
   1473   ret <8 x i32> %shuffle
   1474 }
   1475 
   1476 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask whose two 128-bit lanes use different patterns: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1477 ; AVX1-LABEL: shuffle_v8i32_33307474:
   1478 ; AVX1:       # BB#0:
   1479 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
   1480 ; AVX1-NEXT:    retq
   1481 ;
   1482 ; AVX2-LABEL: shuffle_v8i32_33307474:
   1483 ; AVX2:       # BB#0:
   1484 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
   1485 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1486 ; AVX2-NEXT:    retq
   1487   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
   1488   ret <8 x i32> %shuffle
   1489 }
   1490 
   1491 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) { ; Reverses the low 128-bit lane and keeps the high lane in order: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1492 ; AVX1-LABEL: shuffle_v8i32_32104567:
   1493 ; AVX1:       # BB#0:
   1494 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
   1495 ; AVX1-NEXT:    retq
   1496 ;
   1497 ; AVX2-LABEL: shuffle_v8i32_32104567:
   1498 ; AVX2:       # BB#0:
   1499 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
   1500 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1501 ; AVX2-NEXT:    retq
   1502   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   1503   ret <8 x i32> %shuffle
   1504 }
   1505 
   1506 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask whose two 128-bit lanes use different patterns: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1507 ; AVX1-LABEL: shuffle_v8i32_00236744:
   1508 ; AVX1:       # BB#0:
   1509 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
   1510 ; AVX1-NEXT:    retq
   1511 ;
   1512 ; AVX2-LABEL: shuffle_v8i32_00236744:
   1513 ; AVX2:       # BB#0:
   1514 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
   1515 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1516 ; AVX2-NEXT:    retq
   1517   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
   1518   ret <8 x i32> %shuffle
   1519 }
   1520 
   1521 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask whose two 128-bit lanes use different patterns: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1522 ; AVX1-LABEL: shuffle_v8i32_00226644:
   1523 ; AVX1:       # BB#0:
   1524 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
   1525 ; AVX1-NEXT:    retq
   1526 ;
   1527 ; AVX2-LABEL: shuffle_v8i32_00226644:
   1528 ; AVX2:       # BB#0:
   1529 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
   1530 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1531 ; AVX2-NEXT:    retq
   1532   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
   1533   ret <8 x i32> %shuffle
   1534 }
   1535 
   1536 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) { ; Swaps adjacent pairs in the low 128-bit lane only: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1537 ; AVX1-LABEL: shuffle_v8i32_10324567:
   1538 ; AVX1:       # BB#0:
   1539 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
   1540 ; AVX1-NEXT:    retq
   1541 ;
   1542 ; AVX2-LABEL: shuffle_v8i32_10324567:
   1543 ; AVX2:       # BB#0:
   1544 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
   1545 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1546 ; AVX2-NEXT:    retq
   1547   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
   1548   ret <8 x i32> %shuffle
   1549 }
   1550 
   1551 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) { ; Duplicates odd elements in the low 128-bit lane only: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1552 ; AVX1-LABEL: shuffle_v8i32_11334567:
   1553 ; AVX1:       # BB#0:
   1554 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
   1555 ; AVX1-NEXT:    retq
   1556 ;
   1557 ; AVX2-LABEL: shuffle_v8i32_11334567:
   1558 ; AVX2:       # BB#0:
   1559 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
   1560 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1561 ; AVX2-NEXT:    retq
   1562   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
   1563   ret <8 x i32> %shuffle
   1564 }
   1565 
   1566 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) { ; Identity in the low 128-bit lane, swaps elements 4 and 5 in the high lane: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1567 ; AVX1-LABEL: shuffle_v8i32_01235467:
   1568 ; AVX1:       # BB#0:
   1569 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
   1570 ; AVX1-NEXT:    retq
   1571 ;
   1572 ; AVX2-LABEL: shuffle_v8i32_01235467:
   1573 ; AVX2:       # BB#0:
   1574 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
   1575 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1576 ; AVX2-NEXT:    retq
   1577   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1578   ret <8 x i32> %shuffle
   1579 }
   1580 
   1581 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) { ; Identity in the low 128-bit lane, pattern [1,0,2,2] in the high lane: one vpermilps expected on AVX1, a constant index vector plus vpermd on AVX2.
   1582 ; AVX1-LABEL: shuffle_v8i32_01235466:
   1583 ; AVX1:       # BB#0:
   1584 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
   1585 ; AVX1-NEXT:    retq
   1586 ;
   1587 ; AVX2-LABEL: shuffle_v8i32_01235466:
   1588 ; AVX2:       # BB#0:
   1589 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
   1590 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1591 ; AVX2-NEXT:    retq
   1592   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
   1593   ret <8 x i32> %shuffle
   1594 }
   1595 
   1596 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask with undef ('u') elements: one vpermilps expected on AVX1, a partially undef index vector plus vpermd on AVX2.
   1597 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
   1598 ; AVX1:       # BB#0:
   1599 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
   1600 ; AVX1-NEXT:    retq
   1601 ;
   1602 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
   1603 ; AVX2:       # BB#0:
   1604 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
   1605 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1606 ; AVX2-NEXT:    retq
   1607   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
   1608   ret <8 x i32> %shuffle
   1609 }
   1610 
   1611 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask with undef ('u') elements: one vpermilps expected on AVX1, a partially undef index vector plus vpermd on AVX2.
   1612 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
   1613 ; AVX1:       # BB#0:
   1614 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
   1615 ; AVX1-NEXT:    retq
   1616 ;
   1617 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
   1618 ; AVX2:       # BB#0:
   1619 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
   1620 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1621 ; AVX2-NEXT:    retq
   1622   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
   1623   ret <8 x i32> %shuffle
   1624 }
   1625 
   1626 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask with undef ('u') elements: one vpermilps expected on AVX1, a partially undef index vector plus vpermd on AVX2.
   1627 ; AVX1-LABEL: shuffle_v8i32_103245uu:
   1628 ; AVX1:       # BB#0:
   1629 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
   1630 ; AVX1-NEXT:    retq
   1631 ;
   1632 ; AVX2-LABEL: shuffle_v8i32_103245uu:
   1633 ; AVX2:       # BB#0:
   1634 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
   1635 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1636 ; AVX2-NEXT:    retq
   1637   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
   1638   ret <8 x i32> %shuffle
   1639 }
   1640 
   1641 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask with undef ('u') elements: one vpermilps expected on AVX1, a partially undef index vector plus vpermd on AVX2.
   1642 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
   1643 ; AVX1:       # BB#0:
   1644 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
   1645 ; AVX1-NEXT:    retq
   1646 ;
   1647 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
   1648 ; AVX2:       # BB#0:
   1649 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
   1650 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1651 ; AVX2-NEXT:    retq
   1652   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
   1653   ret <8 x i32> %shuffle
   1654 }
   1655 
   1656 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) { ; In-lane mask with undef ('u') elements: one vpermilps expected on AVX1, a partially undef index vector plus vpermd on AVX2.
   1657 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
   1658 ; AVX1:       # BB#0:
   1659 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
   1660 ; AVX1-NEXT:    retq
   1661 ;
   1662 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
   1663 ; AVX2:       # BB#0:
   1664 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
   1665 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1666 ; AVX2-NEXT:    retq
   1667   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
   1668   ret <8 x i32> %shuffle
   1669 }
   1670 
   1671 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) { ; Mostly undef ('u') in-lane mask: one vpermilps expected on AVX1, a partially undef index vector plus vpermd on AVX2.
   1672 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
   1673 ; AVX1:       # BB#0:
   1674 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
   1675 ; AVX1-NEXT:    retq
   1676 ;
   1677 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
   1678 ; AVX2:       # BB#0:
   1679 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
   1680 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1681 ; AVX2-NEXT:    retq
   1682   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
   1683   ret <8 x i32> %shuffle
   1684 }
   1685 
   1686 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) { ; Two-input mask (hex digits 8-f select %b) that crosses 128-bit lanes: each input is shuffled separately and the results are merged by a blend.
   1687 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
   1688 ; AVX1:       # BB#0:
   1689 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
   1690 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
   1691 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1692 ; AVX1-NEXT:    vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
   1693 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1694 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
   1695 ; AVX1-NEXT:    retq
   1696 ;
   1697 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
   1698 ; AVX2:       # BB#0:
   1699 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,4,2,2,0,u,6,u>
   1700 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   1701 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
   1702 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
   1703 ; AVX2-NEXT:    retq
   1704   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
   1705   ret <8 x i32> %shuffle
   1706 }
   1707 
   1708 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) { ; Reversed low 128-bit lane of %a placed in both halves: vpermilps + vinsertf128 expected on AVX1, a constant index vector plus vpermd on AVX2.
   1709 ; AVX1-LABEL: shuffle_v8i32_32103210:
   1710 ; AVX1:       # BB#0:
   1711 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1712 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1713 ; AVX1-NEXT:    retq
   1714 ;
   1715 ; AVX2-LABEL: shuffle_v8i32_32103210:
   1716 ; AVX2:       # BB#0:
   1717 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,3,2,1,0]
   1718 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1719 ; AVX2-NEXT:    retq
   1720   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
   1721   ret <8 x i32> %shuffle
   1722 }
   1723 
   1724 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) { ; Reversed high 128-bit lane of %a placed in both halves: extract + vpermilps + insert expected on AVX1, a constant index vector plus vpermd on AVX2.
   1725 ; AVX1-LABEL: shuffle_v8i32_76547654:
   1726 ; AVX1:       # BB#0:
   1727 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1728 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1729 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1730 ; AVX1-NEXT:    retq
   1731 ;
   1732 ; AVX2-LABEL: shuffle_v8i32_76547654:
   1733 ; AVX2:       # BB#0:
   1734 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
   1735 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1736 ; AVX2-NEXT:    retq
   1737   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
   1738   ret <8 x i32> %shuffle
   1739 }
   1740 
   1741 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) { ; Full 8-element reversal of %a: lane swap (vperm2f128) + in-lane reverse expected on AVX1, a constant index vector plus vpermd on AVX2.
   1742 ; AVX1-LABEL: shuffle_v8i32_76543210:
   1743 ; AVX1:       # BB#0:
   1744 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
   1745 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1746 ; AVX1-NEXT:    retq
   1747 ;
   1748 ; AVX2-LABEL: shuffle_v8i32_76543210:
   1749 ; AVX2:       # BB#0:
   1750 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
   1751 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1752 ; AVX2-NEXT:    retq
   1753   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   1754   ret <8 x i32> %shuffle
   1755 }
   1756 
   1757 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) { ; Reversed low lane of %a, then reversed low lane of %b: 128-bit insert of %b's low half followed by an in-lane reverse.
   1758 ; AVX1-LABEL: shuffle_v8i32_3210ba98:
   1759 ; AVX1:       # BB#0:
   1760 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1761 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1762 ; AVX1-NEXT:    retq
   1763 ;
   1764 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
   1765 ; AVX2:       # BB#0:
   1766 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1767 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1768 ; AVX2-NEXT:    retq
   1769   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
   1770   ret <8 x i32> %shuffle
   1771 }
   1772 
   1773 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) { ; Reversed low lane of %a, then reversed high lane of %b: lane-granular blend followed by an in-lane reverse.
   1774 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
   1775 ; AVX1:       # BB#0:
   1776 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
   1777 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1778 ; AVX1-NEXT:    retq
   1779 ;
   1780 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
   1781 ; AVX2:       # BB#0:
   1782 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   1783 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1784 ; AVX2-NEXT:    retq
   1785   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
   1786   ret <8 x i32> %shuffle
   1787 }
   1788 
   1789 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) { ; Reversed high lane of %a, then reversed high lane of %b: 128-bit lane permute followed by an in-lane reverse.
   1790 ; AVX1-LABEL: shuffle_v8i32_7654fedc:
   1791 ; AVX1:       # BB#0:
   1792 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1793 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1794 ; AVX1-NEXT:    retq
   1795 ;
   1796 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
   1797 ; AVX2:       # BB#0:
   1798 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1799 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1800 ; AVX2-NEXT:    retq
   1801   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
   1802   ret <8 x i32> %shuffle
   1803 }
   1804 
   1805 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) { ; Reversed high lane of %b, then reversed high lane of %a: 128-bit lane permute (operands swapped) followed by an in-lane reverse.
   1806 ; AVX1-LABEL: shuffle_v8i32_fedc7654:
   1807 ; AVX1:       # BB#0:
   1808 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
   1809 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1810 ; AVX1-NEXT:    retq
   1811 ;
   1812 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
   1813 ; AVX2:       # BB#0:
   1814 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
   1815 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1816 ; AVX2-NEXT:    retq
   1817   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
   1818   ret <8 x i32> %shuffle
   1819 }
   1820 
   1821 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) { ; Reversed low lane of %b, then reversed high lane of %a: lane-granular blend followed by an in-lane reverse.
   1822 ; AVX1-LABEL: shuffle_v8i32_ba987654:
   1823 ; AVX1:       # BB#0:
   1824 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
   1825 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1826 ; AVX1-NEXT:    retq
   1827 ;
   1828 ; AVX2-LABEL: shuffle_v8i32_ba987654:
   1829 ; AVX2:       # BB#0:
   1830 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
   1831 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1832 ; AVX2-NEXT:    retq
   1833   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
   1834   ret <8 x i32> %shuffle
   1835 }
   1836 
   1837 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) { ; Reversed low lane of %b, then reversed low lane of %a: 128-bit insert of %a's low half above %b's, followed by an in-lane reverse.
   1838 ; AVX1-LABEL: shuffle_v8i32_ba983210:
   1839 ; AVX1:       # BB#0:
   1840 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1841 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1842 ; AVX1-NEXT:    retq
   1843 ;
   1844 ; AVX2-LABEL: shuffle_v8i32_ba983210:
   1845 ; AVX2:       # BB#0:
   1846 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
   1847 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1848 ; AVX2-NEXT:    retq
   1849   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
   1850   ret <8 x i32> %shuffle
   1851 }
   1852 
   1853 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) { ; Shuffle of a zero vector with %a ('z' = zero element): a single per-lane byte shift (vpslldq) expected on AVX2, vxorps + vshufps on AVX1.
   1854 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
   1855 ; AVX1:       # BB#0:
   1856 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1857 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
   1858 ; AVX1-NEXT:    retq
   1859 ;
   1860 ; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
   1861 ; AVX2:       # BB#0:
   1862 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
   1863 ; AVX2-NEXT:    retq
   1864   %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
   1865   ret <8 x i32> %shuffle
   1866 }
   1867 
   1868 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) { ; Shuffle of a zero vector with %a ('z' = zero element): a single per-lane byte shift (vpsrldq) expected on AVX2, vxorps + two vshufps on AVX1.
   1869 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
   1870 ; AVX1:       # BB#0:
   1871 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1872 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
   1873 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   1874 ; AVX1-NEXT:    retq
   1875 ;
   1876 ; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
   1877 ; AVX2:       # BB#0:
   1878 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
   1879 ; AVX2-NEXT:    retq
   1880   %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
   1881   ret <8 x i32> %shuffle
   1882 }
   1883 
   1884 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) { ; Interleaves the low elements of each lane of %b and %a (unpack-low). NOTE(review): 'b' in the name would be index 11, but mask element 4 is i32 12, matching the checked ymm1[4]; the name looks like a misnomer.
   1885 ; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
   1886 ; AVX1:       # BB#0:
   1887 ; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
   1888 ; AVX1-NEXT:    retq
   1889 ;
   1890 ; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
   1891 ; AVX2:       # BB#0:
   1892 ; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
   1893 ; AVX2-NEXT:    retq
   1894   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
   1895   ret <8 x i32> %shuffle
   1896 }
   1897 
   1898 define <8 x float> @splat_mem_v8f32_2(float* %p) { ; Splat of a float loaded from %p into all 8 elements: both run lines expect a single vbroadcastss from memory.
   1899 ; ALL-LABEL: splat_mem_v8f32_2:
   1900 ; ALL:       # BB#0:
   1901 ; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
   1902 ; ALL-NEXT:    retq
   1903   %1 = load float, float* %p
   1904   %2 = insertelement <4 x float> undef, float %1, i32 0
   1905   %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
   1906   ret <8 x float> %3
   1907 }
   1908 
   1909 define <8 x float> @splat_v8f32(<4 x float> %r) { ; Splat of element 0 of a 4-element register into 8 elements: vpermilps + vinsertf128 expected on AVX1, a register vbroadcastss on AVX2.
   1910 ; AVX1-LABEL: splat_v8f32:
   1911 ; AVX1:       # BB#0:
   1912 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1913 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1914 ; AVX1-NEXT:    retq
   1915 ;
   1916 ; AVX2-LABEL: splat_v8f32:
   1917 ; AVX2:       # BB#0:
   1918 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
   1919 ; AVX2-NEXT:    retq
   1920   %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
   1921   ret <8 x float> %1
   1922 }
   1923 
   1924 ;
   1925 ; Shuffle to logical bit shifts
   1926 ;
   1927 
   1928 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) { ; Moves each even element up one slot with zero ('z') below it: a 64-bit left shift by 32 (vpsllq) expected on AVX2, vxorps + two shuffles on AVX1.
   1929 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
   1930 ; AVX1:       # BB#0:
   1931 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1932 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
   1933 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
   1934 ; AVX1-NEXT:    retq
   1935 ;
   1936 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
   1937 ; AVX2:       # BB#0:
   1938 ; AVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
   1939 ; AVX2-NEXT:    retq
   1940   %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
   1941   ret <8 x i32> %shuffle
   1942 }
   1943 
   1944 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) { ; Moves each odd element down one slot with zero ('z') or undef above it: a 64-bit right shift by 32 (vpsrlq) expected on AVX2, vxorps + two shuffles on AVX1.
   1945 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
   1946 ; AVX1:       # BB#0:
   1947 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   1948 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   1949 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   1950 ; AVX1-NEXT:    retq
   1951 ;
   1952 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
   1953 ; AVX2:       # BB#0:
   1954 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
   1955 ; AVX2-NEXT:    retq
   1956   %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
   1957   ret <8 x i32> %shuffle
   1958 }
   1959 
   1960 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) { ; Per lane: last element of %b followed by the first three of %a; a single vpalignr expected on AVX2, two vshufps on AVX1.
   1961 ; AVX1-LABEL: shuffle_v8i32_B012F456:
   1962 ; AVX1:       # BB#0:
   1963 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
   1964 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
   1965 ; AVX1-NEXT:    retq
   1966 ;
   1967 ; AVX2-LABEL: shuffle_v8i32_B012F456:
   1968 ; AVX2:       # BB#0:
   1969 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
   1970 ; AVX2-NEXT:    retq
   1971   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
   1972   ret <8 x i32> %shuffle
   1973 }
   1974 
   1975 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) { ; Per lane: last three elements of %a followed by the first of %b; a single vpalignr expected on AVX2, two vshufps on AVX1.
   1976 ; AVX1-LABEL: shuffle_v8i32_1238567C:
   1977 ; AVX1:       # BB#0:
   1978 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
   1979 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   1980 ; AVX1-NEXT:    retq
   1981 ;
   1982 ; AVX2-LABEL: shuffle_v8i32_1238567C:
   1983 ; AVX2:       # BB#0:
   1984 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
   1985 ; AVX2-NEXT:    retq
   1986   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
   1987   ret <8 x i32> %shuffle
   1988 }
   1989 
   1990 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle whose mask is spelled in the function name as hex
        ; digits (9..F = 9..15). In each group of four result elements, the
        ; first three are elements 1-3 or 5-7 of %b and the last comes from %a
        ; (element 0 or 4).
        ; The +avx checks expect a pair of vshufps; the +avx2 checks expect a
        ; single vpalignr with ymm1 as the leading source.
   1991 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
   1992 ; AVX1:       # BB#0:
   1993 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
   1994 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
   1995 ; AVX1-NEXT:    retq
   1996 ;
   1997 ; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
   1998 ; AVX2:       # BB#0:
   1999 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
   2000 ; AVX2-NEXT:    retq
        ; Mask indices 0-7 select from %a, 8-15 select from %b.
   2001   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
   2002   ret <8 x i32> %shuffle
   2003 }
   2004 
   2005 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle whose mask is spelled in the function name as hex
        ; digits (A = 10, C..E = 12..14). In each group of four result elements,
        ; the first comes from %a (element 3 or 7) and the remaining three are
        ; elements 0-2 or 4-6 of %b.
        ; The +avx checks expect a pair of vshufps; the +avx2 checks expect a
        ; single vpalignr.
   2006 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
   2007 ; AVX1:       # BB#0:
   2008 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
   2009 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
   2010 ; AVX1-NEXT:    retq
   2011 ;
   2012 ; AVX2-LABEL: shuffle_v8i32_389A7CDE:
   2013 ; AVX2:       # BB#0:
   2014 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
   2015 ; AVX2-NEXT:    retq
        ; Mask indices 0-7 select from %a, 8-15 select from %b.
   2016   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
   2017   ret <8 x i32> %shuffle
   2018 }
   2019 
   2020 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
        ; Effectively a single-input shuffle: every mask index is below 8, so
        ; only %a contributes. Each group of four elements is reordered as
        ; [3,0,1,2] relative to the start of that group.
        ; The checks expect one instruction per configuration: vpermilps with
        ; +avx and vpshufd with +avx2, both with the same element pattern.
   2021 ; AVX1-LABEL: shuffle_v8i32_30127456:
   2022 ; AVX1:       # BB#0:
   2023 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
   2024 ; AVX1-NEXT:    retq
   2025 ;
   2026 ; AVX2-LABEL: shuffle_v8i32_30127456:
   2027 ; AVX2:       # BB#0:
   2028 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
   2029 ; AVX2-NEXT:    retq
        ; %b is passed as the second operand but no mask index refers to it.
   2030   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
   2031   ret <8 x i32> %shuffle
   2032 }
   2033 
   2034 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
        ; Effectively a single-input shuffle: every mask index is below 8, so
        ; only %a contributes. Each group of four elements is reordered as
        ; [1,2,3,0] relative to the start of that group.
        ; The checks expect one instruction per configuration: vpermilps with
        ; +avx and vpshufd with +avx2, both with the same element pattern.
   2035 ; AVX1-LABEL: shuffle_v8i32_12305674:
   2036 ; AVX1:       # BB#0:
   2037 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
   2038 ; AVX1-NEXT:    retq
   2039 ;
   2040 ; AVX2-LABEL: shuffle_v8i32_12305674:
   2041 ; AVX2:       # BB#0:
   2042 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
   2043 ; AVX2-NEXT:    retq
        ; %b is passed as the second operand but no mask index refers to it.
   2044   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
   2045   ret <8 x i32> %shuffle
   2046 }
   2047 
   2048 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
        ; Loads two <2 x float> values and places them side by side in lanes
        ; 0-3 of an <8 x float>; lanes 4-7 are undef. In this variant each
        ; input is first widened to eight lanes, then the widened vectors are
        ; combined by a third shuffle.
        ; Both RUN configurations share the checks below: a vmovq load followed
        ; by a vmovhpd from (%rsi).
   2049 ; ALL-LABEL: concat_v2f32_1:
   2050 ; ALL:       # BB#0: # %entry
   2051 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   2052 ; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
   2053 ; ALL-NEXT:    retq
   2054 entry:
   2055   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2056   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
        ; Widen each 2-element value to 8 lanes; only lanes 0-1 are defined.
   2057   %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   2058   %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
        ; Lanes 0-1 come from %tmp73; indices 8 and 9 pick lanes 0-1 of %tmp75.
   2059   %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
   2060   ret <8 x float> %tmp76
   2061 }
   2062 
   2063 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
        ; Loads two <2 x float> values and places them side by side in lanes
        ; 0-3 of an <8 x float>; lanes 4-7 are undef. In this variant a single
        ; shufflevector performs both the concatenation and the widening.
        ; Both RUN configurations share the checks below: a vmovq load followed
        ; by a vmovhpd from (%rsi).
   2064 ; ALL-LABEL: concat_v2f32_2:
   2065 ; ALL:       # BB#0: # %entry
   2066 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   2067 ; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
   2068 ; ALL-NEXT:    retq
   2069 entry:
   2070   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2071   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
        ; The operands have two elements each, so indices 0-1 select from
        ; %tmp72 and indices 2-3 select from %tmp74.
   2072   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   2073   ret <8 x float> %tmp76
   2074 }
   2075 
   2076 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
        ; Loads two <2 x float> values and places them side by side in lanes
        ; 0-3 of an <8 x float>; lanes 4-7 are undef. In this variant the two
        ; inputs are first concatenated into a <4 x float>, which is then
        ; widened to eight lanes.
        ; Both RUN configurations share the checks below: a vmovq load followed
        ; by a vmovhpd from (%rsi).
   2077 ; ALL-LABEL: concat_v2f32_3:
   2078 ; ALL:       # BB#0: # %entry
   2079 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   2080 ; ALL-NEXT:    vmovhpd (%rsi), %xmm0, %xmm0
   2081 ; ALL-NEXT:    retq
   2082 entry:
   2083   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2084   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
        ; Concatenate: indices 0-1 select from %tmp72, 2-3 from %tmp74.
   2085   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
        ; Widen the 4-element result to 8 lanes, leaving lanes 4-7 undef.
   2086   %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   2087   ret <8 x float> %res
   2088 }
   2089 
   2090 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
        ; Builds an <8 x i32> whose lane 0 is an i32 loaded from %ptr and whose
        ; lanes 1-7 are zero.
        ; Both check blocks expect the same output: a single vmovd from memory
        ; into xmm0 whose other shown elements are zero, with no separate
        ; shuffle or blend instruction.
   2091 ; AVX1-LABEL: insert_mem_and_zero_v8i32:
   2092 ; AVX1:       # BB#0:
   2093 ; AVX1-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2094 ; AVX1-NEXT:    retq
   2095 ;
   2096 ; AVX2-LABEL: insert_mem_and_zero_v8i32:
   2097 ; AVX2:       # BB#0:
   2098 ; AVX2-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2099 ; AVX2-NEXT:    retq
   2100   %a = load i32, i32* %ptr
   2101   %v = insertelement <8 x i32> undef, i32 %a, i32 0
        ; Index 0 keeps the inserted scalar from %v; indices 9-15 select lanes
        ; 1-7 of the zeroinitializer operand.
   2102   %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2103   ret <8 x i32> %shuffle
   2104 }
   2105 
   2106