Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1
      3 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX2
      4 
      5 target triple = "x86_64-unknown-unknown"
      6 
      7 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
        ; Splat: every result lane is element 0 of %a; the mask never selects from %b.
        ; Expected code: vpermilps + vinsertf128 with +avx, a single vbroadcastss with +avx2.
      8 ; AVX1-LABEL: shuffle_v8f32_00000000:
      9 ; AVX1:       # BB#0:
     10 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
     11 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     12 ; AVX1-NEXT:    retq
     13 ;
     14 ; AVX2-LABEL: shuffle_v8f32_00000000:
     15 ; AVX2:       # BB#0:
     16 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
     17 ; AVX2-NEXT:    retq
     18   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     19   ret <8 x float> %shuffle
     20 }
     21 
     22 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
        ; Every result lane is element 0 of %a except lane 6, which takes element 1.
        ; Expected code: two 128-bit vpermilps + vinsertf128 with +avx, a constant-index vpermps with +avx2.
     23 ; AVX1-LABEL: shuffle_v8f32_00000010:
     24 ; AVX1:       # BB#0:
     25 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     26 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
     27 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     28 ; AVX1-NEXT:    retq
     29 ;
     30 ; AVX2-LABEL: shuffle_v8f32_00000010:
     31 ; AVX2:       # BB#0:
     32 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
     33 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     34 ; AVX2-NEXT:    retq
     35   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
     36   ret <8 x float> %shuffle
     37 }
     38 
     39 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
        ; Every result lane is element 0 of %a except lane 5, which takes element 2.
        ; Expected code: two 128-bit vpermilps + vinsertf128 with +avx, a constant-index vpermps with +avx2.
     40 ; AVX1-LABEL: shuffle_v8f32_00000200:
     41 ; AVX1:       # BB#0:
     42 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     43 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
     44 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     45 ; AVX1-NEXT:    retq
     46 ;
     47 ; AVX2-LABEL: shuffle_v8f32_00000200:
     48 ; AVX2:       # BB#0:
     49 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
     50 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     51 ; AVX2-NEXT:    retq
     52   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
     53   ret <8 x float> %shuffle
     54 }
     55 
     56 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
        ; Every result lane is element 0 of %a except lane 4, which takes element 3.
        ; Expected code: two 128-bit vpermilps + vinsertf128 with +avx, a constant-index vpermps with +avx2.
     57 ; AVX1-LABEL: shuffle_v8f32_00003000:
     58 ; AVX1:       # BB#0:
     59 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     60 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
     61 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     62 ; AVX1-NEXT:    retq
     63 ;
     64 ; AVX2-LABEL: shuffle_v8f32_00003000:
     65 ; AVX2:       # BB#0:
     66 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
     67 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     68 ; AVX2-NEXT:    retq
     69   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
     70   ret <8 x float> %shuffle
     71 }
     72 
     73 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
        ; Every result lane is element 0 of %a except lane 3, which takes element 4 (from the upper 128-bit half).
        ; Expected code: vperm2f128 half swap, vpermilps and a vblendps with +avx; a constant-index vpermps with +avx2.
     74 ; AVX1-LABEL: shuffle_v8f32_00040000:
     75 ; AVX1:       # BB#0:
     76 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
     77 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
     78 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
     79 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
     80 ; AVX1-NEXT:    retq
     81 ;
     82 ; AVX2-LABEL: shuffle_v8f32_00040000:
     83 ; AVX2:       # BB#0:
     84 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
     85 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     86 ; AVX2-NEXT:    retq
     87   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
     88   ret <8 x float> %shuffle
     89 }
     90 
     91 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
        ; Every result lane is element 0 of %a except lane 2, which takes element 5 (from the upper 128-bit half).
        ; Expected code: vperm2f128 half swap, vblendps and vpermilps with +avx; a constant-index vpermps with +avx2.
     92 ; AVX1-LABEL: shuffle_v8f32_00500000:
     93 ; AVX1:       # BB#0:
     94 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
     95 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
     96 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
     97 ; AVX1-NEXT:    retq
     98 ;
     99 ; AVX2-LABEL: shuffle_v8f32_00500000:
    100 ; AVX2:       # BB#0:
    101 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
    102 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    103 ; AVX2-NEXT:    retq
    104   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    105   ret <8 x float> %shuffle
    106 }
    107 
    108 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
        ; Every result lane is element 0 of %a except lane 1, which takes element 6 (from the upper 128-bit half).
        ; Expected code: vperm2f128 half swap, vblendpd and vpermilps with +avx; a constant-index vpermps with +avx2.
    109 ; AVX1-LABEL: shuffle_v8f32_06000000:
    110 ; AVX1:       # BB#0:
    111 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    112 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    113 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
    114 ; AVX1-NEXT:    retq
    115 ;
    116 ; AVX2-LABEL: shuffle_v8f32_06000000:
    117 ; AVX2:       # BB#0:
    118 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
    119 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    120 ; AVX2-NEXT:    retq
    121   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    122   ret <8 x float> %shuffle
    123 }
    124 
    125 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
        ; Every result lane is element 0 of %a except lane 0, which takes element 7 (from the upper 128-bit half).
        ; Expected code: vperm2f128 half swap, vblendpd and vpermilps with +avx; with +avx2 the vpermps index
        ; vector is built in a register (movl $7 / vmovd) instead of being loaded as a constant.
    126 ; AVX1-LABEL: shuffle_v8f32_70000000:
    127 ; AVX1:       # BB#0:
    128 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    129 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    130 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
    131 ; AVX1-NEXT:    retq
    132 ;
    133 ; AVX2-LABEL: shuffle_v8f32_70000000:
    134 ; AVX2:       # BB#0:
    135 ; AVX2-NEXT:    movl $7, %eax
    136 ; AVX2-NEXT:    vmovd %eax, %xmm1
    137 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    138 ; AVX2-NEXT:    retq
    139   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    140   ret <8 x float> %shuffle
    141 }
    142 
    143 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
        ; Repeats the low element pair of each 128-bit half of %a (0,1,0,1 and 4,5,4,5).
        ; Both RUN lines expect a single vmovddup.
    144 ; ALL-LABEL: shuffle_v8f32_01014545:
    145 ; ALL:       # BB#0:
    146 ; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    147 ; ALL-NEXT:    retq
    148   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    149   ret <8 x float> %shuffle
    150 }
    151 
    152 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
        ; Duplicates each of elements 0..3 of %a into two adjacent result lanes.
        ; Expected code: two 128-bit vpermilps + vinsertf128 with +avx, a constant-index vpermps with +avx2.
    153 ; AVX1-LABEL: shuffle_v8f32_00112233:
    154 ; AVX1:       # BB#0:
    155 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
    156 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
    157 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    158 ; AVX1-NEXT:    retq
    159 ;
    160 ; AVX2-LABEL: shuffle_v8f32_00112233:
    161 ; AVX2:       # BB#0:
    162 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
    163 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    164 ; AVX2-NEXT:    retq
    165   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
    166   ret <8 x float> %shuffle
    167 }
    168 
    169 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
        ; Result lanes 0..3 are element 0 of %a; lanes 4..7 are element 1 of %a.
        ; Expected code: two 128-bit vpermilps + vinsertf128 with +avx, a constant-index vpermps with +avx2.
    170 ; AVX1-LABEL: shuffle_v8f32_00001111:
    171 ; AVX1:       # BB#0:
    172 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    173 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    174 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    175 ; AVX1-NEXT:    retq
    176 ;
    177 ; AVX2-LABEL: shuffle_v8f32_00001111:
    178 ; AVX2:       # BB#0:
    179 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
    180 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    181 ; AVX2-NEXT:    retq
    182   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
    183   ret <8 x float> %shuffle
    184 }
    185 
    186 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
        ; Position-preserving blend: even lanes come from %b (mask indices 8,10,12,14), odd lanes from %a.
        ; Both RUN lines expect a single vblendps.
    187 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
    188 ; ALL:       # BB#0:
    189 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
    190 ; ALL-NEXT:    retq
    191   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
    192   ret <8 x float> %shuffle
    193 }
    194 
    195 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
        ; Alternates element 0 of %a (mask index 0) and element 0 of %b (mask index 8) across all eight lanes.
        ; Expected code: vshufps + vpermilps + vinsertf128 with +avx; vinsertps + vbroadcastsd with +avx2.
    196 ; AVX1-LABEL: shuffle_v8f32_08080808:
    197 ; AVX1:       # BB#0:
    198 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
    199 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
    200 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    201 ; AVX1-NEXT:    retq
    202 ;
    203 ; AVX2-LABEL: shuffle_v8f32_08080808:
    204 ; AVX2:       # BB#0:
    205 ; AVX2-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],xmm1[0],zero,zero
    206 ; AVX2-NEXT:    vbroadcastsd %xmm0, %ymm0
    207 ; AVX2-NEXT:    retq
    208   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
    209   ret <8 x float> %shuffle
    210 }
    211 
    212 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
        ; Within each 128-bit half, alternates the first element of %a's half with the first element of %b's half
        ; (mask 0,8,0,8 then 4,12,4,12). Both RUN lines expect vshufps followed by vpermilps.
    213 ; ALL-LABEL: shuffle_v8f32_08084c4c:
    214 ; ALL:       # BB#0:
    215 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
    216 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
    217 ; ALL-NEXT:    retq
    218   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
    219   ret <8 x float> %shuffle
    220 }
    221 
    222 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
        ; Per 128-bit half: the half's first element of %b twice, then the half's last two elements of %a
        ; (mask 8,8,2,3 then 12,12,6,7). Both RUN lines expect a single vshufps.
    223 ; ALL-LABEL: shuffle_v8f32_8823cc67:
    224 ; ALL:       # BB#0:
    225 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
    226 ; ALL-NEXT:    retq
    227   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
    228   ret <8 x float> %shuffle
    229 }
    230 
    231 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
        ; Per 128-bit half: the first two elements of %b swapped, then the last two elements of %a swapped
        ; (mask 9,8,3,2 then 13,12,7,6). Both RUN lines expect a single vshufps.
    232 ; ALL-LABEL: shuffle_v8f32_9832dc76:
    233 ; ALL:       # BB#0:
    234 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
    235 ; ALL-NEXT:    retq
    236   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
    237   ret <8 x float> %shuffle
    238 }
    239 
    240 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
        ; Per 128-bit half: the first two elements of %b swapped, then the first two elements of %a swapped
        ; (mask 9,8,1,0 then 13,12,5,4). Both RUN lines expect a single vshufps.
    241 ; ALL-LABEL: shuffle_v8f32_9810dc54:
    242 ; ALL:       # BB#0:
    243 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
    244 ; ALL-NEXT:    retq
    245   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
    246   ret <8 x float> %shuffle
    247 }
    248 
    249 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
        ; Interleaves the first two elements of each 128-bit half of %a with the matching elements of %b
        ; (mask 0,8,1,9 then 4,12,5,13). Both RUN lines expect a single vunpcklps.
    250 ; ALL-LABEL: shuffle_v8f32_08194c5d:
    251 ; ALL:       # BB#0:
    252 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
    253 ; ALL-NEXT:    retq
    254   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
    255   ret <8 x float> %shuffle
    256 }
    257 
    258 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
        ; Interleaves the last two elements of each 128-bit half of %a with the matching elements of %b
        ; (mask 2,10,3,11 then 6,14,7,15). Both RUN lines expect a single vunpckhps.
    259 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
    260 ; ALL:       # BB#0:
    261 ; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
    262 ; ALL-NEXT:    retq
    263   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
    264   ret <8 x float> %shuffle
    265 }
    266 
    267 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
        ; Interleaves elements 0..3 of %a with elements 0..3 of %b across the whole 256-bit result
        ; (mask 0,8,1,9,2,10,3,11), so only the low 128 bits of each input are read.
        ; Expected code: 128-bit vunpckhps/vunpcklps + vinsertf128 with +avx; two vpermps + vblendps with +avx2.
    268 ; AVX1-LABEL: shuffle_v8f32_08192a3b:
    269 ; AVX1:       # BB#0:
    270 ; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    271 ; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    272 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    273 ; AVX1-NEXT:    retq
    274 ;
    275 ; AVX2-LABEL: shuffle_v8f32_08192a3b:
    276 ; AVX2:       # BB#0:
    277 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
    278 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    279 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
    280 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    281 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    282 ; AVX2-NEXT:    retq
    283   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    284   ret <8 x float> %shuffle
    285 }
    286 
    287 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
        ; Lanes 0 and 4 take elements 0 and 1 of %a; the remaining lanes take elements 0,1,1 and 2,3,3 of %b
        ; (mask 0,8,9,9,1,10,11,11).
        ; Expected code: 128-bit vshufps/vblendpd/vpermilps + vinsertf128 with +avx; two vpermps + vblendps with +avx2.
    288 ; AVX1-LABEL: shuffle_v8f32_08991abb:
    289 ; AVX1:       # BB#0:
    290 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
    291 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
    292 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
    293 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
    294 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    295 ; AVX1-NEXT:    retq
    296 ;
    297 ; AVX2-LABEL: shuffle_v8f32_08991abb:
    298 ; AVX2:       # BB#0:
    299 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
    300 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    301 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
    302 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    303 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    304 ; AVX2-NEXT:    retq
    305   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
    306   ret <8 x float> %shuffle
    307 }
    308 
    309 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
        ; Even lanes take elements 0..3 of %a; odd lanes keep %b's element at the same position
        ; (mask 0,9,1,11,2,13,3,15).
        ; Expected code: %a is spread out first (vpermilps + vinsertf128 with +avx, vpermps with +avx2), then one vblendps.
    310 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
    311 ; AVX1:       # BB#0:
    312 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
    313 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
    314 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    315 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    316 ; AVX1-NEXT:    retq
    317 ;
    318 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
    319 ; AVX2:       # BB#0:
    320 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
    321 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    322 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    323 ; AVX2-NEXT:    retq
    324   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
    325   ret <8 x float> %shuffle
    326 }
    327 
    328 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
        ; Lanes 0 and 4 take elements 0 and 1 of %a; every other lane keeps %b's element at the same position
        ; (mask 0,9,10,11,1,13,14,15).
        ; Expected code: vmovshdup + vinsertf128 with +avx, vpermps with +avx2, followed in both cases by one vblendps.
    329 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
    330 ; AVX1:       # BB#0:
    331 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
    332 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    333 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    334 ; AVX1-NEXT:    retq
    335 ;
    336 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
    337 ; AVX2:       # BB#0:
    338 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
    339 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    340 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    341 ; AVX2-NEXT:    retq
    342   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
    343   ret <8 x float> %shuffle
    344 }
    345 
    346 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half; both halves use the pattern 0,0,0,1.
        ; Both RUN lines expect a single vpermilps.
    347 ; ALL-LABEL: shuffle_v8f32_00014445:
    348 ; ALL:       # BB#0:
    349 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
    350 ; ALL-NEXT:    retq
    351   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
    352   ret <8 x float> %shuffle
    353 }
    354 
    355 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half; both halves use the pattern 0,0,2,0.
        ; Both RUN lines expect a single vpermilps.
    356 ; ALL-LABEL: shuffle_v8f32_00204464:
    357 ; ALL:       # BB#0:
    358 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
    359 ; ALL-NEXT:    retq
    360   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
    361   ret <8 x float> %shuffle
    362 }
    363 
    364 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half; both halves use the pattern 0,3,0,0.
        ; Both RUN lines expect a single vpermilps.
    365 ; ALL-LABEL: shuffle_v8f32_03004744:
    366 ; ALL:       # BB#0:
    367 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
    368 ; ALL-NEXT:    retq
    369   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
    370   ret <8 x float> %shuffle
    371 }
    372 
    373 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half; both halves use the pattern 1,0,0,0.
        ; Both RUN lines expect a single vpermilps.
    374 ; ALL-LABEL: shuffle_v8f32_10005444:
    375 ; ALL:       # BB#0:
    376 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
    377 ; ALL-NEXT:    retq
    378   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
    379   ret <8 x float> %shuffle
    380 }
    381 
    382 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half; both halves use the pattern 2,2,0,0.
        ; Both RUN lines expect a single vpermilps.
    383 ; ALL-LABEL: shuffle_v8f32_22006644:
    384 ; ALL:       # BB#0:
    385 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
    386 ; ALL-NEXT:    retq
    387   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
    388   ret <8 x float> %shuffle
    389 }
    390 
    391 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half; both halves use the pattern 3,3,3,0.
        ; Both RUN lines expect a single vpermilps.
    392 ; ALL-LABEL: shuffle_v8f32_33307774:
    393 ; ALL:       # BB#0:
    394 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
    395 ; ALL-NEXT:    retq
    396   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
    397   ret <8 x float> %shuffle
    398 }
    399 
    400 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
        ; Reverses the four elements inside each 128-bit half of %a (3,2,1,0 then 7,6,5,4).
        ; Both RUN lines expect a single vpermilps.
    401 ; ALL-LABEL: shuffle_v8f32_32107654:
    402 ; ALL:       # BB#0:
    403 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    404 ; ALL-NEXT:    retq
    405   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    406   ret <8 x float> %shuffle
    407 }
    408 
    409 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half; both halves use the pattern 0,0,2,3.
        ; Both RUN lines expect a single vpermilps.
    410 ; ALL-LABEL: shuffle_v8f32_00234467:
    411 ; ALL:       # BB#0:
    412 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
    413 ; ALL-NEXT:    retq
    414   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
    415   ret <8 x float> %shuffle
    416 }
    417 
    418 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
        ; Duplicates every even-indexed element of %a into the following odd lane (0,0,2,2,4,4,6,6).
        ; Both RUN lines expect the dedicated vmovsldup instruction rather than vpermilps.
    419 ; ALL-LABEL: shuffle_v8f32_00224466:
    420 ; ALL:       # BB#0:
    421 ; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
    422 ; ALL-NEXT:    retq
    423   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
    424   ret <8 x float> %shuffle
    425 }
    426 
    427 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
        ; Swaps each adjacent pair of elements of %a (1,0,3,2,5,4,7,6).
        ; Both RUN lines expect a single vpermilps.
    428 ; ALL-LABEL: shuffle_v8f32_10325476:
    429 ; ALL:       # BB#0:
    430 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
    431 ; ALL-NEXT:    retq
    432   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
    433   ret <8 x float> %shuffle
    434 }
    435 
    436 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
        ; Duplicates every odd-indexed element of %a into the preceding even lane (1,1,3,3,5,5,7,7).
        ; Both RUN lines expect the dedicated vmovshdup instruction rather than vpermilps.
    437 ; ALL-LABEL: shuffle_v8f32_11335577:
    438 ; ALL:       # BB#0:
    439 ; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
    440 ; ALL-NEXT:    retq
    441   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
    442   ret <8 x float> %shuffle
    443 }
    444 
    445 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half; both halves use the pattern 1,0,2,3.
        ; Both RUN lines expect a single vpermilps.
    446 ; ALL-LABEL: shuffle_v8f32_10235467:
    447 ; ALL:       # BB#0:
    448 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
    449 ; ALL-NEXT:    retq
    450   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
    451   ret <8 x float> %shuffle
    452 }
    453 
    454 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half; both halves use the pattern 1,0,2,2.
        ; Both RUN lines expect a single vpermilps.
    455 ; ALL-LABEL: shuffle_v8f32_10225466:
    456 ; ALL:       # BB#0:
    457 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
    458 ; ALL-NEXT:    retq
    459   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
    460   ret <8 x float> %shuffle
    461 }
    462 
    463 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half, with a different pattern per half:
        ; 0,0,0,1 in the low half and 5,4,4,4 in the high half. Both RUN lines expect a single vpermilps.
    464 ; ALL-LABEL: shuffle_v8f32_00015444:
    465 ; ALL:       # BB#0:
    466 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
    467 ; ALL-NEXT:    retq
    468   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
    469   ret <8 x float> %shuffle
    470 }
    471 
    472 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half, with a different pattern per half:
        ; 0,0,2,0 in the low half and 4,6,4,4 in the high half. Both RUN lines expect a single vpermilps.
    473 ; ALL-LABEL: shuffle_v8f32_00204644:
    474 ; ALL:       # BB#0:
    475 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
    476 ; ALL-NEXT:    retq
    477   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
    478   ret <8 x float> %shuffle
    479 }
    480 
    481 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half, with a different pattern per half:
        ; 0,3,0,0 in the low half and 4,4,7,4 in the high half. Both RUN lines expect a single vpermilps.
    482 ; ALL-LABEL: shuffle_v8f32_03004474:
    483 ; ALL:       # BB#0:
    484 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
    485 ; ALL-NEXT:    retq
    486   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
    487   ret <8 x float> %shuffle
    488 }
    489 
    490 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half, with a different pattern per half:
        ; 1,0,0,0 in the low half and a splat of element 4 in the high half. Both RUN lines expect a single vpermilps.
    491 ; ALL-LABEL: shuffle_v8f32_10004444:
    492 ; ALL:       # BB#0:
    493 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
    494 ; ALL-NEXT:    retq
    495   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
    496   ret <8 x float> %shuffle
    497 }
    498 
    499 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half, with a different pattern per half:
        ; 2,2,0,0 in the low half and 6,4,4,6 in the high half. Both RUN lines expect a single vpermilps.
    500 ; ALL-LABEL: shuffle_v8f32_22006446:
    501 ; ALL:       # BB#0:
    502 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
    503 ; ALL-NEXT:    retq
    504   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
    505   ret <8 x float> %shuffle
    506 }
    507 
    508 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half, with a different pattern per half:
        ; 3,3,3,0 in the low half and 7,4,7,4 in the high half. Both RUN lines expect a single vpermilps.
    509 ; ALL-LABEL: shuffle_v8f32_33307474:
    510 ; ALL:       # BB#0:
    511 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
    512 ; ALL-NEXT:    retq
    513   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
    514   ret <8 x float> %shuffle
    515 }
    516 
    517 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
        ; Reverses the low 128-bit half of %a (3,2,1,0) and leaves the high half in place (4,5,6,7).
        ; Both RUN lines expect a single vpermilps.
    518 ; ALL-LABEL: shuffle_v8f32_32104567:
    519 ; ALL:       # BB#0:
    520 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
    521 ; ALL-NEXT:    retq
    522   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
    523   ret <8 x float> %shuffle
    524 }
    525 
    526 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half, with a different pattern per half:
        ; 0,0,2,3 in the low half and 6,7,4,4 in the high half. Both RUN lines expect a single vpermilps.
    527 ; ALL-LABEL: shuffle_v8f32_00236744:
    528 ; ALL:       # BB#0:
    529 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
    530 ; ALL-NEXT:    retq
    531   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
    532   ret <8 x float> %shuffle
    533 }
    534 
    535 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a that stays within each 128-bit half, with a different pattern per half:
        ; 0,0,2,2 in the low half and 6,6,4,4 in the high half. Both RUN lines expect a single vpermilps.
    536 ; ALL-LABEL: shuffle_v8f32_00226644:
    537 ; ALL:       # BB#0:
    538 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
    539 ; ALL-NEXT:    retq
    540   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
    541   ret <8 x float> %shuffle
    542 }
    543 
    544 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
        ; Swaps adjacent pairs in the low 128-bit half of %a (1,0,3,2) and leaves the high half in place (4,5,6,7).
        ; Both RUN lines expect a single vpermilps.
    545 ; ALL-LABEL: shuffle_v8f32_10324567:
    546 ; ALL:       # BB#0:
    547 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
    548 ; ALL-NEXT:    retq
    549   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
    550   ret <8 x float> %shuffle
    551 }
    552 
    553 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
        ; Duplicates the odd elements in the low 128-bit half of %a (1,1,3,3) and leaves the high half in place (4,5,6,7).
        ; Both RUN lines expect a single vpermilps.
    554 ; ALL-LABEL: shuffle_v8f32_11334567:
    555 ; ALL:       # BB#0:
    556 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
    557 ; ALL-NEXT:    retq
    558   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
    559   ret <8 x float> %shuffle
    560 }
    561 
    562 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
        ; Leaves the low 128-bit half of %a in place (0,1,2,3) and swaps the first two elements of the high half (5,4,6,7).
        ; Both RUN lines expect a single vpermilps.
    563 ; ALL-LABEL: shuffle_v8f32_01235467:
    564 ; ALL:       # BB#0:
    565 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
    566 ; ALL-NEXT:    retq
    567   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
    568   ret <8 x float> %shuffle
    569 }
    570 
    571 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
        ; Leaves the low 128-bit half of %a in place (0,1,2,3); the high half uses the pattern 5,4,6,6.
        ; Both RUN lines expect a single vpermilps.
    572 ; ALL-LABEL: shuffle_v8f32_01235466:
    573 ; ALL:       # BB#0:
    574 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
    575 ; ALL-NEXT:    retq
    576   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
    577   ret <8 x float> %shuffle
    578 }
    579 
    580 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a within each 128-bit half, with undef mask entries in lanes 3 and 5
        ; (mask 0,0,2,undef,6,undef,4,4). Both RUN lines expect a single vpermilps that keeps those lanes as 'u'.
    581 ; ALL-LABEL: shuffle_v8f32_002u6u44:
    582 ; ALL:       # BB#0:
    583 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
    584 ; ALL-NEXT:    retq
    585   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
    586   ret <8 x float> %shuffle
    587 }
    588 
    589 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a within each 128-bit half, with undef mask entries in lanes 2,3,6,7
        ; (mask 0,0,undef,undef,6,6,undef,undef). Both RUN lines expect a single vpermilps.
    590 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
    591 ; ALL:       # BB#0:
    592 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
    593 ; ALL-NEXT:    retq
    594   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
    595   ret <8 x float> %shuffle
    596 }
    597 
    598 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
        ; Swaps adjacent pairs in the low 128-bit half of %a; the high half keeps elements 4,5 and leaves lanes 6,7 undef.
        ; Both RUN lines expect a single vpermilps.
    599 ; ALL-LABEL: shuffle_v8f32_103245uu:
    600 ; ALL:       # BB#0:
    601 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
    602 ; ALL-NEXT:    retq
    603   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
    604   ret <8 x float> %shuffle
    605 }
    606 
    607 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
        ; Duplicates the odd elements in the low 128-bit half of %a; the high half leaves lanes 4,5 undef and keeps 6,7.
        ; Both RUN lines expect a single vpermilps.
    608 ; ALL-LABEL: shuffle_v8f32_1133uu67:
    609 ; ALL:       # BB#0:
    610 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
    611 ; ALL-NEXT:    retq
    612   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
    613   ret <8 x float> %shuffle
    614 }
    615 
    616 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
        ; Single-input permute of %a within each 128-bit half with undef mask entries in lanes 1,2,6,7
        ; (mask 0,undef,undef,3,5,4,undef,undef). Both RUN lines expect a single vpermilps.
    617 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
    618 ; ALL:       # BB#0:
    619 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
    620 ; ALL-NEXT:    retq
    621   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
    622   ret <8 x float> %shuffle
    623 }
    624 
    625 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
        ; Mostly-undef single-input mask: only lane 3 (element 3) and lanes 6,7 (element 6) of %a are defined.
        ; Both RUN lines expect a single vpermilps.
    626 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
    627 ; ALL:       # BB#0:
    628 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
    629 ; ALL-NEXT:    retq
    630   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
    631   ret <8 x float> %shuffle
    632 }
    633 
    634 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
        ; Irregular two-input mask <12,3,4,8,12,13,10,0>: lanes 1,2,7 take elements 3,4,0 of %a and
        ; lanes 0,3,4,5,6 take elements 4,0,4,5,2 of %b, so both inputs move data between 128-bit halves.
        ; Expected code: each input is shuffled separately (vperm2f128-based with +avx, vpermilps + vpermpd
        ; with +avx2) and the two results are merged with a final vblendps.
    635 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
    636 ; AVX1:       # BB#0:
    637 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    638 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
    639 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
    640 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
    641 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm1 = ymm2[0],ymm1[1,2],ymm2[3]
    642 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
    643 ; AVX1-NEXT:    retq
    644 ;
    645 ; AVX2-LABEL: shuffle_v8f32_c348cda0:
    646 ; AVX2:       # BB#0:
    647 ; AVX2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
    648 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,1]
    649 ; AVX2-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
    650 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,1]
    651 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
    652 ; AVX2-NEXT:    retq
    653   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
    654   ret <8 x float> %shuffle
    655 }
    656 
    657 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
        ; Mask <15,5,1,1,2,3,5,10> (hex f,5,1,1,2,3,5,a) takes only the first and last
        ; result lanes from %b (indices 15 and 10); the six middle lanes come from %a.
        ; AVX1 and AVX2 differ in how %a is rearranged but end with the same vblendps.
    658 ; AVX1-LABEL: shuffle_v8f32_f511235a:
    659 ; AVX1:       # BB#0:
    660 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
    661 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
    662 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    663 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm2 = ymm2[0,0,3,2]
    664 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
    665 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[2],ymm0[3]
    666 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
    667 ; AVX1-NEXT:    retq
    668 ;
    669 ; AVX2-LABEL: shuffle_v8f32_f511235a:
    670 ; AVX2:       # BB#0:
    671 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,5,1,1,2,3,5,u>
    672 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    673 ; AVX2-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
    674 ; AVX2-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
    675 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
    676 ; AVX2-NEXT:    retq
    677   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
    678   ret <8 x float> %shuffle
    679 }
    680 
    681 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
        ; Mask <3,2,1,0,3,2,1,0> places the low four lanes of %a, reversed, in both halves
        ; of the result. AVX1 expects vpermilps + vinsertf128; AVX2 expects vpermilps + vpermpd.
    682 ; AVX1-LABEL: shuffle_v8f32_32103210:
    683 ; AVX1:       # BB#0:
    684 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    685 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    686 ; AVX1-NEXT:    retq
    687 ;
    688 ; AVX2-LABEL: shuffle_v8f32_32103210:
    689 ; AVX2:       # BB#0:
    690 ; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    691 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
    692 ; AVX2-NEXT:    retq
    693   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
    694   ret <8 x float> %shuffle
    695 }
    696 
    697 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
        ; Mask <7,6,5,4,7,6,5,4> places the high four lanes of %a, reversed, in both halves
        ; of the result. The shared ALL checks expect vpermilps followed by vperm2f128.
    698 ; ALL-LABEL: shuffle_v8f32_76547654:
    699 ; ALL:       # BB#0:
    700 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    701 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
    702 ; ALL-NEXT:    retq
    703   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
    704   ret <8 x float> %shuffle
    705 }
    706 
    707 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
        ; Mask <7,6,5,4,3,2,1,0> reverses all eight lanes of %a; %b is not read.
        ; The shared ALL checks expect vpermilps followed by vperm2f128.
    708 ; ALL-LABEL: shuffle_v8f32_76543210:
    709 ; ALL:       # BB#0:
    710 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    711 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
    712 ; ALL-NEXT:    retq
    713   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
    714   ret <8 x float> %shuffle
    715 }
    716 
    717 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
        ; Mask <3,2,1,0,11,10,9,8> is the low four lanes of %a reversed, then the low four
        ; lanes of %b reversed. The shared ALL checks expect vinsertf128 then vpermilps.
    718 ; ALL-LABEL: shuffle_v8f32_3210ba98:
    719 ; ALL:       # BB#0:
    720 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    721 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    722 ; ALL-NEXT:    retq
    723   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
    724   ret <8 x float> %shuffle
    725 }
    726 
    727 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
        ; Mask <3,2,1,0,15,14,13,12> is the low four lanes of %a reversed, then the high
        ; four lanes of %b reversed. The shared ALL checks expect vblendpd then vpermilps.
    728 ; ALL-LABEL: shuffle_v8f32_3210fedc:
    729 ; ALL:       # BB#0:
    730 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    731 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    732 ; ALL-NEXT:    retq
    733   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
    734   ret <8 x float> %shuffle
    735 }
    736 
    737 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
        ; Mask <7,6,5,4,15,14,13,12> is the high four lanes of %a reversed, then the high
        ; four lanes of %b reversed. The shared ALL checks expect vperm2f128 then vpermilps.
    738 ; ALL-LABEL: shuffle_v8f32_7654fedc:
    739 ; ALL:       # BB#0:
    740 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    741 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    742 ; ALL-NEXT:    retq
    743   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
    744   ret <8 x float> %shuffle
    745 }
    746 
    747 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
        ; Mask <15,14,13,12,7,6,5,4> is the high four lanes of %b reversed, then the high
        ; four lanes of %a reversed. The shared ALL checks expect vperm2f128 then vpermilps.
    748 ; ALL-LABEL: shuffle_v8f32_fedc7654:
    749 ; ALL:       # BB#0:
    750 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
    751 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    752 ; ALL-NEXT:    retq
    753   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
    754   ret <8 x float> %shuffle
    755 }
    756 
    757 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
        ; Mask <1,3,5,7,9,11,13,15> gathers the odd-numbered lanes of %truc followed by the
        ; odd-numbered lanes of %tchose. AVX1 and AVX2 have separate expectations.
        ; NOTE(review) the name presumably refers to LLVM bug report 21138 - confirm.
    758 ; AVX1-LABEL: PR21138:
    759 ; AVX1:       # BB#0:
    760 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    761 ; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
    762 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
    763 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    764 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
    765 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    766 ; AVX1-NEXT:    retq
    767 ;
    768 ; AVX2-LABEL: PR21138:
    769 ; AVX2:       # BB#0:
    770 ; AVX2-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[1,3,1,3,5,7,5,7]
    771 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[0,1,0,3]
    772 ; AVX2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,3,1,3,5,7,5,7]
    773 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,3,2,3]
    774 ; AVX2-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
    775 ; AVX2-NEXT:    retq
    776   %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    777   ret <8 x float> %shuffle
    778 }
    779 
    780 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
        ; Mask <11,10,9,8,7,6,5,4> is the low four lanes of %b reversed, then the high four
        ; lanes of %a reversed. The shared ALL checks expect vblendpd then vpermilps.
    781 ; ALL-LABEL: shuffle_v8f32_ba987654:
    782 ; ALL:       # BB#0:
    783 ; ALL-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
    784 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    785 ; ALL-NEXT:    retq
    786   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
    787   ret <8 x float> %shuffle
    788 }
    789 
    790 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
        ; Mask <11,10,9,8,3,2,1,0> is the low four lanes of %b reversed, then the low four
        ; lanes of %a reversed. The shared ALL checks expect vinsertf128 then vpermilps.
    791 ; ALL-LABEL: shuffle_v8f32_ba983210:
    792 ; ALL:       # BB#0:
    793 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    794 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    795 ; ALL-NEXT:    retq
    796   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
    797   ret <8 x float> %shuffle
    798 }
    799 
    800 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
        ; Mask <8,0,u,1,12,4,u,5> alternates lanes of %b and %a, with result lanes 2 and 6
        ; left undef. The shared ALL checks expect a single vunpcklps.
    801 ; ALL-LABEL: shuffle_v8f32_80u1c4u5:
    802 ; ALL:       # BB#0:
    803 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
    804 ; ALL-NEXT:    retq
    805   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
    806   ret <8 x float> %shuffle
    807 }
    808 
    809 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
        ; Mask <10,2,u,3,14,6,15,7> alternates lanes of %b and %a, with result lane 2 left
        ; undef. The shared ALL checks expect a single vunpckhps.
    810 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
    811 ; ALL:       # BB#0:
    812 ; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
    813 ; ALL-NEXT:    retq
    814   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
    815   ret <8 x float> %shuffle
    816 }
    817 
    818 define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
        ; Mask <u,u,u,u,1,1,1,1> leaves the low half undef and fills the high half with
        ; lane 1 of %a. The shared ALL checks expect an xmm vpermilps then vinsertf128.
    819 ; ALL-LABEL: shuffle_v8f32_uuuu1111:
    820 ; ALL:       # BB#0:
    821 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    822 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    823 ; ALL-NEXT:    retq
    824   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
    825   ret <8 x float> %shuffle
    826 }
    827 
    828 define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
        ; Mask <4,4,4,4,4,4,4,4> copies lane 4 of %a into every result lane.
        ; AVX1 expects vpermilps + vperm2f128; AVX2 expects vextractf128 + vbroadcastss.
    829 ; AVX1-LABEL: shuffle_v8f32_44444444:
    830 ; AVX1:       # BB#0:
    831 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
    832 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
    833 ; AVX1-NEXT:    retq
    834 ;
    835 ; AVX2-LABEL: shuffle_v8f32_44444444:
    836 ; AVX2:       # BB#0:
    837 ; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
    838 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
    839 ; AVX2-NEXT:    retq
    840   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
    841   ret <8 x float> %shuffle
    842 }
    843 
    844 define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) {
        ; Mask <1,1,8,8,u,u,u,u> is lane 1 of %a twice, then lane 0 of %b twice; the high
        ; half is undef. The shared ALL checks expect a single xmm vshufps.
    845 ; ALL-LABEL: shuffle_v8f32_1188uuuu:
    846 ; ALL:       # BB#0:
    847 ; ALL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
    848 ; ALL-NEXT:    retq
    849   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
    850   ret <8 x float> %shuffle
    851 }
    852 
    853 define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) {
        ; Mask <u,u,u,u,3,2,1,0> leaves the low half undef and puts the low four lanes of
        ; %a, reversed, in the high half. The shared ALL checks expect vpermilps + vinsertf128.
    854 ; ALL-LABEL: shuffle_v8f32_uuuu3210:
    855 ; ALL:       # BB#0:
    856 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    857 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    858 ; ALL-NEXT:    retq
    859   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
    860   ret <8 x float> %shuffle
    861 }
    862 
    863 define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) {
        ; Mask <u,u,u,u,1,1,8,8> leaves the low half undef; the high half is lane 1 of %a
        ; twice, then lane 0 of %b twice. The shared ALL checks expect vshufps + vinsertf128.
    864 ; ALL-LABEL: shuffle_v8f32_uuuu1188:
    865 ; ALL:       # BB#0:
    866 ; ALL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
    867 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    868 ; ALL-NEXT:    retq
    869   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8>
    870   ret <8 x float> %shuffle
    871 }
    872 
    873 define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) {
        ; Mask <1,1,1,1,u,u,u,u> fills the low half with lane 1 of %a and leaves the high
        ; half undef. The shared ALL checks expect a single xmm vpermilps.
    874 ; ALL-LABEL: shuffle_v8f32_1111uuuu:
    875 ; ALL:       # BB#0:
    876 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    877 ; ALL-NEXT:    retq
    878   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
    879   ret <8 x float> %shuffle
    880 }
    881 
    882 define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
        ; Mask <5,5,5,5,u,u,u,u> fills the low half with lane 5 of %a and leaves the high
        ; half undef. The shared ALL checks expect vextractf128 then an xmm vpermilps.
    883 ; ALL-LABEL: shuffle_v8f32_5555uuuu:
    884 ; ALL:       # BB#0:
    885 ; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
    886 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    887 ; ALL-NEXT:    retq
    888   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
    889   ret <8 x float> %shuffle
    890 }
    891 
    892 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
        ; Integer variant. Mask <0,0,0,0,0,0,0,0> copies lane 0 of %a into every result lane.
        ; AVX1 expects vpermilps + vinsertf128; AVX2 expects a single vbroadcastss.
    893 ; AVX1-LABEL: shuffle_v8i32_00000000:
    894 ; AVX1:       # BB#0:
    895 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
    896 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    897 ; AVX1-NEXT:    retq
    898 ;
    899 ; AVX2-LABEL: shuffle_v8i32_00000000:
    900 ; AVX2:       # BB#0:
    901 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
    902 ; AVX2-NEXT:    retq
    903   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    904   ret <8 x i32> %shuffle
    905 }
    906 
    907 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,0,0,0,0,0,1,0> is lane 0 of %a everywhere except result lane 6, which takes
        ; lane 1. AVX1 expects two vpshufd + vinsertf128; AVX2 expects an index vector
        ; loaded with vmovdqa and applied with vpermd.
    908 ; AVX1-LABEL: shuffle_v8i32_00000010:
    909 ; AVX1:       # BB#0:
    910 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
    911 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,0,1,0]
    912 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    913 ; AVX1-NEXT:    retq
    914 ;
    915 ; AVX2-LABEL: shuffle_v8i32_00000010:
    916 ; AVX2:       # BB#0:
    917 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,0,1,0]
    918 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    919 ; AVX2-NEXT:    retq
    920   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
    921   ret <8 x i32> %shuffle
    922 }
    923 
    924 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,0,0,0,0,2,0,0> is lane 0 of %a everywhere except result lane 5, which takes
        ; lane 2. AVX1 expects two vpshufd + vinsertf128; AVX2 expects vmovdqa + vpermd.
    925 ; AVX1-LABEL: shuffle_v8i32_00000200:
    926 ; AVX1:       # BB#0:
    927 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
    928 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[0,2,0,0]
    929 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    930 ; AVX1-NEXT:    retq
    931 ;
    932 ; AVX2-LABEL: shuffle_v8i32_00000200:
    933 ; AVX2:       # BB#0:
    934 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,0,2,0,0]
    935 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    936 ; AVX2-NEXT:    retq
    937   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
    938   ret <8 x i32> %shuffle
    939 }
    940 
    941 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,0,0,0,3,0,0,0> is lane 0 of %a everywhere except result lane 4, which takes
        ; lane 3. AVX1 expects two vpshufd + vinsertf128; AVX2 expects vmovdqa + vpermd.
    942 ; AVX1-LABEL: shuffle_v8i32_00003000:
    943 ; AVX1:       # BB#0:
    944 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
    945 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,0,0,0]
    946 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    947 ; AVX1-NEXT:    retq
    948 ;
    949 ; AVX2-LABEL: shuffle_v8i32_00003000:
    950 ; AVX2:       # BB#0:
    951 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,3,0,0,0]
    952 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    953 ; AVX2-NEXT:    retq
    954   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
    955   ret <8 x i32> %shuffle
    956 }
    957 
    958 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,0,0,4,0,0,0,0> is lane 0 of %a everywhere except result lane 3, which takes
        ; lane 4 from the high half of %a. AVX1 expects vpermilps, vperm2f128, vpermilps
        ; and vblendps; AVX2 expects vmovdqa + vpermd.
    959 ; AVX1-LABEL: shuffle_v8i32_00040000:
    960 ; AVX1:       # BB#0:
    961 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
    962 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
    963 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
    964 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
    965 ; AVX1-NEXT:    retq
    966 ;
    967 ; AVX2-LABEL: shuffle_v8i32_00040000:
    968 ; AVX2:       # BB#0:
    969 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
    970 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    971 ; AVX2-NEXT:    retq
    972   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
    973   ret <8 x i32> %shuffle
    974 }
    975 
    976 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,0,5,0,0,0,0,0> is lane 0 of %a everywhere except result lane 2, which takes
        ; lane 5 from the high half of %a. AVX1 expects vperm2f128, vblendps and vpermilps;
        ; AVX2 expects vmovdqa + vpermd.
    977 ; AVX1-LABEL: shuffle_v8i32_00500000:
    978 ; AVX1:       # BB#0:
    979 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    980 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
    981 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
    982 ; AVX1-NEXT:    retq
    983 ;
    984 ; AVX2-LABEL: shuffle_v8i32_00500000:
    985 ; AVX2:       # BB#0:
    986 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
    987 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    988 ; AVX2-NEXT:    retq
    989   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    990   ret <8 x i32> %shuffle
    991 }
    992 
    993 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,6,0,0,0,0,0,0> is lane 0 of %a everywhere except result lane 1, which takes
        ; lane 6 from the high half of %a. AVX1 expects vperm2f128, vblendpd and vpermilps;
        ; AVX2 expects vmovdqa + vpermd.
    994 ; AVX1-LABEL: shuffle_v8i32_06000000:
    995 ; AVX1:       # BB#0:
    996 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    997 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
    998 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
    999 ; AVX1-NEXT:    retq
   1000 ;
   1001 ; AVX2-LABEL: shuffle_v8i32_06000000:
   1002 ; AVX2:       # BB#0:
   1003 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
   1004 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1005 ; AVX2-NEXT:    retq
   1006   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1007   ret <8 x i32> %shuffle
   1008 }
   1009 
   1010 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <7,0,0,0,0,0,0,0> is lane 0 of %a everywhere except result lane 0, which takes
        ; lane 7. AVX1 expects vperm2f128, vblendpd and vpermilps. AVX2 expects the vpermd
        ; index to be built with movl $7 + vmovd rather than loaded as a vector constant.
   1011 ; AVX1-LABEL: shuffle_v8i32_70000000:
   1012 ; AVX1:       # BB#0:
   1013 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
   1014 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3]
   1015 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
   1016 ; AVX1-NEXT:    retq
   1017 ;
   1018 ; AVX2-LABEL: shuffle_v8i32_70000000:
   1019 ; AVX2:       # BB#0:
   1020 ; AVX2-NEXT:    movl $7, %eax
   1021 ; AVX2-NEXT:    vmovd %eax, %xmm1
   1022 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1023 ; AVX2-NEXT:    retq
   1024   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1025   ret <8 x i32> %shuffle
   1026 }
   1027 
   1028 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,1,0,1,4,5,4,5> duplicates lanes 0-1 of %a in the low half and lanes 4-5 in
        ; the high half. AVX1 expects a single vmovddup; AVX2 expects a single vpshufd.
   1029 ; AVX1-LABEL: shuffle_v8i32_01014545:
   1030 ; AVX1:       # BB#0:
   1031 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
   1032 ; AVX1-NEXT:    retq
   1033 ;
   1034 ; AVX2-LABEL: shuffle_v8i32_01014545:
   1035 ; AVX2:       # BB#0:
   1036 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
   1037 ; AVX2-NEXT:    retq
   1038   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
   1039   ret <8 x i32> %shuffle
   1040 }
   1041 
   1042 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,0,1,1,2,2,3,3> repeats each of lanes 0-3 of %a twice, in order.
        ; AVX1 expects two vpshufd + vinsertf128; AVX2 expects vmovdqa + vpermd.
   1043 ; AVX1-LABEL: shuffle_v8i32_00112233:
   1044 ; AVX1:       # BB#0:
   1045 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,1,1]
   1046 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1047 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1048 ; AVX1-NEXT:    retq
   1049 ;
   1050 ; AVX2-LABEL: shuffle_v8i32_00112233:
   1051 ; AVX2:       # BB#0:
   1052 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
   1053 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1054 ; AVX2-NEXT:    retq
   1055   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
   1056   ret <8 x i32> %shuffle
   1057 }
   1058 
   1059 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,0,0,0,1,1,1,1> fills the low half with lane 0 of %a and the high half with
        ; lane 1. AVX1 expects two vpshufd + vinsertf128; AVX2 expects vmovdqa + vpermd.
   1060 ; AVX1-LABEL: shuffle_v8i32_00001111:
   1061 ; AVX1:       # BB#0:
   1062 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm1 = xmm0[0,0,0,0]
   1063 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1064 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1065 ; AVX1-NEXT:    retq
   1066 ;
   1067 ; AVX2-LABEL: shuffle_v8i32_00001111:
   1068 ; AVX2:       # BB#0:
   1069 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,0,1,1,1,1]
   1070 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1071 ; AVX2-NEXT:    retq
   1072   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
   1073   ret <8 x i32> %shuffle
   1074 }
   1075 
   1076 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <8,1,10,3,12,5,14,7> keeps every lane in place, taking even result lanes from
        ; %b and odd result lanes from %a. AVX1 expects vblendps; AVX2 expects vpblendd.
   1077 ; AVX1-LABEL: shuffle_v8i32_81a3c5e7:
   1078 ; AVX1:       # BB#0:
   1079 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
   1080 ; AVX1-NEXT:    retq
   1081 ;
   1082 ; AVX2-LABEL: shuffle_v8i32_81a3c5e7:
   1083 ; AVX2:       # BB#0:
   1084 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
   1085 ; AVX2-NEXT:    retq
   1086   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
   1087   ret <8 x i32> %shuffle
   1088 }
   1089 
   1090 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,8,0,8,0,8,0,8> alternates lane 0 of %a with lane 0 of %b across the result.
        ; AVX1 expects vshufps, vpermilps and vinsertf128; AVX2 expects vpunpckldq + vpbroadcastq.
   1091 ; AVX1-LABEL: shuffle_v8i32_08080808:
   1092 ; AVX1:       # BB#0:
   1093 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
   1094 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
   1095 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1096 ; AVX1-NEXT:    retq
   1097 ;
   1098 ; AVX2-LABEL: shuffle_v8i32_08080808:
   1099 ; AVX2:       # BB#0:
   1100 ; AVX2-NEXT:    vpunpckldq {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1101 ; AVX2-NEXT:    vpbroadcastq %xmm0, %ymm0
   1102 ; AVX2-NEXT:    retq
   1103   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
   1104   ret <8 x i32> %shuffle
   1105 }
   1106 
   1107 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,8,0,8,4,12,4,12> alternates lane 0 of %a and %b in the low half and lane 4
        ; of %a and %b in the high half. AVX1 expects vshufps + vpermilps; AVX2 expects two
        ; vpshufd + vpblendd.
   1108 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
   1109 ; AVX1:       # BB#0:
   1110 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
   1111 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   1112 ; AVX1-NEXT:    retq
   1113 ;
   1114 ; AVX2-LABEL: shuffle_v8i32_08084c4c:
   1115 ; AVX2:       # BB#0:
   1116 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
   1117 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
   1118 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1119 ; AVX2-NEXT:    retq
   1120   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
   1121   ret <8 x i32> %shuffle
   1122 }
   1123 
   1124 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <8,8,2,3,12,12,6,7> takes lanes 0,0 and 4,4 from %b and leaves lanes 2,3 and 6,7
        ; of %a in place. AVX1 expects a single vshufps; AVX2 expects vpshufd + vpblendd.
   1125 ; AVX1-LABEL: shuffle_v8i32_8823cc67:
   1126 ; AVX1:       # BB#0:
   1127 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
   1128 ; AVX1-NEXT:    retq
   1129 ;
   1130 ; AVX2-LABEL: shuffle_v8i32_8823cc67:
   1131 ; AVX2:       # BB#0:
   1132 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,3,4,4,6,7]
   1133 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1134 ; AVX2-NEXT:    retq
   1135   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
   1136   ret <8 x i32> %shuffle
   1137 }
   1138 
   1139 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <9,8,3,2,13,12,7,6> takes lanes 1,0 and 5,4 from %b and lanes 3,2 and 7,6 from
        ; %a. AVX1 expects a single vshufps; AVX2 expects vpblendd followed by vpshufd.
   1140 ; AVX1-LABEL: shuffle_v8i32_9832dc76:
   1141 ; AVX1:       # BB#0:
   1142 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
   1143 ; AVX1-NEXT:    retq
   1144 ;
   1145 ; AVX2-LABEL: shuffle_v8i32_9832dc76:
   1146 ; AVX2:       # BB#0:
   1147 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1148 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1149 ; AVX2-NEXT:    retq
   1150   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
   1151   ret <8 x i32> %shuffle
   1152 }
   1153 
   1154 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <9,8,1,0,13,12,5,4> takes lanes 1,0 and 5,4 from %b, each pair followed by the
        ; same lanes of %a. AVX1 expects a single vshufps; AVX2 expects two vpshufd + vpblendd.
   1155 ; AVX1-LABEL: shuffle_v8i32_9810dc54:
   1156 ; AVX1:       # BB#0:
   1157 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
   1158 ; AVX1-NEXT:    retq
   1159 ;
   1160 ; AVX2-LABEL: shuffle_v8i32_9810dc54:
   1161 ; AVX2:       # BB#0:
   1162 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,1,1,0,4,5,5,4]
   1163 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[1,0,2,3,5,4,6,7]
   1164 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3],ymm1[4,5],ymm0[6,7]
   1165 ; AVX2-NEXT:    retq
   1166   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
   1167   ret <8 x i32> %shuffle
   1168 }
   1169 
   1170 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,8,1,9,4,12,5,13> interleaves lanes 0,1 of %a and %b in the low half and
        ; lanes 4,5 in the high half. AVX1 expects vunpcklps; AVX2 expects vpunpckldq.
   1171 ; AVX1-LABEL: shuffle_v8i32_08194c5d:
   1172 ; AVX1:       # BB#0:
   1173 ; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
   1174 ; AVX1-NEXT:    retq
   1175 ;
   1176 ; AVX2-LABEL: shuffle_v8i32_08194c5d:
   1177 ; AVX2:       # BB#0:
   1178 ; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
   1179 ; AVX2-NEXT:    retq
   1180   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
   1181   ret <8 x i32> %shuffle
   1182 }
   1183 
   1184 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <2,10,3,11,6,14,7,15> interleaves lanes 2,3 of %a and %b in the low half and
        ; lanes 6,7 in the high half. AVX1 expects vunpckhps; AVX2 expects vpunpckhdq.
   1185 ; AVX1-LABEL: shuffle_v8i32_2a3b6e7f:
   1186 ; AVX1:       # BB#0:
   1187 ; AVX1-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
   1188 ; AVX1-NEXT:    retq
   1189 ;
   1190 ; AVX2-LABEL: shuffle_v8i32_2a3b6e7f:
   1191 ; AVX2:       # BB#0:
   1192 ; AVX2-NEXT:    vpunpckhdq {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
   1193 ; AVX2-NEXT:    retq
   1194   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
   1195   ret <8 x i32> %shuffle
   1196 }
   1197 
   1198 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,8,1,9,2,10,3,11> interleaves the low four lanes of %a with the low four
        ; lanes of %b across the whole result. AVX1 expects vunpckhps, vunpcklps and
        ; vinsertf128; AVX2 expects vmovdqa + vpermd, vpmovzxdq and vpblendd.
   1199 ; AVX1-LABEL: shuffle_v8i32_08192a3b:
   1200 ; AVX1:       # BB#0:
   1201 ; AVX1-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1202 ; AVX1-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1203 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1204 ; AVX1-NEXT:    retq
   1205 ;
   1206 ; AVX2-LABEL: shuffle_v8i32_08192a3b:
   1207 ; AVX2:       # BB#0:
   1208 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,u,1,u,2,u,3>
   1209 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   1210 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1211 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1212 ; AVX2-NEXT:    retq
   1213   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1214   ret <8 x i32> %shuffle
   1215 }
   1216 
   1217 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,8,9,9,1,10,11,11> (hex 0,8,9,9,1,a,b,b) takes result lanes 0 and 4 from
        ; lanes 0 and 1 of %a; the other six lanes come from the low four lanes of %b.
        ; AVX1 expects two vshufps, vblendpd, vpermilps and vinsertf128; AVX2 expects two
        ; vmovdqa + vpermd pairs followed by vpblendd.
   1218 ; AVX1-LABEL: shuffle_v8i32_08991abb:
   1219 ; AVX1:       # BB#0:
   1220 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
   1221 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
   1222 ; AVX1-NEXT:    vblendpd {{.*#+}} xmm0 = xmm0[0],xmm1[1]
   1223 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
   1224 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   1225 ; AVX1-NEXT:    retq
   1226 ;
   1227 ; AVX2-LABEL: shuffle_v8i32_08991abb:
   1228 ; AVX2:       # BB#0:
   1229 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
   1230 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
   1231 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
   1232 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   1233 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1234 ; AVX2-NEXT:    retq
   1235   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
   1236   ret <8 x i32> %shuffle
   1237 }
   1238 
   1239 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,9,1,11,2,13,3,15> spreads lanes 0-3 of %a over the even result lanes and
        ; keeps the odd lanes of %b in place. AVX1 expects two vpshufd, vinsertf128 and
        ; vblendps; AVX2 expects vpmovzxdq + vpblendd.
   1240 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
   1241 ; AVX1:       # BB#0:
   1242 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[0,1,1,3]
   1243 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,1,3,3]
   1244 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   1245 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1246 ; AVX1-NEXT:    retq
   1247 ;
   1248 ; AVX2-LABEL: shuffle_v8i32_091b2d3f:
   1249 ; AVX2:       # BB#0:
   1250 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1251 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1252 ; AVX2-NEXT:    retq
   1253   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   1254   ret <8 x i32> %shuffle
   1255 }
   1256 
   1257 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,9,10,11,1,13,14,15> takes result lanes 0 and 4 from lanes 0 and 1 of %a and
        ; keeps every other lane of %b in place. AVX1 expects vpshufd, vinsertf128 and
        ; vblendps; AVX2 expects vmovdqa + vpermd followed by vpblendd.
   1258 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
   1259 ; AVX1:       # BB#0:
   1260 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm2 = xmm0[1,1,3,3]
   1261 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1262 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1263 ; AVX1-NEXT:    retq
   1264 ;
   1265 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
   1266 ; AVX2:       # BB#0:
   1267 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <0,u,u,u,1,u,u,u>
   1268 ; AVX2-NEXT:    vpermd %ymm0, %ymm2, %ymm0
   1269 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1270 ; AVX2-NEXT:    retq
   1271   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
   1272   ret <8 x i32> %shuffle
   1273 }
   1274 
   1275 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
        ; Mask <0,0,0,1,4,4,4,5> applies the same 0,0,0,1 pattern to each half of %a.
        ; AVX1 expects a single vpermilps; AVX2 expects a single vpshufd with the same indices.
   1276 ; AVX1-LABEL: shuffle_v8i32_00014445:
   1277 ; AVX1:       # BB#0:
   1278 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
   1279 ; AVX1-NEXT:    retq
   1280 ;
   1281 ; AVX2-LABEL: shuffle_v8i32_00014445:
   1282 ; AVX2:       # BB#0:
   1283 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
   1284 ; AVX2-NEXT:    retq
   1285   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
   1286   ret <8 x i32> %shuffle
   1287 }
   1288 
   1289 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a repeating the in-lane pattern [0,0,2,0] in both 128-bit halves; one immediate permute is expected per target.
   1290 ; AVX1-LABEL: shuffle_v8i32_00204464:
   1291 ; AVX1:       # BB#0:
   1292 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
   1293 ; AVX1-NEXT:    retq
   1294 ;
   1295 ; AVX2-LABEL: shuffle_v8i32_00204464:
   1296 ; AVX2:       # BB#0:
   1297 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
   1298 ; AVX2-NEXT:    retq
   1299   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
   1300   ret <8 x i32> %shuffle
   1301 }
   1302 
   1303 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a repeating the in-lane pattern [0,3,0,0] in both 128-bit halves; one immediate permute is expected per target.
   1304 ; AVX1-LABEL: shuffle_v8i32_03004744:
   1305 ; AVX1:       # BB#0:
   1306 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
   1307 ; AVX1-NEXT:    retq
   1308 ;
   1309 ; AVX2-LABEL: shuffle_v8i32_03004744:
   1310 ; AVX2:       # BB#0:
   1311 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
   1312 ; AVX2-NEXT:    retq
   1313   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
   1314   ret <8 x i32> %shuffle
   1315 }
   1316 
   1317 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a repeating the in-lane pattern [1,0,0,0] in both 128-bit halves; one immediate permute is expected per target.
   1318 ; AVX1-LABEL: shuffle_v8i32_10005444:
   1319 ; AVX1:       # BB#0:
   1320 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
   1321 ; AVX1-NEXT:    retq
   1322 ;
   1323 ; AVX2-LABEL: shuffle_v8i32_10005444:
   1324 ; AVX2:       # BB#0:
   1325 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
   1326 ; AVX2-NEXT:    retq
   1327   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
   1328   ret <8 x i32> %shuffle
   1329 }
   1330 
   1331 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a repeating the in-lane pattern [2,2,0,0] in both 128-bit halves; one immediate permute is expected per target.
   1332 ; AVX1-LABEL: shuffle_v8i32_22006644:
   1333 ; AVX1:       # BB#0:
   1334 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
   1335 ; AVX1-NEXT:    retq
   1336 ;
   1337 ; AVX2-LABEL: shuffle_v8i32_22006644:
   1338 ; AVX2:       # BB#0:
   1339 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
   1340 ; AVX2-NEXT:    retq
   1341   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
   1342   ret <8 x i32> %shuffle
   1343 }
   1344 
   1345 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a repeating the in-lane pattern [3,3,3,0] in both 128-bit halves; one immediate permute is expected per target.
   1346 ; AVX1-LABEL: shuffle_v8i32_33307774:
   1347 ; AVX1:       # BB#0:
   1348 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
   1349 ; AVX1-NEXT:    retq
   1350 ;
   1351 ; AVX2-LABEL: shuffle_v8i32_33307774:
   1352 ; AVX2:       # BB#0:
   1353 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
   1354 ; AVX2-NEXT:    retq
   1355   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
   1356   ret <8 x i32> %shuffle
   1357 }
   1358 
   1359 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
        ; Reverses the four elements within each 128-bit half of %a (pattern [3,2,1,0] in both halves); one immediate permute is expected per target.
   1360 ; AVX1-LABEL: shuffle_v8i32_32107654:
   1361 ; AVX1:       # BB#0:
   1362 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1363 ; AVX1-NEXT:    retq
   1364 ;
   1365 ; AVX2-LABEL: shuffle_v8i32_32107654:
   1366 ; AVX2:       # BB#0:
   1367 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1368 ; AVX2-NEXT:    retq
   1369   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   1370   ret <8 x i32> %shuffle
   1371 }
   1372 
   1373 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a repeating the in-lane pattern [0,0,2,3] in both 128-bit halves; one immediate permute is expected per target.
   1374 ; AVX1-LABEL: shuffle_v8i32_00234467:
   1375 ; AVX1:       # BB#0:
   1376 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
   1377 ; AVX1-NEXT:    retq
   1378 ;
   1379 ; AVX2-LABEL: shuffle_v8i32_00234467:
   1380 ; AVX2:       # BB#0:
   1381 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
   1382 ; AVX2-NEXT:    retq
   1383   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
   1384   ret <8 x i32> %shuffle
   1385 }
   1386 
   1387 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
        ; Duplicates the even-indexed elements of %a (pattern [0,0,2,2] in both halves); AVX1 expects vmovsldup, AVX2 expects vpshufd.
   1388 ; AVX1-LABEL: shuffle_v8i32_00224466:
   1389 ; AVX1:       # BB#0:
   1390 ; AVX1-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
   1391 ; AVX1-NEXT:    retq
   1392 ;
   1393 ; AVX2-LABEL: shuffle_v8i32_00224466:
   1394 ; AVX2:       # BB#0:
   1395 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
   1396 ; AVX2-NEXT:    retq
   1397   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   1398   ret <8 x i32> %shuffle
   1399 }
   1400 
   1401 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
        ; Swaps each adjacent pair of elements of %a (pattern [1,0,3,2] in both halves); one immediate permute is expected per target.
   1402 ; AVX1-LABEL: shuffle_v8i32_10325476:
   1403 ; AVX1:       # BB#0:
   1404 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1405 ; AVX1-NEXT:    retq
   1406 ;
   1407 ; AVX2-LABEL: shuffle_v8i32_10325476:
   1408 ; AVX2:       # BB#0:
   1409 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1410 ; AVX2-NEXT:    retq
   1411   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   1412   ret <8 x i32> %shuffle
   1413 }
   1414 
   1415 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
        ; Duplicates the odd-indexed elements of %a (pattern [1,1,3,3] in both halves); AVX1 expects vmovshdup, AVX2 expects vpshufd.
   1416 ; AVX1-LABEL: shuffle_v8i32_11335577:
   1417 ; AVX1:       # BB#0:
   1418 ; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
   1419 ; AVX1-NEXT:    retq
   1420 ;
   1421 ; AVX2-LABEL: shuffle_v8i32_11335577:
   1422 ; AVX2:       # BB#0:
   1423 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
   1424 ; AVX2-NEXT:    retq
   1425   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
   1426   ret <8 x i32> %shuffle
   1427 }
   1428 
   1429 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a repeating the in-lane pattern [1,0,2,3] in both 128-bit halves; one immediate permute is expected per target.
   1430 ; AVX1-LABEL: shuffle_v8i32_10235467:
   1431 ; AVX1:       # BB#0:
   1432 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
   1433 ; AVX1-NEXT:    retq
   1434 ;
   1435 ; AVX2-LABEL: shuffle_v8i32_10235467:
   1436 ; AVX2:       # BB#0:
   1437 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
   1438 ; AVX2-NEXT:    retq
   1439   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1440   ret <8 x i32> %shuffle
   1441 }
   1442 
   1443 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input shuffle of %a repeating the in-lane pattern [1,0,2,2] in both 128-bit halves; one immediate permute is expected per target.
   1444 ; AVX1-LABEL: shuffle_v8i32_10225466:
   1445 ; AVX1:       # BB#0:
   1446 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
   1447 ; AVX1-NEXT:    retq
   1448 ;
   1449 ; AVX2-LABEL: shuffle_v8i32_10225466:
   1450 ; AVX2:       # BB#0:
   1451 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
   1452 ; AVX2-NEXT:    retq
   1453   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
   1454   ret <8 x i32> %shuffle
   1455 }
   1456 
   1457 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose low-half pattern [0,0,0,1] differs from the high-half pattern [1,0,0,0]; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1458 ; AVX1-LABEL: shuffle_v8i32_00015444:
   1459 ; AVX1:       # BB#0:
   1460 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
   1461 ; AVX1-NEXT:    retq
   1462 ;
   1463 ; AVX2-LABEL: shuffle_v8i32_00015444:
   1464 ; AVX2:       # BB#0:
   1465 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
   1466 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1467 ; AVX2-NEXT:    retq
   1468   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
   1469   ret <8 x i32> %shuffle
   1470 }
   1471 
   1472 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose low-half pattern [0,0,2,0] differs from the high-half pattern [0,2,0,0]; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1473 ; AVX1-LABEL: shuffle_v8i32_00204644:
   1474 ; AVX1:       # BB#0:
   1475 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
   1476 ; AVX1-NEXT:    retq
   1477 ;
   1478 ; AVX2-LABEL: shuffle_v8i32_00204644:
   1479 ; AVX2:       # BB#0:
   1480 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
   1481 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1482 ; AVX2-NEXT:    retq
   1483   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
   1484   ret <8 x i32> %shuffle
   1485 }
   1486 
   1487 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose low-half pattern [0,3,0,0] differs from the high-half pattern [0,0,3,0]; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1488 ; AVX1-LABEL: shuffle_v8i32_03004474:
   1489 ; AVX1:       # BB#0:
   1490 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
   1491 ; AVX1-NEXT:    retq
   1492 ;
   1493 ; AVX2-LABEL: shuffle_v8i32_03004474:
   1494 ; AVX2:       # BB#0:
   1495 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
   1496 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1497 ; AVX2-NEXT:    retq
   1498   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
   1499   ret <8 x i32> %shuffle
   1500 }
   1501 
   1502 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose low-half pattern [1,0,0,0] differs from the high-half pattern [0,0,0,0]; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1503 ; AVX1-LABEL: shuffle_v8i32_10004444:
   1504 ; AVX1:       # BB#0:
   1505 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
   1506 ; AVX1-NEXT:    retq
   1507 ;
   1508 ; AVX2-LABEL: shuffle_v8i32_10004444:
   1509 ; AVX2:       # BB#0:
   1510 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
   1511 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1512 ; AVX2-NEXT:    retq
   1513   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   1514   ret <8 x i32> %shuffle
   1515 }
   1516 
   1517 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose low-half pattern [2,2,0,0] differs from the high-half pattern [2,0,0,2]; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1518 ; AVX1-LABEL: shuffle_v8i32_22006446:
   1519 ; AVX1:       # BB#0:
   1520 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
   1521 ; AVX1-NEXT:    retq
   1522 ;
   1523 ; AVX2-LABEL: shuffle_v8i32_22006446:
   1524 ; AVX2:       # BB#0:
   1525 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
   1526 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1527 ; AVX2-NEXT:    retq
   1528   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
   1529   ret <8 x i32> %shuffle
   1530 }
   1531 
   1532 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose low-half pattern [3,3,3,0] differs from the high-half pattern [3,0,3,0]; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1533 ; AVX1-LABEL: shuffle_v8i32_33307474:
   1534 ; AVX1:       # BB#0:
   1535 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
   1536 ; AVX1-NEXT:    retq
   1537 ;
   1538 ; AVX2-LABEL: shuffle_v8i32_33307474:
   1539 ; AVX2:       # BB#0:
   1540 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
   1541 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1542 ; AVX2-NEXT:    retq
   1543   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
   1544   ret <8 x i32> %shuffle
   1545 }
   1546 
   1547 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
        ; Reverses only the low 128-bit half of %a and leaves the high half in place; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1548 ; AVX1-LABEL: shuffle_v8i32_32104567:
   1549 ; AVX1:       # BB#0:
   1550 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
   1551 ; AVX1-NEXT:    retq
   1552 ;
   1553 ; AVX2-LABEL: shuffle_v8i32_32104567:
   1554 ; AVX2:       # BB#0:
   1555 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
   1556 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1557 ; AVX2-NEXT:    retq
   1558   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   1559   ret <8 x i32> %shuffle
   1560 }
   1561 
   1562 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose low-half pattern [0,0,2,3] differs from the high-half pattern [2,3,0,0]; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1563 ; AVX1-LABEL: shuffle_v8i32_00236744:
   1564 ; AVX1:       # BB#0:
   1565 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
   1566 ; AVX1-NEXT:    retq
   1567 ;
   1568 ; AVX2-LABEL: shuffle_v8i32_00236744:
   1569 ; AVX2:       # BB#0:
   1570 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
   1571 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1572 ; AVX2-NEXT:    retq
   1573   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
   1574   ret <8 x i32> %shuffle
   1575 }
   1576 
   1577 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose low-half pattern [0,0,2,2] differs from the high-half pattern [2,2,0,0]; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1578 ; AVX1-LABEL: shuffle_v8i32_00226644:
   1579 ; AVX1:       # BB#0:
   1580 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
   1581 ; AVX1-NEXT:    retq
   1582 ;
   1583 ; AVX2-LABEL: shuffle_v8i32_00226644:
   1584 ; AVX2:       # BB#0:
   1585 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
   1586 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1587 ; AVX2-NEXT:    retq
   1588   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
   1589   ret <8 x i32> %shuffle
   1590 }
   1591 
   1592 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
        ; Swaps adjacent pairs in the low 128-bit half of %a only; the high half stays in place. AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1593 ; AVX1-LABEL: shuffle_v8i32_10324567:
   1594 ; AVX1:       # BB#0:
   1595 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
   1596 ; AVX1-NEXT:    retq
   1597 ;
   1598 ; AVX2-LABEL: shuffle_v8i32_10324567:
   1599 ; AVX2:       # BB#0:
   1600 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
   1601 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1602 ; AVX2-NEXT:    retq
   1603   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
   1604   ret <8 x i32> %shuffle
   1605 }
   1606 
   1607 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
        ; Duplicates odd elements in the low 128-bit half of %a only; the high half stays in place. AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1608 ; AVX1-LABEL: shuffle_v8i32_11334567:
   1609 ; AVX1:       # BB#0:
   1610 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
   1611 ; AVX1-NEXT:    retq
   1612 ;
   1613 ; AVX2-LABEL: shuffle_v8i32_11334567:
   1614 ; AVX2:       # BB#0:
   1615 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
   1616 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1617 ; AVX2-NEXT:    retq
   1618   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
   1619   ret <8 x i32> %shuffle
   1620 }
   1621 
   1622 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
        ; Leaves the low 128-bit half of %a in place and swaps elements 4 and 5 in the high half; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1623 ; AVX1-LABEL: shuffle_v8i32_01235467:
   1624 ; AVX1:       # BB#0:
   1625 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
   1626 ; AVX1-NEXT:    retq
   1627 ;
   1628 ; AVX2-LABEL: shuffle_v8i32_01235467:
   1629 ; AVX2:       # BB#0:
   1630 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
   1631 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1632 ; AVX2-NEXT:    retq
   1633   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1634   ret <8 x i32> %shuffle
   1635 }
   1636 
   1637 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
        ; Leaves the low 128-bit half of %a in place and applies the in-lane pattern [1,0,2,2] to the high half; AVX1 expects one vpermilps, AVX2 a vpermd with a constant index vector.
   1638 ; AVX1-LABEL: shuffle_v8i32_01235466:
   1639 ; AVX1:       # BB#0:
   1640 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
   1641 ; AVX1-NEXT:    retq
   1642 ;
   1643 ; AVX2-LABEL: shuffle_v8i32_01235466:
   1644 ; AVX2:       # BB#0:
   1645 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
   1646 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1647 ; AVX2-NEXT:    retq
   1648   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
   1649   ret <8 x i32> %shuffle
   1650 }
   1651 
   1652 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a with undef mask elements at positions 3 and 5; the undef lanes appear as 'u' in the expected permute operands.
   1653 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
   1654 ; AVX1:       # BB#0:
   1655 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
   1656 ; AVX1-NEXT:    retq
   1657 ;
   1658 ; AVX2-LABEL: shuffle_v8i32_002u6u44:
   1659 ; AVX2:       # BB#0:
   1660 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
   1661 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1662 ; AVX2-NEXT:    retq
   1663   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
   1664   ret <8 x i32> %shuffle
   1665 }
   1666 
   1667 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a with undef mask elements at positions 2, 3, 6 and 7; the undef lanes appear as 'u' in the expected permute operands.
   1668 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
   1669 ; AVX1:       # BB#0:
   1670 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
   1671 ; AVX1-NEXT:    retq
   1672 ;
   1673 ; AVX2-LABEL: shuffle_v8i32_00uu66uu:
   1674 ; AVX2:       # BB#0:
   1675 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
   1676 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1677 ; AVX2-NEXT:    retq
   1678   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
   1679   ret <8 x i32> %shuffle
   1680 }
   1681 
   1682 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a with undef mask elements at positions 6 and 7; the undef lanes appear as 'u' in the expected permute operands.
   1683 ; AVX1-LABEL: shuffle_v8i32_103245uu:
   1684 ; AVX1:       # BB#0:
   1685 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
   1686 ; AVX1-NEXT:    retq
   1687 ;
   1688 ; AVX2-LABEL: shuffle_v8i32_103245uu:
   1689 ; AVX2:       # BB#0:
   1690 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
   1691 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1692 ; AVX2-NEXT:    retq
   1693   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
   1694   ret <8 x i32> %shuffle
   1695 }
   1696 
   1697 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a with undef mask elements at positions 4 and 5; the undef lanes appear as 'u' in the expected permute operands.
   1698 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
   1699 ; AVX1:       # BB#0:
   1700 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
   1701 ; AVX1-NEXT:    retq
   1702 ;
   1703 ; AVX2-LABEL: shuffle_v8i32_1133uu67:
   1704 ; AVX2:       # BB#0:
   1705 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
   1706 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1707 ; AVX2-NEXT:    retq
   1708   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
   1709   ret <8 x i32> %shuffle
   1710 }
   1711 
   1712 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a with undef mask elements at positions 1, 2, 6 and 7; the undef lanes appear as 'u' in the expected permute operands.
   1713 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
   1714 ; AVX1:       # BB#0:
   1715 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
   1716 ; AVX1-NEXT:    retq
   1717 ;
   1718 ; AVX2-LABEL: shuffle_v8i32_0uu354uu:
   1719 ; AVX2:       # BB#0:
   1720 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
   1721 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1722 ; AVX2-NEXT:    retq
   1723   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
   1724   ret <8 x i32> %shuffle
   1725 }
   1726 
   1727 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
        ; Mostly-undef in-lane shuffle of %a (only positions 3, 6 and 7 are defined); the undef lanes appear as 'u' in the expected permute operands.
   1728 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
   1729 ; AVX1:       # BB#0:
   1730 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
   1731 ; AVX1-NEXT:    retq
   1732 ;
   1733 ; AVX2-LABEL: shuffle_v8i32_uuu3uu66:
   1734 ; AVX2:       # BB#0:
   1735 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
   1736 ; AVX2-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1737 ; AVX2-NEXT:    retq
   1738   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
   1739   ret <8 x i32> %shuffle
   1740 }
   1741 
   1742 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
        ; Two-input shuffle where both %a and %b contribute elements that cross 128-bit halves; the checks permute each input on its own and then merge them with one blend.
   1743 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
   1744 ; AVX1:       # BB#0:
   1745 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
   1746 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
   1747 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
   1748 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
   1749 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
   1750 ; AVX1-NEXT:    retq
   1751 ;
   1752 ; AVX2-LABEL: shuffle_v8i32_6caa87e5:
   1753 ; AVX2:       # BB#0:
   1754 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[3,1,3,2]
   1755 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6]
   1756 ; AVX2-NEXT:    vpermq {{.*#+}} ymm1 = ymm1[2,1,0,3]
   1757 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
   1758 ; AVX2-NEXT:    retq
   1759   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
   1760   ret <8 x i32> %shuffle
   1761 }
   1762 
   1763 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
        ; Reverses the low 128-bit half of %a and replicates it into both halves of the result: a 128-bit permute followed by a half duplication.
   1764 ; AVX1-LABEL: shuffle_v8i32_32103210:
   1765 ; AVX1:       # BB#0:
   1766 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1767 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1768 ; AVX1-NEXT:    retq
   1769 ;
   1770 ; AVX2-LABEL: shuffle_v8i32_32103210:
   1771 ; AVX2:       # BB#0:
   1772 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1773 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,0,1]
   1774 ; AVX2-NEXT:    retq
   1775   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
   1776   ret <8 x i32> %shuffle
   1777 }
   1778 
   1779 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) {
        ; Reverses the high 128-bit half of %a and replicates it into both halves of the result: an in-lane reverse followed by a duplication of the high half.
   1780 ; AVX1-LABEL: shuffle_v8i32_76547654:
   1781 ; AVX1:       # BB#0:
   1782 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1783 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
   1784 ; AVX1-NEXT:    retq
   1785 ;
   1786 ; AVX2-LABEL: shuffle_v8i32_76547654:
   1787 ; AVX2:       # BB#0:
   1788 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1789 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
   1790 ; AVX2-NEXT:    retq
   1791   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
   1792   ret <8 x i32> %shuffle
   1793 }
   1794 
   1795 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) {
        ; Full element reversal of %a: an in-lane reverse followed by a swap of the two 128-bit halves.
   1796 ; AVX1-LABEL: shuffle_v8i32_76543210:
   1797 ; AVX1:       # BB#0:
   1798 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1799 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
   1800 ; AVX1-NEXT:    retq
   1801 ;
   1802 ; AVX2-LABEL: shuffle_v8i32_76543210:
   1803 ; AVX2:       # BB#0:
   1804 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1805 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
   1806 ; AVX2-NEXT:    retq
   1807   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   1808   ret <8 x i32> %shuffle
   1809 }
   1810 
   1811 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) {
        ; Reversed low half of %a in the low half, reversed low half of %b in the high half: %b's low 128 bits are inserted above %a's, then one in-lane reverse.
   1812 ; AVX1-LABEL: shuffle_v8i32_3210ba98:
   1813 ; AVX1:       # BB#0:
   1814 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1815 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1816 ; AVX1-NEXT:    retq
   1817 ;
   1818 ; AVX2-LABEL: shuffle_v8i32_3210ba98:
   1819 ; AVX2:       # BB#0:
   1820 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
   1821 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1822 ; AVX2-NEXT:    retq
   1823   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
   1824   ret <8 x i32> %shuffle
   1825 }
   1826 
   1827 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) {
        ; Reversed low half of %a in the low half, reversed high half of %b in the high half: a half-wise blend of the two inputs, then one in-lane reverse.
   1828 ; AVX1-LABEL: shuffle_v8i32_3210fedc:
   1829 ; AVX1:       # BB#0:
   1830 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
   1831 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1832 ; AVX1-NEXT:    retq
   1833 ;
   1834 ; AVX2-LABEL: shuffle_v8i32_3210fedc:
   1835 ; AVX2:       # BB#0:
   1836 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   1837 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1838 ; AVX2-NEXT:    retq
   1839   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
   1840   ret <8 x i32> %shuffle
   1841 }
   1842 
   1843 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) {
        ; Reversed high half of %a in the low half, reversed high half of %b in the high half: a 128-bit permute gathers both high halves, then one in-lane reverse.
   1844 ; AVX1-LABEL: shuffle_v8i32_7654fedc:
   1845 ; AVX1:       # BB#0:
   1846 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1847 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1848 ; AVX1-NEXT:    retq
   1849 ;
   1850 ; AVX2-LABEL: shuffle_v8i32_7654fedc:
   1851 ; AVX2:       # BB#0:
   1852 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   1853 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1854 ; AVX2-NEXT:    retq
   1855   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
   1856   ret <8 x i32> %shuffle
   1857 }
   1858 
   1859 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) {
        ; Reversed high half of %b in the low half, reversed high half of %a in the high half: a 128-bit permute gathers both high halves (%b first), then one in-lane reverse.
   1860 ; AVX1-LABEL: shuffle_v8i32_fedc7654:
   1861 ; AVX1:       # BB#0:
   1862 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
   1863 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1864 ; AVX1-NEXT:    retq
   1865 ;
   1866 ; AVX2-LABEL: shuffle_v8i32_fedc7654:
   1867 ; AVX2:       # BB#0:
   1868 ; AVX2-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
   1869 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1870 ; AVX2-NEXT:    retq
   1871   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
   1872   ret <8 x i32> %shuffle
   1873 }
   1874 
   1875 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) {
        ; Reversed low half of %b in the low half, reversed high half of %a in the high half: a half-wise blend (%b low, %a high), then one in-lane reverse.
   1876 ; AVX1-LABEL: shuffle_v8i32_ba987654:
   1877 ; AVX1:       # BB#0:
   1878 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,3]
   1879 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1880 ; AVX1-NEXT:    retq
   1881 ;
   1882 ; AVX2-LABEL: shuffle_v8i32_ba987654:
   1883 ; AVX2:       # BB#0:
   1884 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
   1885 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1886 ; AVX2-NEXT:    retq
   1887   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
   1888   ret <8 x i32> %shuffle
   1889 }
   1890 
   1891 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) {
        ; Reversed low half of %b in the low half, reversed low half of %a in the high half (mask b,a,9,8,3,2,1,0): %a's low 128 bits are inserted above %b's, then one in-lane reverse.
        ; NOTE(review): the mask previously duplicated shuffle_v8i32_ba987654; the check lines below mirror shuffle_v8i32_3210ba98 with the inputs swapped. Regenerate with utils/update_llc_test_checks.py to confirm.
   1892 ; AVX1-LABEL: shuffle_v8i32_ba983210:
   1893 ; AVX1:       # BB#0:
   1894 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1895 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1896 ; AVX1-NEXT:    retq
   1897 ;
   1898 ; AVX2-LABEL: shuffle_v8i32_ba983210:
   1899 ; AVX2:       # BB#0:
   1900 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm1, %ymm0
   1901 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1902 ; AVX2-NEXT:    retq
   1903   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
   1904   ret <8 x i32> %shuffle
   1905 }
   1906 
   1907 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) {
        ; Shuffle against a zeroinitializer first operand: each 128-bit half gets a zero in its lowest element and %a's lowest element of that half in its highest; AVX2 expects a single per-lane byte shift (vpslldq).
   1908 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
   1909 ; AVX1:       # BB#0:
   1910 ; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
   1911 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
   1912 ; AVX1-NEXT:    retq
   1913 ;
   1914 ; AVX2-LABEL: shuffle_v8i32_zuu8zuuc:
   1915 ; AVX2:       # BB#0:
   1916 ; AVX2-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
   1917 ; AVX2-NEXT:    retq
   1918   %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
   1919   ret <8 x i32> %shuffle
   1920 }
   1921 
   1922 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) {
        ; Shuffle against a zeroinitializer first operand: each 128-bit half of %a moves down by one element with a zero filling the top element; AVX2 expects a single per-lane byte shift (vpsrldq).
   1923 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
   1924 ; AVX1:       # BB#0:
   1925 ; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
   1926 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
   1927 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   1928 ; AVX1-NEXT:    retq
   1929 ;
   1930 ; AVX2-LABEL: shuffle_v8i32_9ubzdefz:
   1931 ; AVX2:       # BB#0:
   1932 ; AVX2-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
   1933 ; AVX2-NEXT:    retq
   1934   %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
   1935   ret <8 x i32> %shuffle
   1936 }
   1937 
   1938 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) {
        ; Partially-undef interleave of the low elements of each 128-bit half of %b and %a; a single unpack-low is expected per target.
        ; NOTE(review): the 'b' in the name would denote mask index 11, but the mask and the expected unpack use index 12 ('c'); the name is left unchanged here so the FileCheck labels stay stable.
   1939 ; AVX1-LABEL: shuffle_v8i32_80u1b4uu:
   1940 ; AVX1:       # BB#0:
   1941 ; AVX1-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
   1942 ; AVX1-NEXT:    retq
   1943 ;
   1944 ; AVX2-LABEL: shuffle_v8i32_80u1b4uu:
   1945 ; AVX2:       # BB#0:
   1946 ; AVX2-NEXT:    vpunpckldq {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
   1947 ; AVX2-NEXT:    retq
   1948   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
   1949   ret <8 x i32> %shuffle
   1950 }
   1951 
   1952 define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) {
        ; Only the high 128-bit half of the result is defined: a splat of %a[1]. The checks splat within the low 128 bits and then insert that into the high half.
   1953 ; AVX1-LABEL: shuffle_v8i32_uuuu1111:
   1954 ; AVX1:       # BB#0:
   1955 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1956 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1957 ; AVX1-NEXT:    retq
   1958 ;
   1959 ; AVX2-LABEL: shuffle_v8i32_uuuu1111:
   1960 ; AVX2:       # BB#0:
   1961 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1962 ; AVX2-NEXT:    vinserti128 $1, %xmm0, %ymm0, %ymm0
   1963 ; AVX2-NEXT:    retq
   1964   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
   1965   ret <8 x i32> %shuffle
   1966 }
   1967 
   1968 define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) {
        ; Only the low 128-bit half of the result is defined: a splat of %a[2]. Both run lines share the ALL prefix and expect a single 128-bit vpshufd.
   1969 ; ALL-LABEL: shuffle_v8i32_2222uuuu:
   1970 ; ALL:       # BB#0:
   1971 ; ALL-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[2,2,2,2]
   1972 ; ALL-NEXT:    retq
   1973   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
   1974   ret <8 x i32> %shuffle
   1975 }
   1976 
   1977 define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) {
        ; Lower four result elements interleave %a[2], %b[2], %a[3], %b[3]
        ; (mask 2,10,3,11); the upper four are undef.  Both run configurations
        ; expect a single 128-bit vpunpckhdq of xmm0 and xmm1.
   1978 ; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
   1979 ; ALL:       # BB#0:
   1980 ; ALL-NEXT:    vpunpckhdq {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1981 ; ALL-NEXT:    retq
   1982   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   1983   ret <8 x i32> %shuffle
   1984 }
   1985 
   1986 define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) {
        ; Splat of %a[4] (the first element of the upper 128-bit half) across
        ; all eight result elements; %b is not referenced by the mask.
        ; With +avx the checks expect an in-half vpermilps followed by
        ; vperm2f128 duplicating the upper half; with +avx2 they expect
        ; vextractf128 of the upper half followed by vbroadcastss.
   1987 ; AVX1-LABEL: shuffle_v8i32_44444444:
   1988 ; AVX1:       # BB#0:
   1989 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
   1990 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
   1991 ; AVX1-NEXT:    retq
   1992 ;
   1993 ; AVX2-LABEL: shuffle_v8i32_44444444:
   1994 ; AVX2:       # BB#0:
   1995 ; AVX2-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1996 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
   1997 ; AVX2-NEXT:    retq
   1998   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   1999   ret <8 x i32> %shuffle
   2000 }
   2001 
   2002 define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) {
        ; Lower four result elements are all %a[5]; the upper four are undef.
        ; The checks expect the upper 128-bit half of ymm0 to be extracted
        ; (vextractf128 with +avx, vextracti128 with +avx2) and then a 128-bit
        ; vpshufd splatting its element 1.
   2003 ; AVX1-LABEL: shuffle_v8i32_5555uuuu:
   2004 ; AVX1:       # BB#0:
   2005 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2006 ; AVX1-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
   2007 ; AVX1-NEXT:    retq
   2008 ;
   2009 ; AVX2-LABEL: shuffle_v8i32_5555uuuu:
   2010 ; AVX2:       # BB#0:
   2011 ; AVX2-NEXT:    vextracti128 $1, %ymm0, %xmm0
   2012 ; AVX2-NEXT:    vpshufd {{.*#+}} xmm0 = xmm0[1,1,1,1]
   2013 ; AVX2-NEXT:    retq
   2014   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
   2015   ret <8 x i32> %shuffle
   2016 }
   2017 
   2018 define <8 x float> @splat_mem_v8f32_2(float* %p) {
        ; Loads one float from %p, inserts it into element 0 of an otherwise
        ; undef <4 x float>, then widens to <8 x float> with an all-zero mask so
        ; every result element is that loaded value.  Both run configurations
        ; expect the whole sequence to fold into one vbroadcastss from memory.
   2019 ; ALL-LABEL: splat_mem_v8f32_2:
   2020 ; ALL:       # BB#0:
   2021 ; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
   2022 ; ALL-NEXT:    retq
   2023   %1 = load float, float* %p
   2024   %2 = insertelement <4 x float> undef, float %1, i32 0
   2025   %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
   2026   ret <8 x float> %3
   2027 }
   2028 
   2029 define <8 x float> @splat_v8f32(<4 x float> %r) {
        ; Widening splat: element 0 of the <4 x float> argument is replicated
        ; into all eight result elements (all-zero shuffle mask).
        ; With +avx the checks expect a 128-bit vpermilps splat plus
        ; vinsertf128 into the upper half; with +avx2 a register-source
        ; vbroadcastss.
   2030 ; AVX1-LABEL: splat_v8f32:
   2031 ; AVX1:       # BB#0:
   2032 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2033 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   2034 ; AVX1-NEXT:    retq
   2035 ;
   2036 ; AVX2-LABEL: splat_v8f32:
   2037 ; AVX2:       # BB#0:
   2038 ; AVX2-NEXT:    vbroadcastss %xmm0, %ymm0
   2039 ; AVX2-NEXT:    retq
   2040   %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
   2041   ret <8 x float> %1
   2042 }
   2043 
   2044 ;
   2045 ; Shuffle to logical bit shifts
   2046 ;
   2047 
   2048 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) {
        ; The second shuffle operand is zeroinitializer, so mask index 8 selects
        ; a zero element.  Result: zero, %a[0], undef, %a[2], zero, undef, zero,
        ; %a[6] -- i.e. each even i32 of %a moved up one slot with a zero (or
        ; undef) below it.
        ; With +avx2 the checks expect a single 64-bit left shift by 32
        ; (vpsllq); with +avx they expect a zeroed register plus a
        ; vshufps/vpermilps pair.
   2049 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
   2050 ; AVX1:       # BB#0:
   2051 ; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
   2052 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
   2053 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
   2054 ; AVX1-NEXT:    retq
   2055 ;
   2056 ; AVX2-LABEL: shuffle_v8i32_z0U2zUz6:
   2057 ; AVX2:       # BB#0:
   2058 ; AVX2-NEXT:    vpsllq $32, %ymm0, %ymm0
   2059 ; AVX2-NEXT:    retq
   2060   %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
   2061   ret <8 x i32> %shuffle
   2062 }
   2063 
   2064 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) {
        ; The second shuffle operand is zeroinitializer, so mask index 8 selects
        ; a zero element.  Result: %a[1], undef, %a[3], zero, %a[5], zero, undef,
        ; undef -- i.e. each odd i32 of %a moved down one slot with a zero (or
        ; undef) above it.
        ; With +avx2 the checks expect a single 64-bit logical right shift by 32
        ; (vpsrlq); with +avx they expect a zeroed register plus a
        ; vshufps/vpermilps pair.
   2065 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
   2066 ; AVX1:       # BB#0:
   2067 ; AVX1-NEXT:    vxorps %ymm1, %ymm1, %ymm1
   2068 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   2069 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   2070 ; AVX1-NEXT:    retq
   2071 ;
   2072 ; AVX2-LABEL: shuffle_v8i32_1U3z5zUU:
   2073 ; AVX2:       # BB#0:
   2074 ; AVX2-NEXT:    vpsrlq $32, %ymm0, %ymm0
   2075 ; AVX2-NEXT:    retq
   2076   %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
   2077   ret <8 x i32> %shuffle
   2078 }
   2079 
   2080 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) {
        ; Within each 128-bit half the result is the last element of %b's half
        ; followed by the first three elements of %a's half
        ; (mask 11,0,1,2 | 15,4,5,6).
        ; With +avx2 the checks expect one vpalignr taking bytes 12-15 of ymm1
        ; then bytes 0-11 of ymm0 per half; with +avx they expect two vshufps.
   2081 ; AVX1-LABEL: shuffle_v8i32_B012F456:
   2082 ; AVX1:       # BB#0:
   2083 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
   2084 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
   2085 ; AVX1-NEXT:    retq
   2086 ;
   2087 ; AVX2-LABEL: shuffle_v8i32_B012F456:
   2088 ; AVX2:       # BB#0:
   2089 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
   2090 ; AVX2-NEXT:    retq
   2091   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
   2092   ret <8 x i32> %shuffle
   2093 }
   2094 
   2095 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) {
        ; Within each 128-bit half the result is the last three elements of %a's
        ; half followed by the first element of %b's half
        ; (mask 1,2,3,8 | 5,6,7,12).
        ; With +avx2 the checks expect one vpalignr taking bytes 4-15 of ymm0
        ; then bytes 0-3 of ymm1 per half; with +avx they expect two vshufps.
   2096 ; AVX1-LABEL: shuffle_v8i32_1238567C:
   2097 ; AVX1:       # BB#0:
   2098 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
   2099 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   2100 ; AVX1-NEXT:    retq
   2101 ;
   2102 ; AVX2-LABEL: shuffle_v8i32_1238567C:
   2103 ; AVX2:       # BB#0:
   2104 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
   2105 ; AVX2-NEXT:    retq
   2106   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
   2107   ret <8 x i32> %shuffle
   2108 }
   2109 
   2110 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) {
        ; Same pattern as 1238567C with the operands' roles swapped: within each
        ; 128-bit half, the last three elements of %b's half followed by the
        ; first element of %a's half (mask 9,10,11,0 | 13,14,15,4).
        ; With +avx2 the checks expect one vpalignr taking bytes 4-15 of ymm1
        ; then bytes 0-3 of ymm0 per half; with +avx they expect two vshufps.
   2111 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
   2112 ; AVX1:       # BB#0:
   2113 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
   2114 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
   2115 ; AVX1-NEXT:    retq
   2116 ;
   2117 ; AVX2-LABEL: shuffle_v8i32_9AB0DEF4:
   2118 ; AVX2:       # BB#0:
   2119 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
   2120 ; AVX2-NEXT:    retq
   2121   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
   2122   ret <8 x i32> %shuffle
   2123 }
   2124 
   2125 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) {
        ; Same pattern as B012F456 with the operands' roles swapped: within each
        ; 128-bit half, the last element of %a's half followed by the first
        ; three elements of %b's half (mask 3,8,9,10 | 7,12,13,14).
        ; With +avx2 the checks expect one vpalignr taking bytes 12-15 of ymm0
        ; then bytes 0-11 of ymm1 per half; with +avx they expect two vshufps.
   2126 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
   2127 ; AVX1:       # BB#0:
   2128 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
   2129 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
   2130 ; AVX1-NEXT:    retq
   2131 ;
   2132 ; AVX2-LABEL: shuffle_v8i32_389A7CDE:
   2133 ; AVX2:       # BB#0:
   2134 ; AVX2-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
   2135 ; AVX2-NEXT:    retq
   2136   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
   2137   ret <8 x i32> %shuffle
   2138 }
   2139 
   2140 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input rotation: every mask index is below 8, so only %a is
        ; read.  Within each 128-bit half the last element moves to the front
        ; (3,0,1,2 | 7,4,5,6).
        ; The checks expect one in-half permute: vpermilps with +avx, vpshufd
        ; with +avx2.
   2141 ; AVX1-LABEL: shuffle_v8i32_30127456:
   2142 ; AVX1:       # BB#0:
   2143 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
   2144 ; AVX1-NEXT:    retq
   2145 ;
   2146 ; AVX2-LABEL: shuffle_v8i32_30127456:
   2147 ; AVX2:       # BB#0:
   2148 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
   2149 ; AVX2-NEXT:    retq
   2150   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
   2151   ret <8 x i32> %shuffle
   2152 }
   2153 
   2154 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) {
        ; Single-input rotation: every mask index is below 8, so only %a is
        ; read.  Within each 128-bit half the first element moves to the back
        ; (1,2,3,0 | 5,6,7,4).
        ; The checks expect one in-half permute: vpermilps with +avx, vpshufd
        ; with +avx2.
   2155 ; AVX1-LABEL: shuffle_v8i32_12305674:
   2156 ; AVX1:       # BB#0:
   2157 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
   2158 ; AVX1-NEXT:    retq
   2159 ;
   2160 ; AVX2-LABEL: shuffle_v8i32_12305674:
   2161 ; AVX2:       # BB#0:
   2162 ; AVX2-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
   2163 ; AVX2-NEXT:    retq
   2164   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
   2165   ret <8 x i32> %shuffle
   2166 }
   2167 
   2168 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) {
        ; Concatenates two loaded <2 x float> values into the low four elements
        ; of an <8 x float>; the upper four are undef.  Each input is first
        ; widened to eight elements (undef padding), then a third shuffle picks
        ; elements 0-1 of the %tmp64 load and 0-1 of the %tmp65 load.
        ; Both run configurations expect two 64-bit moves: vmovq for the low
        ; half of xmm0 and vmovhpd for its high half.
   2169 ; ALL-LABEL: concat_v2f32_1:
   2170 ; ALL:       # BB#0: # %entry
   2171 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   2172 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   2173 ; ALL-NEXT:    retq
   2174 entry:
   2175   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2176   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   2177   %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   2178   %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   2179   %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
   2180   ret <8 x float> %tmp76
   2181 }
   2182 
   2183 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) {
        ; Variant of concat_v2f32_1 that performs the concatenation and the
        ; widening in one shufflevector: mask 0,1,2,3 joins the two loaded
        ; <2 x float> values and the remaining four result elements are undef.
        ; Both run configurations expect the same vmovq + vmovhpd pair.
   2184 ; ALL-LABEL: concat_v2f32_2:
   2185 ; ALL:       # BB#0: # %entry
   2186 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   2187 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   2188 ; ALL-NEXT:    retq
   2189 entry:
   2190   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2191   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   2192   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   2193   ret <8 x float> %tmp76
   2194 }
   2195 
   2196 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) {
        ; Variant of concat_v2f32_1 done in two steps: the two loaded
        ; <2 x float> values are first joined into a <4 x float>, which is then
        ; widened to <8 x float> with undef upper elements.
        ; Both run configurations expect the same vmovq + vmovhpd pair.
   2197 ; ALL-LABEL: concat_v2f32_3:
   2198 ; ALL:       # BB#0: # %entry
   2199 ; ALL-NEXT:    vmovq {{.*#+}} xmm0 = mem[0],zero
   2200 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   2201 ; ALL-NEXT:    retq
   2202 entry:
   2203   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2204   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   2205   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2206   %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   2207   ret <8 x float> %res
   2208 }
   2209 
   2210 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) {
        ; Loads one i32, places it in element 0 and takes elements 1-7 from a
        ; zeroinitializer operand (mask 0,9,10,...,15), i.e. a scalar load
        ; zero-extended into the vector.  Both run configurations expect this
        ; to fold into a single vmovd from memory.
   2211 ; ALL-LABEL: insert_mem_and_zero_v8i32:
   2212 ; ALL:       # BB#0:
   2213 ; ALL-NEXT:    vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2214 ; ALL-NEXT:    retq
   2215   %a = load i32, i32* %ptr
   2216   %v = insertelement <8 x i32> undef, i32 %a, i32 0
   2217   %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2218   ret <8 x i32> %shuffle
   2219 }
   2220 
   2221 define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) {
        ; Extracts the low four elements of %a and the high four elements of %b
        ; as separate <4 x i32> values, then concatenates them.  The net effect
        ; keeps each half in its original position, so the checks expect one
        ; blend: vblendpd with +avx, vpblendd with +avx2.
   2222 ; AVX1-LABEL: concat_v8i32_0123CDEF:
   2223 ; AVX1:       # BB#0:
   2224 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3]
   2225 ; AVX1-NEXT:    retq
   2226 ;
   2227 ; AVX2-LABEL: concat_v8i32_0123CDEF:
   2228 ; AVX2:       # BB#0:
   2229 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   2230 ; AVX2-NEXT:    retq
   2231   %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2232   %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2233   %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2234   ret <8 x i32> %shuf
   2235 }
   2236 
   2237 define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) {
        ; Concatenates the upper halves of %a0 and %a1 through bitcasts.  Both
        ; extracting shuffles take (%a0, %a1) as operands: indices 4-7 pick the
        ; upper half of %a0 and 12-15 the upper half of %a1.  Each half is
        ; bitcast to <2 x i64>, joined as <4 x i64>, and bitcast back to
        ; <8 x i32>.  Both run configurations expect a single vperm2f128.
   2238 ; ALL-LABEL: concat_v8i32_4567CDEF_bc:
   2239 ; ALL:       # BB#0:
   2240 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   2241 ; ALL-NEXT:    retq
   2242   %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2243   %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
   2244   %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
   2245   %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
   2246   %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2247   %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
   2248   ret <8 x i32> %shuffle32
   2249 }
   2250 
   2251 define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) {
        ; Float variant of the upper-half concatenation with mixed bitcasts:
        ; %f0 is viewed as <4 x i64> and %f1 as <8 x i32> before their upper
        ; halves are extracted, joined as <4 x i64>, and bitcast to <8 x float>.
        ; %bc0hi is a same-type bitcast (<2 x i64> to <2 x i64>), kept as
        ; written.  Both run configurations expect a single vperm2f128.
   2252 ; ALL-LABEL: concat_v8f32_4567CDEF_bc:
   2253 ; ALL:       # BB#0:
   2254 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   2255 ; ALL-NEXT:    retq
   2256   %a0 = bitcast <8 x float> %f0 to <4 x i64>
   2257   %a1 = bitcast <8 x float> %f1 to <8 x i32>
   2258   %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
   2259   %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2260   %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
   2261   %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
   2262   %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2263   %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
   2264   ret <8 x float> %shuffle32
   2265 }
   2266 
   2267 define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) {
        ; Loads one i32, inserts it into element 0 of a zero <4 x i32>, then
        ; widens to <8 x i32> with an all-zero mask so every result element is
        ; the loaded value (the zero elements 1-3 are never selected).
        ; Both run configurations expect a single vbroadcastss from memory.
   2268 ; ALL-LABEL: insert_dup_mem_v8i32:
   2269 ; ALL:       # BB#0:
   2270 ; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
   2271 ; ALL-NEXT:    retq
   2272   %tmp = load i32, i32* %ptr, align 4
   2273   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   2274   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
   2275   ret <8 x i32> %tmp2
   2276 }
   2277