Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX1
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-SLOW
      4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX1OR2 --check-prefix=AVX2OR512VL --check-prefix=AVX2 --check-prefix=AVX2-FAST
      5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-SLOW
      6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vl,+avx512dq,+fast-variable-shuffle | FileCheck %s --check-prefix=ALL --check-prefix=AVX2OR512VL --check-prefix=AVX512VL --check-prefix=AVX512VL-FAST
      7 
      8 define <8 x float> @shuffle_v8f32_00000000(<8 x float> %a, <8 x float> %b) {
      9 ; AVX1-LABEL: shuffle_v8f32_00000000:
     10 ; AVX1:       # %bb.0:
     11 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
     12 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
     13 ; AVX1-NEXT:    retq
     14 ;
     15 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000000:
     16 ; AVX2OR512VL:       # %bb.0:
     17 ; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
     18 ; AVX2OR512VL-NEXT:    retq
          ; Splat: every result lane is %a[0]; %b is unused. The AVX-only run expects
          ; vpermilps + vinsertf128, the AVX2 and AVX512VL runs a single vbroadcastss.
     19   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     20   ret <8 x float> %shuffle
     21 }
     22 
     23 define <8 x float> @shuffle_v8f32_00000010(<8 x float> %a, <8 x float> %b) {
     24 ; AVX1-LABEL: shuffle_v8f32_00000010:
     25 ; AVX1:       # %bb.0:
     26 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     27 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
     28 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     29 ; AVX1-NEXT:    retq
     30 ;
     31 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000010:
     32 ; AVX2OR512VL:       # %bb.0:
     33 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
     34 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
     35 ; AVX2OR512VL-NEXT:    retq
          ; Every result lane is %a[0] except lane 6, which is %a[1]; %b is unused.
          ; All selected elements come from the low four elements of %a.
     36   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
     37   ret <8 x float> %shuffle
     38 }
     39 
     40 define <8 x float> @shuffle_v8f32_00000200(<8 x float> %a, <8 x float> %b) {
     41 ; AVX1-LABEL: shuffle_v8f32_00000200:
     42 ; AVX1:       # %bb.0:
     43 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     44 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
     45 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     46 ; AVX1-NEXT:    retq
     47 ;
     48 ; AVX2OR512VL-LABEL: shuffle_v8f32_00000200:
     49 ; AVX2OR512VL:       # %bb.0:
     50 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
     51 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
     52 ; AVX2OR512VL-NEXT:    retq
          ; Every result lane is %a[0] except lane 5, which is %a[2]; %b is unused.
          ; All selected elements come from the low four elements of %a.
     53   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
     54   ret <8 x float> %shuffle
     55 }
     56 
     57 define <8 x float> @shuffle_v8f32_00003000(<8 x float> %a, <8 x float> %b) {
     58 ; AVX1-LABEL: shuffle_v8f32_00003000:
     59 ; AVX1:       # %bb.0:
     60 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
     61 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
     62 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
     63 ; AVX1-NEXT:    retq
     64 ;
     65 ; AVX2OR512VL-LABEL: shuffle_v8f32_00003000:
     66 ; AVX2OR512VL:       # %bb.0:
     67 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
     68 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
     69 ; AVX2OR512VL-NEXT:    retq
          ; Every result lane is %a[0] except lane 4, which is %a[3]; %b is unused.
          ; All selected elements come from the low four elements of %a.
     70   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
     71   ret <8 x float> %shuffle
     72 }
     73 
     74 define <8 x float> @shuffle_v8f32_00040000(<8 x float> %a, <8 x float> %b) {
     75 ; AVX1-LABEL: shuffle_v8f32_00040000:
     76 ; AVX1:       # %bb.0:
     77 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
     78 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
     79 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
     80 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
     81 ; AVX1-NEXT:    retq
     82 ;
     83 ; AVX2OR512VL-LABEL: shuffle_v8f32_00040000:
     84 ; AVX2OR512VL:       # %bb.0:
     85 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
     86 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
     87 ; AVX2OR512VL-NEXT:    retq
          ; Every result lane is %a[0] except lane 3, which is %a[4]: an element from the
          ; upper four of %a lands in the lower four of the result. %b is unused.
     88   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
     89   ret <8 x float> %shuffle
     90 }
     91 
     92 define <8 x float> @shuffle_v8f32_00500000(<8 x float> %a, <8 x float> %b) {
     93 ; AVX1-LABEL: shuffle_v8f32_00500000:
     94 ; AVX1:       # %bb.0:
     95 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
     96 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
     97 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
     98 ; AVX1-NEXT:    retq
     99 ;
    100 ; AVX2OR512VL-LABEL: shuffle_v8f32_00500000:
    101 ; AVX2OR512VL:       # %bb.0:
    102 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
    103 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    104 ; AVX2OR512VL-NEXT:    retq
          ; Every result lane is %a[0] except lane 2, which is %a[5]: an element from the
          ; upper four of %a lands in the lower four of the result. %b is unused.
    105   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    106   ret <8 x float> %shuffle
    107 }
    108 
    109 define <8 x float> @shuffle_v8f32_06000000(<8 x float> %a, <8 x float> %b) {
    110 ; AVX1-LABEL: shuffle_v8f32_06000000:
    111 ; AVX1:       # %bb.0:
    112 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    113 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    114 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
    115 ; AVX1-NEXT:    retq
    116 ;
    117 ; AVX2OR512VL-LABEL: shuffle_v8f32_06000000:
    118 ; AVX2OR512VL:       # %bb.0:
    119 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
    120 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    121 ; AVX2OR512VL-NEXT:    retq
          ; Every result lane is %a[0] except lane 1, which is %a[6]: an element from the
          ; upper four of %a lands in the lower four of the result. %b is unused.
    122   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    123   ret <8 x float> %shuffle
    124 }
    125 
    126 define <8 x float> @shuffle_v8f32_70000000(<8 x float> %a, <8 x float> %b) {
    127 ; AVX1-LABEL: shuffle_v8f32_70000000:
    128 ; AVX1:       # %bb.0:
    129 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    130 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
    131 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
    132 ; AVX1-NEXT:    retq
    133 ;
    134 ; AVX2OR512VL-LABEL: shuffle_v8f32_70000000:
    135 ; AVX2OR512VL:       # %bb.0:
    136 ; AVX2OR512VL-NEXT:    movl $7, %eax
    137 ; AVX2OR512VL-NEXT:    vmovd %eax, %xmm1
    138 ; AVX2OR512VL-NEXT:    vpermd %ymm0, %ymm1, %ymm0
    139 ; AVX2OR512VL-NEXT:    retq
          ; Every result lane is %a[0] except lane 0, which is %a[7]; %b is unused.
          ; The AVX2/AVX512VL expectation builds the index vector with movl $7 + vmovd and feeds vpermd.
    140   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    141   ret <8 x float> %shuffle
    142 }
    143 
    144 define <8 x float> @shuffle_v8f32_01014545(<8 x float> %a, <8 x float> %b) {
    145 ; ALL-LABEL: shuffle_v8f32_01014545:
    146 ; ALL:       # %bb.0:
    147 ; ALL-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
    148 ; ALL-NEXT:    retq
          ; Repeats the float pair %a[0,1] across lanes 0-3 and %a[4,5] across lanes 4-7; %b is unused.
          ; Every run configuration expects a single vmovddup.
    149   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    150   ret <8 x float> %shuffle
    151 }
    152 
    153 define <8 x float> @shuffle_v8f32_00112233(<8 x float> %a, <8 x float> %b) {
    154 ; AVX1-LABEL: shuffle_v8f32_00112233:
    155 ; AVX1:       # %bb.0:
    156 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
    157 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
    158 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    159 ; AVX1-NEXT:    retq
    160 ;
    161 ; AVX2OR512VL-LABEL: shuffle_v8f32_00112233:
    162 ; AVX2OR512VL:       # %bb.0:
    163 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
    164 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    165 ; AVX2OR512VL-NEXT:    retq
          ; Each of %a[0..3] is written to two adjacent result lanes; %b and the upper
          ; four elements of %a are unused.
    166   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
    167   ret <8 x float> %shuffle
    168 }
    169 
    170 define <8 x float> @shuffle_v8f32_00001111(<8 x float> %a, <8 x float> %b) {
    171 ; AVX1-LABEL: shuffle_v8f32_00001111:
    172 ; AVX1:       # %bb.0:
    173 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
    174 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
    175 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    176 ; AVX1-NEXT:    retq
    177 ;
    178 ; AVX2OR512VL-LABEL: shuffle_v8f32_00001111:
    179 ; AVX2OR512VL:       # %bb.0:
    180 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
    181 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
    182 ; AVX2OR512VL-NEXT:    retq
          ; Result lanes 0-3 are %a[0] and lanes 4-7 are %a[1]; %b is unused.
    183   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
    184   ret <8 x float> %shuffle
    185 }
    186 
    187 define <8 x float> @shuffle_v8f32_81a3c5e7(<8 x float> %a, <8 x float> %b) {
    188 ; ALL-LABEL: shuffle_v8f32_81a3c5e7:
    189 ; ALL:       # %bb.0:
    190 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
    191 ; ALL-NEXT:    retq
          ; Two-input blend: mask indices 8-15 pick from %b (hex digits 8-f in the function name).
          ; Even lanes take %b and odd lanes take %a, each at its own position, so one vblendps is expected.
    192   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
    193   ret <8 x float> %shuffle
    194 }
    195 
    196 define <8 x float> @shuffle_v8f32_08080808(<8 x float> %a, <8 x float> %b) {
    197 ; AVX1-LABEL: shuffle_v8f32_08080808:
    198 ; AVX1:       # %bb.0:
    199 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
    200 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
    201 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    202 ; AVX1-NEXT:    retq
    203 ;
    204 ; AVX2OR512VL-LABEL: shuffle_v8f32_08080808:
    205 ; AVX2OR512VL:       # %bb.0:
    206 ; AVX2OR512VL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    207 ; AVX2OR512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
    208 ; AVX2OR512VL-NEXT:    retq
          ; Mask index 8 is %b[0]: the result alternates %a[0], %b[0] across all eight lanes.
    209   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
    210   ret <8 x float> %shuffle
    211 }
    212 
    213 define <8 x float> @shuffle_v8f32_08084c4c(<8 x float> %a, <8 x float> %b) {
    214 ; ALL-LABEL: shuffle_v8f32_08084c4c:
    215 ; ALL:       # %bb.0:
    216 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
    217 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
    218 ; ALL-NEXT:    retq
          ; Lanes 0-3 alternate %a[0], %b[0] (index 8); lanes 4-7 alternate %a[4], %b[4] (index 12).
    219   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
    220   ret <8 x float> %shuffle
    221 }
    222 
    223 define <8 x float> @shuffle_v8f32_8823cc67(<8 x float> %a, <8 x float> %b) {
    224 ; ALL-LABEL: shuffle_v8f32_8823cc67:
    225 ; ALL:       # %bb.0:
    226 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
    227 ; ALL-NEXT:    retq
          ; In each group of four lanes the first two come from %b (indices 8, 12 = %b[0], %b[4], each
          ; duplicated) and the last two are %a's elements left in place; a single vshufps is expected.
    228   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
    229   ret <8 x float> %shuffle
    230 }
    231 
    232 define <8 x float> @shuffle_v8f32_9832dc76(<8 x float> %a, <8 x float> %b) {
    233 ; ALL-LABEL: shuffle_v8f32_9832dc76:
    234 ; ALL:       # %bb.0:
    235 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
    236 ; ALL-NEXT:    retq
          ; In each group of four lanes: the low pair of %b swapped (indices 9,8 / 13,12), then the high
          ; pair of %a swapped (3,2 / 7,6). A single vshufps is expected.
    237   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
    238   ret <8 x float> %shuffle
    239 }
    240 
    241 define <8 x float> @shuffle_v8f32_9810dc54(<8 x float> %a, <8 x float> %b) {
    242 ; ALL-LABEL: shuffle_v8f32_9810dc54:
    243 ; ALL:       # %bb.0:
    244 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
    245 ; ALL-NEXT:    retq
          ; In each group of four lanes: the low pair of %b swapped (indices 9,8 / 13,12), then the low
          ; pair of %a swapped (1,0 / 5,4). A single vshufps is expected.
    246   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
    247   ret <8 x float> %shuffle
    248 }
    249 
    250 define <8 x float> @shuffle_v8f32_08194c5d(<8 x float> %a, <8 x float> %b) {
    251 ; ALL-LABEL: shuffle_v8f32_08194c5d:
    252 ; ALL:       # %bb.0:
    253 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
    254 ; ALL-NEXT:    retq
          ; Interleaves %a[0,1] with %b[0,1] in lanes 0-3 and %a[4,5] with %b[4,5] in lanes 4-7
          ; (indices 8-15 are %b); a single vunpcklps is expected.
    255   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
    256   ret <8 x float> %shuffle
    257 }
    258 
    259 define <8 x float> @shuffle_v8f32_2a3b6e7f(<8 x float> %a, <8 x float> %b) {
    260 ; ALL-LABEL: shuffle_v8f32_2a3b6e7f:
    261 ; ALL:       # %bb.0:
    262 ; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
    263 ; ALL-NEXT:    retq
          ; Interleaves %a[2,3] with %b[2,3] in lanes 0-3 and %a[6,7] with %b[6,7] in lanes 4-7
          ; (indices 8-15 are %b); a single vunpckhps is expected.
    264   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
    265   ret <8 x float> %shuffle
    266 }
    267 
    268 define <8 x float> @shuffle_v8f32_08192a3b(<8 x float> %a, <8 x float> %b) {
    269 ; AVX1OR2-LABEL: shuffle_v8f32_08192a3b:
    270 ; AVX1OR2:       # %bb.0:
    271 ; AVX1OR2-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
    272 ; AVX1OR2-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
    273 ; AVX1OR2-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    274 ; AVX1OR2-NEXT:    retq
    275 ;
    276 ; AVX512VL-LABEL: shuffle_v8f32_08192a3b:
    277 ; AVX512VL:       # %bb.0:
    278 ; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11]
    279 ; AVX512VL-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
    280 ; AVX512VL-NEXT:    retq
          ; Full interleave of %a[0..3] with %b[0..3] (indices 8-11) across all eight lanes.
          ; The AVX and AVX2 runs expect two 128-bit unpacks + vinsertf128; the AVX512VL runs expect vpermt2ps.
    281   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    282   ret <8 x float> %shuffle
    283 }
    284 
    285 define <8 x float> @shuffle_v8f32_08991abb(<8 x float> %a, <8 x float> %b) {
    286 ; AVX1-LABEL: shuffle_v8f32_08991abb:
    287 ; AVX1:       # %bb.0:
    288 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
    289 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
    290 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
    291 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
    292 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    293 ; AVX1-NEXT:    retq
    294 ;
    295 ; AVX2-LABEL: shuffle_v8f32_08991abb:
    296 ; AVX2:       # %bb.0:
    297 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
    298 ; AVX2-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    299 ; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
    300 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
    301 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    302 ; AVX2-NEXT:    retq
    303 ;
    304 ; AVX512VL-LABEL: shuffle_v8f32_08991abb:
    305 ; AVX512VL:       # %bb.0:
    306 ; AVX512VL-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
    307 ; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
    308 ; AVX512VL-NEXT:    vpermi2ps %ymm2, %ymm1, %ymm0
    309 ; AVX512VL-NEXT:    retq
          ; Lanes 0 and 4 are %a[0] and %a[1]; the other six lanes come from %b[0..3]
          ; (indices 8-11, with %b[1] and %b[3] repeated). The upper four elements of both inputs are unused.
    310   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
    311   ret <8 x float> %shuffle
    312 }
    313 
    314 define <8 x float> @shuffle_v8f32_091b2d3f(<8 x float> %a, <8 x float> %b) {
    315 ; AVX1-LABEL: shuffle_v8f32_091b2d3f:
    316 ; AVX1:       # %bb.0:
    317 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
    318 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
    319 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
    320 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    321 ; AVX1-NEXT:    retq
    322 ;
    323 ; AVX2-LABEL: shuffle_v8f32_091b2d3f:
    324 ; AVX2:       # %bb.0:
    325 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm2 = <0,u,1,u,2,u,3,u>
    326 ; AVX2-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    327 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    328 ; AVX2-NEXT:    retq
    329 ;
    330 ; AVX512VL-LABEL: shuffle_v8f32_091b2d3f:
    331 ; AVX512VL:       # %bb.0:
    332 ; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15]
    333 ; AVX512VL-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
    334 ; AVX512VL-NEXT:    retq
          ; Even lanes hold %a[0..3] spread out in order; odd lanes keep %b's own odd elements
          ; (indices 9, 11, 13, 15 = %b[1], %b[3], %b[5], %b[7]) in place.
    335   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
    336   ret <8 x float> %shuffle
    337 }
    338 
    339 define <8 x float> @shuffle_v8f32_09ab1def(<8 x float> %a, <8 x float> %b) {
    340 ; AVX1-LABEL: shuffle_v8f32_09ab1def:
    341 ; AVX1:       # %bb.0:
    342 ; AVX1-NEXT:    vmovshdup {{.*#+}} xmm2 = xmm0[1,1,3,3]
    343 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
    344 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    345 ; AVX1-NEXT:    retq
    346 ;
    347 ; AVX2-LABEL: shuffle_v8f32_09ab1def:
    348 ; AVX2:       # %bb.0:
    349 ; AVX2-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
    350 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
    351 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    352 ; AVX2-NEXT:    retq
    353 ;
    354 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_09ab1def:
    355 ; AVX512VL-SLOW:       # %bb.0:
    356 ; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,1,1,3]
    357 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,1,3]
    358 ; AVX512VL-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
    359 ; AVX512VL-SLOW-NEXT:    retq
    360 ;
    361 ; AVX512VL-FAST-LABEL: shuffle_v8f32_09ab1def:
    362 ; AVX512VL-FAST:       # %bb.0:
    363 ; AVX512VL-FAST-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
    364 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm0 = [8,1,2,3,10,5,6,7]
    365 ; AVX512VL-FAST-NEXT:    vpermi2ps %ymm2, %ymm1, %ymm0
    366 ; AVX512VL-FAST-NEXT:    retq
          ; Lanes 0 and 4 are %a[0] and %a[1]; all other lanes keep %b's element at the same position
          ; (indices 9-11 and 13-15). The AVX512VL expectation differs with +fast-variable-shuffle (SLOW vs FAST).
    367   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
    368   ret <8 x float> %shuffle
    369 }
    370 
    371 define <8 x float> @shuffle_v8f32_00014445(<8 x float> %a, <8 x float> %b) {
    372 ; ALL-LABEL: shuffle_v8f32_00014445:
    373 ; ALL:       # %bb.0:
    374 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
    375 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [0,0,0,1] applied to elements 0-3 and again
          ; to elements 4-7, with no element moving between the two groups. One vpermilps is expected.
    376   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
    377   ret <8 x float> %shuffle
    378 }
    379 
    380 define <8 x float> @shuffle_v8f32_00204464(<8 x float> %a, <8 x float> %b) {
    381 ; ALL-LABEL: shuffle_v8f32_00204464:
    382 ; ALL:       # %bb.0:
    383 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
    384 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [0,0,2,0] applied to elements 0-3 and again
          ; to elements 4-7, with no element moving between the two groups. One vpermilps is expected.
    385   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
    386   ret <8 x float> %shuffle
    387 }
    388 
    389 define <8 x float> @shuffle_v8f32_03004744(<8 x float> %a, <8 x float> %b) {
    390 ; ALL-LABEL: shuffle_v8f32_03004744:
    391 ; ALL:       # %bb.0:
    392 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
    393 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [0,3,0,0] applied to elements 0-3 and again
          ; to elements 4-7, with no element moving between the two groups. One vpermilps is expected.
    394   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
    395   ret <8 x float> %shuffle
    396 }
    397 
    398 define <8 x float> @shuffle_v8f32_10005444(<8 x float> %a, <8 x float> %b) {
    399 ; ALL-LABEL: shuffle_v8f32_10005444:
    400 ; ALL:       # %bb.0:
    401 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
    402 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [1,0,0,0] applied to elements 0-3 and again
          ; to elements 4-7, with no element moving between the two groups. One vpermilps is expected.
    403   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
    404   ret <8 x float> %shuffle
    405 }
    406 
    407 define <8 x float> @shuffle_v8f32_22006644(<8 x float> %a, <8 x float> %b) {
    408 ; ALL-LABEL: shuffle_v8f32_22006644:
    409 ; ALL:       # %bb.0:
    410 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
    411 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [2,2,0,0] applied to elements 0-3 and again
          ; to elements 4-7, with no element moving between the two groups. One vpermilps is expected.
    412   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
    413   ret <8 x float> %shuffle
    414 }
    415 
    416 define <8 x float> @shuffle_v8f32_33307774(<8 x float> %a, <8 x float> %b) {
    417 ; ALL-LABEL: shuffle_v8f32_33307774:
    418 ; ALL:       # %bb.0:
    419 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
    420 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [3,3,3,0] applied to elements 0-3 and again
          ; to elements 4-7, with no element moving between the two groups. One vpermilps is expected.
    421   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
    422   ret <8 x float> %shuffle
    423 }
    424 
    425 define <8 x float> @shuffle_v8f32_32107654(<8 x float> %a, <8 x float> %b) {
    426 ; ALL-LABEL: shuffle_v8f32_32107654:
    427 ; ALL:       # %bb.0:
    428 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    429 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): reverses elements 0-3 and, separately, elements 4-7.
          ; One vpermilps is expected.
    430   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    431   ret <8 x float> %shuffle
    432 }
    433 
    434 define <8 x float> @shuffle_v8f32_00234467(<8 x float> %a, <8 x float> %b) {
    435 ; ALL-LABEL: shuffle_v8f32_00234467:
    436 ; ALL:       # %bb.0:
    437 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
    438 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [0,0,2,3] applied to elements 0-3 and again
          ; to elements 4-7, with no element moving between the two groups. One vpermilps is expected.
    439   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
    440   ret <8 x float> %shuffle
    441 }
    442 
    443 define <8 x float> @shuffle_v8f32_00224466(<8 x float> %a, <8 x float> %b) {
    444 ; ALL-LABEL: shuffle_v8f32_00224466:
    445 ; ALL:       # %bb.0:
    446 ; ALL-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
    447 ; ALL-NEXT:    retq
          ; Duplicates each even-indexed element of %a into the following odd lane; %b is unused.
          ; Every run configuration expects vmovsldup rather than a generic vpermilps.
    448   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
    449   ret <8 x float> %shuffle
    450 }
    451 
    452 define <8 x float> @shuffle_v8f32_10325476(<8 x float> %a, <8 x float> %b) {
    453 ; ALL-LABEL: shuffle_v8f32_10325476:
    454 ; ALL:       # %bb.0:
    455 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
    456 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): swaps the two elements of every adjacent pair.
          ; One vpermilps is expected.
    457   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
    458   ret <8 x float> %shuffle
    459 }
    460 
    461 define <8 x float> @shuffle_v8f32_11335577(<8 x float> %a, <8 x float> %b) {
    462 ; ALL-LABEL: shuffle_v8f32_11335577:
    463 ; ALL:       # %bb.0:
    464 ; ALL-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
    465 ; ALL-NEXT:    retq
          ; Duplicates each odd-indexed element of %a into the preceding even lane; %b is unused.
          ; Every run configuration expects vmovshdup rather than a generic vpermilps.
    466   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
    467   ret <8 x float> %shuffle
    468 }
    469 
    470 define <8 x float> @shuffle_v8f32_10235467(<8 x float> %a, <8 x float> %b) {
    471 ; ALL-LABEL: shuffle_v8f32_10235467:
    472 ; ALL:       # %bb.0:
    473 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
    474 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [1,0,2,3] applied to elements 0-3 and again
          ; to elements 4-7, with no element moving between the two groups. One vpermilps is expected.
    475   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
    476   ret <8 x float> %shuffle
    477 }
    478 
    479 define <8 x float> @shuffle_v8f32_10225466(<8 x float> %a, <8 x float> %b) {
    480 ; ALL-LABEL: shuffle_v8f32_10225466:
    481 ; ALL:       # %bb.0:
    482 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
    483 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [1,0,2,2] applied to elements 0-3 and again
          ; to elements 4-7, with no element moving between the two groups. One vpermilps is expected.
    484   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
    485   ret <8 x float> %shuffle
    486 }
    487 
    488 define <8 x float> @shuffle_v8f32_00015444(<8 x float> %a, <8 x float> %b) {
    489 ; ALL-LABEL: shuffle_v8f32_00015444:
    490 ; ALL:       # %bb.0:
    491 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
    492 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with a different pattern per group: [0,0,0,1] on
          ; elements 0-3 and [1,0,0,0] on elements 4-7. One vpermilps is still expected.
    493   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
    494   ret <8 x float> %shuffle
    495 }
    496 
    497 define <8 x float> @shuffle_v8f32_00204644(<8 x float> %a, <8 x float> %b) {
    498 ; ALL-LABEL: shuffle_v8f32_00204644:
    499 ; ALL:       # %bb.0:
    500 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
    501 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with a different pattern per group: [0,0,2,0] on
          ; elements 0-3 and [0,2,0,0] on elements 4-7. One vpermilps is still expected.
    502   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
    503   ret <8 x float> %shuffle
    504 }
    505 
    506 define <8 x float> @shuffle_v8f32_03004474(<8 x float> %a, <8 x float> %b) {
    507 ; ALL-LABEL: shuffle_v8f32_03004474:
    508 ; ALL:       # %bb.0:
    509 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
    510 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with a different pattern per group: [0,3,0,0] on
          ; elements 0-3 and [0,0,3,0] on elements 4-7. One vpermilps is still expected.
    511   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
    512   ret <8 x float> %shuffle
    513 }
    514 
    515 define <8 x float> @shuffle_v8f32_10004444(<8 x float> %a, <8 x float> %b) {
    516 ; ALL-LABEL: shuffle_v8f32_10004444:
    517 ; ALL:       # %bb.0:
    518 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
    519 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with a different pattern per group: [1,0,0,0] on
          ; elements 0-3 and a splat of element 4 on lanes 4-7. One vpermilps is still expected.
    520   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
    521   ret <8 x float> %shuffle
    522 }
    523 
    524 define <8 x float> @shuffle_v8f32_22006446(<8 x float> %a, <8 x float> %b) {
    525 ; ALL-LABEL: shuffle_v8f32_22006446:
    526 ; ALL:       # %bb.0:
    527 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
    528 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with a different pattern per group: [2,2,0,0] on
          ; elements 0-3 and [2,0,0,2] on elements 4-7. One vpermilps is still expected.
    529   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
    530   ret <8 x float> %shuffle
    531 }
    532 
    533 define <8 x float> @shuffle_v8f32_33307474(<8 x float> %a, <8 x float> %b) {
    534 ; ALL-LABEL: shuffle_v8f32_33307474:
    535 ; ALL:       # %bb.0:
    536 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
    537 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with a different pattern per group: [3,3,3,0] on
          ; elements 0-3 and [3,0,3,0] on elements 4-7. One vpermilps is still expected.
    538   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
    539   ret <8 x float> %shuffle
    540 }
    541 
    542 define <8 x float> @shuffle_v8f32_32104567(<8 x float> %a, <8 x float> %b) {
    543 ; ALL-LABEL: shuffle_v8f32_32104567:
    544 ; ALL:       # %bb.0:
    545 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
    546 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): reverses elements 0-3 and leaves elements 4-7 in
          ; place. One vpermilps is expected.
    547   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
    548   ret <8 x float> %shuffle
    549 }
    550 
    551 define <8 x float> @shuffle_v8f32_00236744(<8 x float> %a, <8 x float> %b) {
    552 ; ALL-LABEL: shuffle_v8f32_00236744:
    553 ; ALL:       # %bb.0:
    554 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
    555 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with a different pattern per group: [0,0,2,3] on
          ; elements 0-3 and [2,3,0,0] on elements 4-7. One vpermilps is still expected.
    556   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
    557   ret <8 x float> %shuffle
    558 }
    559 
    560 define <8 x float> @shuffle_v8f32_00226644(<8 x float> %a, <8 x float> %b) {
    561 ; ALL-LABEL: shuffle_v8f32_00226644:
    562 ; ALL:       # %bb.0:
    563 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
    564 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with a different pattern per group: [0,0,2,2] on
          ; elements 0-3 and [2,2,0,0] on elements 4-7. One vpermilps is still expected.
    565   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
    566   ret <8 x float> %shuffle
    567 }
    568 
    569 define <8 x float> @shuffle_v8f32_10324567(<8 x float> %a, <8 x float> %b) {
    570 ; ALL-LABEL: shuffle_v8f32_10324567:
    571 ; ALL:       # %bb.0:
    572 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
    573 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): swaps adjacent pairs within elements 0-3 and leaves
          ; elements 4-7 in place. One vpermilps is expected.
    574   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
    575   ret <8 x float> %shuffle
    576 }
    577 
    578 define <8 x float> @shuffle_v8f32_11334567(<8 x float> %a, <8 x float> %b) {
    579 ; ALL-LABEL: shuffle_v8f32_11334567:
    580 ; ALL:       # %bb.0:
    581 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
    582 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): pattern [1,1,3,3] on elements 0-3 while elements
          ; 4-7 stay in place. One vpermilps is expected.
    583   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
    584   ret <8 x float> %shuffle
    585 }
    586 
    587 define <8 x float> @shuffle_v8f32_01235467(<8 x float> %a, <8 x float> %b) {
    588 ; ALL-LABEL: shuffle_v8f32_01235467:
    589 ; ALL:       # %bb.0:
    590 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
    591 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): elements 0-3 stay in place and only elements 4 and 5
          ; are swapped. One vpermilps is expected.
    592   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
    593   ret <8 x float> %shuffle
    594 }
    595 
    596 define <8 x float> @shuffle_v8f32_01235466(<8 x float> %a, <8 x float> %b) {
    597 ; ALL-LABEL: shuffle_v8f32_01235466:
    598 ; ALL:       # %bb.0:
    599 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
    600 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused): elements 0-3 stay in place; lanes 4-7 become
          ; %a[5], %a[4], %a[6], %a[6]. One vpermilps is expected.
    601   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
    602   ret <8 x float> %shuffle
    603 }
    604 
    605 define <8 x float> @shuffle_v8f32_002u6u44(<8 x float> %a, <8 x float> %b) {
    606 ; ALL-LABEL: shuffle_v8f32_002u6u44:
    607 ; ALL:       # %bb.0:
    608 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
    609 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with undef mask elements at lanes 3 and 5 ('u' in the
          ; function name and in the expected vpermilps operand). One vpermilps is expected.
    610   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
    611   ret <8 x float> %shuffle
    612 }
    613 
    614 define <8 x float> @shuffle_v8f32_00uu66uu(<8 x float> %a, <8 x float> %b) {
    615 ; ALL-LABEL: shuffle_v8f32_00uu66uu:
    616 ; ALL:       # %bb.0:
    617 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
    618 ; ALL-NEXT:    retq
          ; Single-input permute of %a (%b unused) with undef mask elements at lanes 2, 3, 6 and 7 ('u' in
          ; the function name and in the expected vpermilps operand). One vpermilps is expected.
    619   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
    620   ret <8 x float> %shuffle
    621 }
    622 
    ; Single-input permute of %a that swaps adjacent pairs in the low half, keeps
    ; elements 4 and 5 in place and leaves the last two result elements undef.
    623 define <8 x float> @shuffle_v8f32_103245uu(<8 x float> %a, <8 x float> %b) {
    624 ; ALL-LABEL: shuffle_v8f32_103245uu:
    625 ; ALL:       # %bb.0:
    626 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
    627 ; ALL-NEXT:    retq
    628   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
    629   ret <8 x float> %shuffle
    630 }
    631 
    ; Single-input permute of %a that duplicates the odd elements of the low half,
    ; leaves result elements 4 and 5 undef and keeps elements 6 and 7 in place.
    632 define <8 x float> @shuffle_v8f32_1133uu67(<8 x float> %a, <8 x float> %b) {
    633 ; ALL-LABEL: shuffle_v8f32_1133uu67:
    634 ; ALL:       # %bb.0:
    635 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
    636 ; ALL-NEXT:    retq
    637   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
    638   ret <8 x float> %shuffle
    639 }
    640 
    ; Single-input permute of %a with four undef result elements (1, 2, 6 and 7).
    ; The defined slots keep elements 0 and 3 and swap elements 4 and 5.
    641 define <8 x float> @shuffle_v8f32_0uu354uu(<8 x float> %a, <8 x float> %b) {
    642 ; ALL-LABEL: shuffle_v8f32_0uu354uu:
    643 ; ALL:       # %bb.0:
    644 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
    645 ; ALL-NEXT:    retq
    646   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
    647   ret <8 x float> %shuffle
    648 }
    649 
    ; Mostly-undef single-input permute of %a. Only result element 3 (from element 3)
    ; and result elements 6 and 7 (both from element 6) are defined.
    650 define <8 x float> @shuffle_v8f32_uuu3uu66(<8 x float> %a, <8 x float> %b) {
    651 ; ALL-LABEL: shuffle_v8f32_uuu3uu66:
    652 ; ALL:       # %bb.0:
    653 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
    654 ; ALL-NEXT:    retq
    655   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
    656   ret <8 x float> %shuffle
    657 }
    658 
    ; Irregular two-input shuffle. Mask indices 0-7 pick from %a and 8-15 from %b, so
    ; the result is b4,a3,a4,b0,b4,b5,b2,a0. The expected lowering differs per subtarget
    ; - AVX1 builds each input with vperm2f128 plus in-lane shuffles and then blends,
    ; - AVX2 permutes each input separately and blends (two vpermps when
    ;   fast-variable-shuffle is enabled),
    ; - AVX512VL uses one two-source vpermi2ps with a constant index vector.
    659 define <8 x float> @shuffle_v8f32_c348cda0(<8 x float> %a, <8 x float> %b) {
    660 ; AVX1-LABEL: shuffle_v8f32_c348cda0:
    661 ; AVX1:       # %bb.0:
    662 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    663 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,3],ymm2[0,0],ymm0[4,7],ymm2[4,4]
    664 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
    665 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,1,2,0,4,5,6,4]
    666 ; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm2[0,1],ymm1[2,3,4,5],ymm2[6,7]
    667 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
    668 ; AVX1-NEXT:    retq
    669 ;
    670 ; AVX2-SLOW-LABEL: shuffle_v8f32_c348cda0:
    671 ; AVX2-SLOW:       # %bb.0:
    672 ; AVX2-SLOW-NEXT:    vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
    673 ; AVX2-SLOW-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    674 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,2,0,4,7,6,4]
    675 ; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,2,1]
    676 ; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
    677 ; AVX2-SLOW-NEXT:    retq
    678 ;
    679 ; AVX2-FAST-LABEL: shuffle_v8f32_c348cda0:
    680 ; AVX2-FAST:       # %bb.0:
    681 ; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [0,3,4,7,4,7,2,0]
    682 ; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    683 ; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = <4,u,u,0,4,5,2,u>
    684 ; AVX2-FAST-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    685 ; AVX2-FAST-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2],ymm1[3,4,5,6],ymm0[7]
    686 ; AVX2-FAST-NEXT:    retq
    687 ;
    688 ; AVX512VL-LABEL: shuffle_v8f32_c348cda0:
    689 ; AVX512VL:       # %bb.0:
    690 ; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = [4,11,12,0,4,5,2,8]
    691 ; AVX512VL-NEXT:    vpermi2ps %ymm0, %ymm1, %ymm2
    692 ; AVX512VL-NEXT:    vmovaps %ymm2, %ymm0
    693 ; AVX512VL-NEXT:    retq
    694   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
    695   ret <8 x float> %shuffle
    696 }
    697 
    ; Irregular two-input shuffle. Only the first and last result elements come from
    ; %b (b7 and b2); the middle six are a5,a1,a1,a2,a3,a5. The expected lowering
    ; shuffles each input on its own and blends on AVX1 and AVX2, while AVX512VL
    ; uses a single two-source vpermt2ps.
    698 define <8 x float> @shuffle_v8f32_f511235a(<8 x float> %a, <8 x float> %b) {
    699 ; AVX1-LABEL: shuffle_v8f32_f511235a:
    700 ; AVX1:       # %bb.0:
    701 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,1,2,2,7,5,6,6]
    702 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm1[2,3,0,1]
    703 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    704 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,1,4,5,5,5]
    705 ; AVX1-NEXT:    vshufpd {{.*#+}} ymm0 = ymm2[0],ymm0[1],ymm2[3],ymm0[3]
    706 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
    707 ; AVX1-NEXT:    retq
    708 ;
    709 ; AVX2-SLOW-LABEL: shuffle_v8f32_f511235a:
    710 ; AVX2-SLOW:       # %bb.0:
    711 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[3,2,2,3,7,6,6,7]
    712 ; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,1,2,0]
    713 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,2,3,5,5,6,7]
    714 ; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,0,1,2]
    715 ; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
    716 ; AVX2-SLOW-NEXT:    retq
    717 ;
    718 ; AVX2-FAST-LABEL: shuffle_v8f32_f511235a:
    719 ; AVX2-FAST:       # %bb.0:
    720 ; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [7,6,2,3,7,6,3,2]
    721 ; AVX2-FAST-NEXT:    vpermps %ymm1, %ymm2, %ymm1
    722 ; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [5,5,1,1,2,3,5,5]
    723 ; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm2, %ymm0
    724 ; AVX2-FAST-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6],ymm1[7]
    725 ; AVX2-FAST-NEXT:    retq
    726 ;
    727 ; AVX512VL-LABEL: shuffle_v8f32_f511235a:
    728 ; AVX512VL:       # %bb.0:
    729 ; AVX512VL-NEXT:    vmovaps {{.*#+}} ymm2 = [15,5,1,1,2,3,5,10]
    730 ; AVX512VL-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
    731 ; AVX512VL-NEXT:    retq
    732   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
    733   ret <8 x float> %shuffle
    734 }
    735 
    ; Reverses the low four elements of %a and places that reversed quad in both
    ; halves of the result. The 128-bit vpermilps is shared; AVX1 then duplicates it
    ; with vinsertf128 while AVX2 and AVX512VL use vpermpd.
    736 define <8 x float> @shuffle_v8f32_32103210(<8 x float> %a, <8 x float> %b) {
    737 ; AVX1-LABEL: shuffle_v8f32_32103210:
    738 ; AVX1:       # %bb.0:
    739 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    740 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
    741 ; AVX1-NEXT:    retq
    742 ;
    743 ; AVX2OR512VL-LABEL: shuffle_v8f32_32103210:
    744 ; AVX2OR512VL:       # %bb.0:
    745 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
    746 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
    747 ; AVX2OR512VL-NEXT:    retq
    748   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
    749   ret <8 x float> %shuffle
    750 }
    751 
    ; Reverses the high four elements of %a and places that reversed quad in both
    ; halves of the result. Without fast-variable-shuffle the expected code is an
    ; in-lane vpermilps followed by a 128-bit lane duplicate (vperm2f128 on AVX1,
    ; vpermpd otherwise); with it, a single vpermps with a constant index vector.
    752 define <8 x float> @shuffle_v8f32_76547654(<8 x float> %a, <8 x float> %b) {
    753 ; AVX1-LABEL: shuffle_v8f32_76547654:
    754 ; AVX1:       # %bb.0:
    755 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    756 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
    757 ; AVX1-NEXT:    retq
    758 ;
    759 ; AVX2-SLOW-LABEL: shuffle_v8f32_76547654:
    760 ; AVX2-SLOW:       # %bb.0:
    761 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    762 ; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
    763 ; AVX2-SLOW-NEXT:    retq
    764 ;
    765 ; AVX2-FAST-LABEL: shuffle_v8f32_76547654:
    766 ; AVX2-FAST:       # %bb.0:
    767 ; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
    768 ; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    769 ; AVX2-FAST-NEXT:    retq
    770 ;
    771 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_76547654:
    772 ; AVX512VL-SLOW:       # %bb.0:
    773 ; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    774 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
    775 ; AVX512VL-SLOW-NEXT:    retq
    776 ;
    777 ; AVX512VL-FAST-LABEL: shuffle_v8f32_76547654:
    778 ; AVX512VL-FAST:       # %bb.0:
    779 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
    780 ; AVX512VL-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    781 ; AVX512VL-FAST-NEXT:    retq
    782   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
    783   ret <8 x float> %shuffle
    784 }
    785 
    ; Full reversal of the eight elements of %a. Without fast-variable-shuffle the
    ; expected code reverses within each 128-bit lane (vpermilps) and then swaps the
    ; lanes (vperm2f128 on AVX1, vpermpd otherwise); with it, a single vpermps.
    786 define <8 x float> @shuffle_v8f32_76543210(<8 x float> %a, <8 x float> %b) {
    787 ; AVX1-LABEL: shuffle_v8f32_76543210:
    788 ; AVX1:       # %bb.0:
    789 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    790 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
    791 ; AVX1-NEXT:    retq
    792 ;
    793 ; AVX2-SLOW-LABEL: shuffle_v8f32_76543210:
    794 ; AVX2-SLOW:       # %bb.0:
    795 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    796 ; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
    797 ; AVX2-SLOW-NEXT:    retq
    798 ;
    799 ; AVX2-FAST-LABEL: shuffle_v8f32_76543210:
    800 ; AVX2-FAST:       # %bb.0:
    801 ; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
    802 ; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    803 ; AVX2-FAST-NEXT:    retq
    804 ;
    805 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_76543210:
    806 ; AVX512VL-SLOW:       # %bb.0:
    807 ; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    808 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
    809 ; AVX512VL-SLOW-NEXT:    retq
    810 ;
    811 ; AVX512VL-FAST-LABEL: shuffle_v8f32_76543210:
    812 ; AVX512VL-FAST:       # %bb.0:
    813 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
    814 ; AVX512VL-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
    815 ; AVX512VL-FAST-NEXT:    retq
    816   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
    817   ret <8 x float> %shuffle
    818 }
    819 
    ; Result low half is the reversed low half of %a and result high half is the
    ; reversed low half of %b (mask indices 8-11). Expected everywhere as a
    ; vinsertf128 that joins the two low halves followed by an in-lane vpermilps.
    820 define <8 x float> @shuffle_v8f32_3210ba98(<8 x float> %a, <8 x float> %b) {
    821 ; ALL-LABEL: shuffle_v8f32_3210ba98:
    822 ; ALL:       # %bb.0:
    823 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    824 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    825 ; ALL-NEXT:    retq
    826   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
    827   ret <8 x float> %shuffle
    828 }
    829 
    ; Result low half is the reversed low half of %a and result high half is the
    ; reversed high half of %b (mask indices 12-15). Expected everywhere as a
    ; vblendps that merges the two halves followed by an in-lane vpermilps.
    830 define <8 x float> @shuffle_v8f32_3210fedc(<8 x float> %a, <8 x float> %b) {
    831 ; ALL-LABEL: shuffle_v8f32_3210fedc:
    832 ; ALL:       # %bb.0:
    833 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
    834 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    835 ; ALL-NEXT:    retq
    836   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
    837   ret <8 x float> %shuffle
    838 }
    839 
    ; Result low half is the reversed high half of %a and result high half is the
    ; reversed high half of %b. Expected as vperm2f128 plus vpermilps, except on
    ; AVX512VL with fast-variable-shuffle where a single vpermt2ps is expected.
    840 define <8 x float> @shuffle_v8f32_7654fedc(<8 x float> %a, <8 x float> %b) {
    841 ; AVX1OR2-LABEL: shuffle_v8f32_7654fedc:
    842 ; AVX1OR2:       # %bb.0:
    843 ; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    844 ; AVX1OR2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    845 ; AVX1OR2-NEXT:    retq
    846 ;
    847 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_7654fedc:
    848 ; AVX512VL-SLOW:       # %bb.0:
    849 ; AVX512VL-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
    850 ; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    851 ; AVX512VL-SLOW-NEXT:    retq
    852 ;
    853 ; AVX512VL-FAST-LABEL: shuffle_v8f32_7654fedc:
    854 ; AVX512VL-FAST:       # %bb.0:
    855 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
    856 ; AVX512VL-FAST-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
    857 ; AVX512VL-FAST-NEXT:    retq
    858   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
    859   ret <8 x float> %shuffle
    860 }
    861 
    ; Operand-swapped counterpart of the 7654fedc mask. Result low half is the
    ; reversed high half of %b and result high half is the reversed high half of %a.
    ; Expected as vperm2f128 plus vpermilps, except on AVX512VL with
    ; fast-variable-shuffle where vpermi2ps (plus a register copy) is expected.
    862 define <8 x float> @shuffle_v8f32_fedc7654(<8 x float> %a, <8 x float> %b) {
    863 ; AVX1OR2-LABEL: shuffle_v8f32_fedc7654:
    864 ; AVX1OR2:       # %bb.0:
    865 ; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
    866 ; AVX1OR2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    867 ; AVX1OR2-NEXT:    retq
    868 ;
    869 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_fedc7654:
    870 ; AVX512VL-SLOW:       # %bb.0:
    871 ; AVX512VL-SLOW-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
    872 ; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    873 ; AVX512VL-SLOW-NEXT:    retq
    874 ;
    875 ; AVX512VL-FAST-LABEL: shuffle_v8f32_fedc7654:
    876 ; AVX512VL-FAST:       # %bb.0:
    877 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
    878 ; AVX512VL-FAST-NEXT:    vpermi2ps %ymm0, %ymm1, %ymm2
    879 ; AVX512VL-FAST-NEXT:    vmovaps %ymm2, %ymm0
    880 ; AVX512VL-FAST-NEXT:    retq
    881   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
    882   ret <8 x float> %shuffle
    883 }
    884 
    ; Selects the odd-indexed elements of the concatenation of %truc and %tchose
    ; (mask 1,3,5,...,15), so the low half comes from %truc and the high half from
    ; %tchose. The name suggests a regression test for bug report PR21138
    ; (NOTE(review) - not verifiable from this file, confirm against the tracker).
    ; AVX1 works on 128-bit halves and blends; AVX2 and slow AVX512VL use vshufps
    ; plus vpermpd; AVX512VL with fast-variable-shuffle uses one vpermt2ps.
    885 define <8 x float> @PR21138(<8 x float> %truc, <8 x float> %tchose) {
    886 ; AVX1-LABEL: PR21138:
    887 ; AVX1:       # %bb.0:
    888 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
    889 ; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
    890 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
    891 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
    892 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
    893 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
    894 ; AVX1-NEXT:    retq
    895 ;
    896 ; AVX2-LABEL: PR21138:
    897 ; AVX2:       # %bb.0:
    898 ; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
    899 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
    900 ; AVX2-NEXT:    retq
    901 ;
    902 ; AVX512VL-SLOW-LABEL: PR21138:
    903 ; AVX512VL-SLOW:       # %bb.0:
    904 ; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
    905 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
    906 ; AVX512VL-SLOW-NEXT:    retq
    907 ;
    908 ; AVX512VL-FAST-LABEL: PR21138:
    909 ; AVX512VL-FAST:       # %bb.0:
    910 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [1,3,5,7,9,11,13,15]
    911 ; AVX512VL-FAST-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
    912 ; AVX512VL-FAST-NEXT:    retq
    913   %shuffle = shufflevector <8 x float> %truc, <8 x float> %tchose, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
    914   ret <8 x float> %shuffle
    915 }
    916 
    ; Result low half is the reversed low half of %b and result high half is the
    ; reversed high half of %a. Expected everywhere as a vblendps (with %b as the
    ; first source) followed by an in-lane vpermilps.
    917 define <8 x float> @shuffle_v8f32_ba987654(<8 x float> %a, <8 x float> %b) {
    918 ; ALL-LABEL: shuffle_v8f32_ba987654:
    919 ; ALL:       # %bb.0:
    920 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
    921 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    922 ; ALL-NEXT:    retq
    923   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
    924   ret <8 x float> %shuffle
    925 }
    926 
    ; Result low half is the reversed low half of %b and result high half is the
    ; reversed low half of %a. Expected everywhere as a vinsertf128 that places the
    ; low half of %a above the low half of %b, followed by an in-lane vpermilps.
    927 define <8 x float> @shuffle_v8f32_ba983210(<8 x float> %a, <8 x float> %b) {
    928 ; ALL-LABEL: shuffle_v8f32_ba983210:
    929 ; ALL:       # %bb.0:
    930 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    931 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
    932 ; ALL-NEXT:    retq
    933   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
    934   ret <8 x float> %shuffle
    935 }
    936 
    ; Interleaves the low two elements of each 128-bit lane, taking %b first and %a
    ; second, with result elements 2 and 6 left undef. Expected everywhere as one
    ; vunpcklps with the operands commuted (ymm1 before ymm0).
    937 define <8 x float> @shuffle_v8f32_80u1c4u5(<8 x float> %a, <8 x float> %b) {
    938 ; ALL-LABEL: shuffle_v8f32_80u1c4u5:
    939 ; ALL:       # %bb.0:
    940 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
    941 ; ALL-NEXT:    retq
    942   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 5>
    943   ret <8 x float> %shuffle
    944 }
    945 
    ; Interleaves the high two elements of each 128-bit lane, taking %b first and %a
    ; second, with result element 2 left undef. Expected everywhere as one
    ; vunpckhps with the operands commuted (ymm1 before ymm0).
    946 define <8 x float> @shuffle_v8f32_a2u3e6f7(<8 x float> %a, <8 x float> %b) {
    947 ; ALL-LABEL: shuffle_v8f32_a2u3e6f7:
    948 ; ALL:       # %bb.0:
    949 ; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[6],ymm0[6],ymm1[7],ymm0[7]
    950 ; ALL-NEXT:    retq
    951   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 10, i32 2, i32 undef, i32 3, i32 14, i32 6, i32 15, i32 7>
    952   ret <8 x float> %shuffle
    953 }
    954 
    ; Two-input shuffle producing a0,b0,a4,b4,a1,b1,a5,b5, which pairs up elements
    ; from different 128-bit lanes. AVX1 needs a long vperm2f128/vpermilps/vblendps
    ; sequence, AVX2 and slow AVX512VL use an in-lane vunpcklps followed by vpermpd,
    ; and AVX512VL with fast-variable-shuffle uses one vpermt2ps.
    955 define <8 x float> @shuffle_v8f32_084c195d(<8 x float> %a, <8 x float> %b) {
    956 ; AVX1-LABEL: shuffle_v8f32_084c195d:
    957 ; AVX1:       # %bb.0:
    958 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
    959 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm2 = ymm2[0,1,2,0,4,5,6,4]
    960 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,1,4,4,6,5]
    961 ; AVX1-NEXT:    vblendps {{.*#+}} ymm1 = ymm1[0,1],ymm2[2,3,4,5],ymm1[6,7]
    962 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm0[2,3,0,1]
    963 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm2 = ymm2[1,1,0,3,5,5,4,7]
    964 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,1,3,4,5,5,7]
    965 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm2[2,3,4,5],ymm0[6,7]
    966 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
    967 ; AVX1-NEXT:    retq
    968 ;
    969 ; AVX2-LABEL: shuffle_v8f32_084c195d:
    970 ; AVX2:       # %bb.0:
    971 ; AVX2-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
    972 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
    973 ; AVX2-NEXT:    retq
    974 ;
    975 ; AVX512VL-SLOW-LABEL: shuffle_v8f32_084c195d:
    976 ; AVX512VL-SLOW:       # %bb.0:
    977 ; AVX512VL-SLOW-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
    978 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
    979 ; AVX512VL-SLOW-NEXT:    retq
    980 ;
    981 ; AVX512VL-FAST-LABEL: shuffle_v8f32_084c195d:
    982 ; AVX512VL-FAST:       # %bb.0:
    983 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [0,8,4,12,1,9,5,13]
    984 ; AVX512VL-FAST-NEXT:    vpermt2ps %ymm1, %ymm2, %ymm0
    985 ; AVX512VL-FAST-NEXT:    retq
    986   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 8, i32 4, i32 12, i32 1, i32 9, i32 5, i32 13>
    987   ret <8 x float> %shuffle
    988 }
    989 
    ; Single-argument shuffle (second shufflevector operand is undef) that moves
    ; float pairs as units - pairs (0,1) and (6,7) stay, pairs (2,3) and (4,5) swap.
    ; The expected code therefore uses 64-bit element instructions - vperm2f128,
    ; vpermilpd and vblendpd on AVX1, and a single vpermpd on AVX2 and AVX512VL.
    990 define <8 x float> @shuffle_v8f32_01452367d(<8 x float> %a) {
    991 ; AVX1-LABEL: shuffle_v8f32_01452367d:
    992 ; AVX1:       # %bb.0:
    993 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
    994 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm1 = ymm1[0,0,3,2]
    995 ; AVX1-NEXT:    vblendpd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2],ymm0[3]
    996 ; AVX1-NEXT:    retq
    997 ;
    998 ; AVX2OR512VL-LABEL: shuffle_v8f32_01452367d:
    999 ; AVX2OR512VL:       # %bb.0:
   1000 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
   1001 ; AVX2OR512VL-NEXT:    retq
   1002   %shuffle = shufflevector <8 x float> %a, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 2, i32 3, i32 6, i32 7>
   1003   ret <8 x float> %shuffle
   1004 }
   1005 
   ; Low half of the result is undef and the high half is a splat of element 1 of
   ; %a. Expected everywhere as a 128-bit vpermilps followed by vinsertf128.
   1006 define <8 x float> @shuffle_v8f32_uuuu1111(<8 x float> %a, <8 x float> %b) {
   1007 ; ALL-LABEL: shuffle_v8f32_uuuu1111:
   1008 ; ALL:       # %bb.0:
   1009 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1010 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1011 ; ALL-NEXT:    retq
   1012   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
   1013   ret <8 x float> %shuffle
   1014 }
   1015 
   ; Splat of element 4 of %a, which is the first element of the high 128-bit half.
   ; AVX1 splats in-lane with vpermilps and duplicates the high lane with
   ; vperm2f128; AVX2 and AVX512VL extract the high half and use vbroadcastss.
   1016 define <8 x float> @shuffle_v8f32_44444444(<8 x float> %a, <8 x float> %b) {
   1017 ; AVX1-LABEL: shuffle_v8f32_44444444:
   1018 ; AVX1:       # %bb.0:
   1019 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
   1020 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
   1021 ; AVX1-NEXT:    retq
   1022 ;
   1023 ; AVX2OR512VL-LABEL: shuffle_v8f32_44444444:
   1024 ; AVX2OR512VL:       # %bb.0:
   1025 ; AVX2OR512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1026 ; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
   1027 ; AVX2OR512VL-NEXT:    retq
   1028   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   1029   ret <8 x float> %shuffle
   1030 }
   1031 
   ; Only the low half of the result is defined - element 1 of %a twice, then
   ; element 0 of %b twice. Expected everywhere as a single 128-bit vshufps with no
   ; instruction spent on the undef high half.
   1032 define <8 x float> @shuffle_v8f32_1188uuuu(<8 x float> %a, <8 x float> %b) {
   1033 ; ALL-LABEL: shuffle_v8f32_1188uuuu:
   1034 ; ALL:       # %bb.0:
   1035 ; ALL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
   1036 ; ALL-NEXT:    retq
   1037   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 8, i32 8, i32 undef, i32 undef, i32 undef, i32 undef>
   1038   ret <8 x float> %shuffle
   1039 }
   1040 
   ; Low half of the result is undef and the high half is the reversed low half of
   ; %a. Expected everywhere as a 128-bit vpermilps followed by vinsertf128.
   1041 define <8 x float> @shuffle_v8f32_uuuu3210(<8 x float> %a, <8 x float> %b) {
   1042 ; ALL-LABEL: shuffle_v8f32_uuuu3210:
   1043 ; ALL:       # %bb.0:
   1044 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1045 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1046 ; ALL-NEXT:    retq
   1047   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 3, i32 2, i32 1, i32 0>
   1048   ret <8 x float> %shuffle
   1049 }
   1050 
   ; Low half of the result is undef and the high half is element 1 of %a twice
   ; followed by element 0 of %b twice. Expected everywhere as a 128-bit vshufps
   ; whose result is then placed in the high half with vinsertf128.
   1051 define <8 x float> @shuffle_v8f32_uuuu1188(<8 x float> %a, <8 x float> %b) {
   1052 ; ALL-LABEL: shuffle_v8f32_uuuu1188:
   1053 ; ALL:       # %bb.0:
   1054 ; ALL-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,1],xmm1[0,0]
   1055 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1056 ; ALL-NEXT:    retq
   1057   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 8, i32 8>
   1058   ret <8 x float> %shuffle
   1059 }
   1060 
   ; Low half of the result is a splat of element 1 of %a and the high half is
   ; undef. Expected everywhere as a single 128-bit vpermilps.
   1061 define <8 x float> @shuffle_v8f32_1111uuuu(<8 x float> %a, <8 x float> %b) {
   1062 ; ALL-LABEL: shuffle_v8f32_1111uuuu:
   1063 ; ALL:       # %bb.0:
   1064 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1065 ; ALL-NEXT:    retq
   1066   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 1, i32 1, i32 1, i32 undef, i32 undef, i32 undef, i32 undef>
   1067   ret <8 x float> %shuffle
   1068 }
   1069 
   ; Low half of the result is a splat of element 5 of %a (element 1 of its high
   ; 128-bit half) and the high half is undef. Expected everywhere as a
   ; vextractf128 of the high half followed by a 128-bit vpermilps.
   1070 define <8 x float> @shuffle_v8f32_5555uuuu(<8 x float> %a, <8 x float> %b) {
   1071 ; ALL-LABEL: shuffle_v8f32_5555uuuu:
   1072 ; ALL:       # %bb.0:
   1073 ; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
   1074 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1075 ; ALL-NEXT:    retq
   1076   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
   1077   ret <8 x float> %shuffle
   1078 }
   1079 
   ; Integer (<8 x i32>) splat of element 0 of %a. The expected instructions are the
   ; float-domain forms - vpermilps plus vinsertf128 on AVX1, and a single
   ; vbroadcastss on AVX2 and AVX512VL.
   1080 define <8 x i32> @shuffle_v8i32_00000000(<8 x i32> %a, <8 x i32> %b) {
   1081 ; AVX1-LABEL: shuffle_v8i32_00000000:
   1082 ; AVX1:       # %bb.0:
   1083 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
   1084 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1085 ; AVX1-NEXT:    retq
   1086 ;
   1087 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000000:
   1088 ; AVX2OR512VL:       # %bb.0:
   1089 ; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
   1090 ; AVX2OR512VL-NEXT:    retq
   1091   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1092   ret <8 x i32> %shuffle
   1093 }
   1094 
   ; Splat of element 0 of %a except for result element 6, which takes element 1.
   ; AVX1 builds the two halves with separate 128-bit vpermilps and joins them with
   ; vinsertf128; AVX2 and AVX512VL use one 128-bit vpermilps followed by vpermpd.
   1095 define <8 x i32> @shuffle_v8i32_00000010(<8 x i32> %a, <8 x i32> %b) {
   1096 ; AVX1-LABEL: shuffle_v8i32_00000010:
   1097 ; AVX1:       # %bb.0:
   1098 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
   1099 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
   1100 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1101 ; AVX1-NEXT:    retq
   1102 ;
   1103 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000010:
   1104 ; AVX2OR512VL:       # %bb.0:
   1105 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,0]
   1106 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,0,1]
   1107 ; AVX2OR512VL-NEXT:    retq
   1108   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
   1109   ret <8 x i32> %shuffle
   1110 }
   1111 
   ; Splat of element 0 of %a except for result element 5, which takes element 2.
   ; AVX1 builds the two halves with separate 128-bit vpermilps and joins them with
   ; vinsertf128; AVX2 and AVX512VL use one 128-bit vpermilps followed by vpermpd.
   1112 define <8 x i32> @shuffle_v8i32_00000200(<8 x i32> %a, <8 x i32> %b) {
   1113 ; AVX1-LABEL: shuffle_v8i32_00000200:
   1114 ; AVX1:       # %bb.0:
   1115 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
   1116 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,0,0]
   1117 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1118 ; AVX1-NEXT:    retq
   1119 ;
   1120 ; AVX2OR512VL-LABEL: shuffle_v8i32_00000200:
   1121 ; AVX2OR512VL:       # %bb.0:
   1122 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,2]
   1123 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
   1124 ; AVX2OR512VL-NEXT:    retq
   1125   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
   1126   ret <8 x i32> %shuffle
   1127 }
   1128 
   ; Splat of element 0 of %a except for result element 4, which takes element 3.
   ; AVX1 builds the two halves with separate 128-bit vpermilps and joins them with
   ; vinsertf128; AVX2 and AVX512VL use one 128-bit vpermilps followed by vpermpd.
   1129 define <8 x i32> @shuffle_v8i32_00003000(<8 x i32> %a, <8 x i32> %b) {
   1130 ; AVX1-LABEL: shuffle_v8i32_00003000:
   1131 ; AVX1:       # %bb.0:
   1132 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
   1133 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,0,0,0]
   1134 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1135 ; AVX1-NEXT:    retq
   1136 ;
   1137 ; AVX2OR512VL-LABEL: shuffle_v8i32_00003000:
   1138 ; AVX2OR512VL:       # %bb.0:
   1139 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,3,0]
   1140 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,0]
   1141 ; AVX2OR512VL-NEXT:    retq
   1142   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
   1143   ret <8 x i32> %shuffle
   1144 }
   1145 
   ; Splat of element 0 of %a except for result element 3, which takes element 4
   ; from the high 128-bit half. AVX1 swaps lanes with vperm2f128, shuffles and
   ; blends; AVX2 and AVX512VL load a constant index vector and use one vpermps.
   1146 define <8 x i32> @shuffle_v8i32_00040000(<8 x i32> %a, <8 x i32> %b) {
   1147 ; AVX1-LABEL: shuffle_v8i32_00040000:
   1148 ; AVX1:       # %bb.0:
   1149 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,3]
   1150 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
   1151 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
   1152 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2],ymm0[3,4,5,6,7]
   1153 ; AVX1-NEXT:    retq
   1154 ;
   1155 ; AVX2OR512VL-LABEL: shuffle_v8i32_00040000:
   1156 ; AVX2OR512VL:       # %bb.0:
   1157 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,4,0,0,0,0]
   1158 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1159 ; AVX2OR512VL-NEXT:    retq
   1160   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
   1161   ret <8 x i32> %shuffle
   1162 }
   1163 
   ; Splat of element 0 of %a except for result element 2, which takes element 5
   ; from the high 128-bit half. AVX1 uses vperm2f128, vblendps and vpermilps;
   ; AVX2 and AVX512VL load a constant index vector and use one vpermps.
   1164 define <8 x i32> @shuffle_v8i32_00500000(<8 x i32> %a, <8 x i32> %b) {
   1165 ; AVX1-LABEL: shuffle_v8i32_00500000:
   1166 ; AVX1:       # %bb.0:
   1167 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
   1168 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4,5,6,7]
   1169 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,1,0,4,4,4,4]
   1170 ; AVX1-NEXT:    retq
   1171 ;
   1172 ; AVX2OR512VL-LABEL: shuffle_v8i32_00500000:
   1173 ; AVX2OR512VL:       # %bb.0:
   1174 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,5,0,0,0,0,0]
   1175 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1176 ; AVX2OR512VL-NEXT:    retq
   1177   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
   1178   ret <8 x i32> %shuffle
   1179 }
   1180 
   ; Splat of element 0 of %a except for result element 1, which takes element 6
   ; from the high 128-bit half. AVX1 uses vperm2f128, vblendps and vpermilps;
   ; AVX2 and AVX512VL load a constant index vector and use one vpermps.
   1181 define <8 x i32> @shuffle_v8i32_06000000(<8 x i32> %a, <8 x i32> %b) {
   1182 ; AVX1-LABEL: shuffle_v8i32_06000000:
   1183 ; AVX1:       # %bb.0:
   1184 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
   1185 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
   1186 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,0,0,4,4,4,4]
   1187 ; AVX1-NEXT:    retq
   1188 ;
   1189 ; AVX2OR512VL-LABEL: shuffle_v8i32_06000000:
   1190 ; AVX2OR512VL:       # %bb.0:
   1191 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,6,0,0,0,0,0,0]
   1192 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1193 ; AVX2OR512VL-NEXT:    retq
   1194   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1195   ret <8 x i32> %shuffle
   1196 }
   1197 
   ; Result element 0 takes element 7 of %a and every other result element takes
   ; element 0. AVX1 uses vperm2f128, vblendps and vpermilps. On AVX2 and AVX512VL
   ; the index vector is built in registers (movl $7 then vmovd) instead of being
   ; loaded from memory, and the permute is the integer-domain vpermd.
   1198 define <8 x i32> @shuffle_v8i32_70000000(<8 x i32> %a, <8 x i32> %b) {
   1199 ; AVX1-LABEL: shuffle_v8i32_70000000:
   1200 ; AVX1:       # %bb.0:
   1201 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
   1202 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],ymm1[2,3,4,5,6,7]
   1203 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,0,4,4,4,4]
   1204 ; AVX1-NEXT:    retq
   1205 ;
   1206 ; AVX2OR512VL-LABEL: shuffle_v8i32_70000000:
   1207 ; AVX2OR512VL:       # %bb.0:
   1208 ; AVX2OR512VL-NEXT:    movl $7, %eax
   1209 ; AVX2OR512VL-NEXT:    vmovd %eax, %xmm1
   1210 ; AVX2OR512VL-NEXT:    vpermd %ymm0, %ymm1, %ymm0
   1211 ; AVX2OR512VL-NEXT:    retq
   1212   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
   1213   ret <8 x i32> %shuffle
   1214 }
   1215 
   ; Duplicates the low element pair of each 128-bit lane of %a (0,1 and 4,5).
   ; AVX1 expects the 64-bit duplicate vmovddup, while AVX2 and AVX512VL expect
   ; the equivalent 32-bit in-lane vpermilps.
   1216 define <8 x i32> @shuffle_v8i32_01014545(<8 x i32> %a, <8 x i32> %b) {
   1217 ; AVX1-LABEL: shuffle_v8i32_01014545:
   1218 ; AVX1:       # %bb.0:
   1219 ; AVX1-NEXT:    vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
   1220 ; AVX1-NEXT:    retq
   1221 ;
   1222 ; AVX2OR512VL-LABEL: shuffle_v8i32_01014545:
   1223 ; AVX2OR512VL:       # %bb.0:
   1224 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
   1225 ; AVX2OR512VL-NEXT:    retq
   1226   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
   1227   ret <8 x i32> %shuffle
   1228 }
   1229 
   ; Doubles each of the low four elements of %a across the full result
   ; (0,0,1,1,2,2,3,3). AVX1 builds each half with a 128-bit vpermilps and joins
   ; them with vinsertf128; AVX2 and AVX512VL use one vpermps with a constant index.
   1230 define <8 x i32> @shuffle_v8i32_00112233(<8 x i32> %a, <8 x i32> %b) {
   1231 ; AVX1-LABEL: shuffle_v8i32_00112233:
   1232 ; AVX1:       # %bb.0:
   1233 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,1,1]
   1234 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,3,3]
   1235 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1236 ; AVX1-NEXT:    retq
   1237 ;
   1238 ; AVX2OR512VL-LABEL: shuffle_v8i32_00112233:
   1239 ; AVX2OR512VL:       # %bb.0:
   1240 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,1,1,2,2,3,3]
   1241 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1242 ; AVX2OR512VL-NEXT:    retq
   1243   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
   1244   ret <8 x i32> %shuffle
   1245 }
   1246 
   ; Low half of the result is a splat of element 0 of %a and the high half is a
   ; splat of element 1. AVX1 splats each half with a 128-bit vpermilps and joins
   ; them with vinsertf128; AVX2 and AVX512VL use a 128-bit vpermilps plus vpermpd.
   1247 define <8 x i32> @shuffle_v8i32_00001111(<8 x i32> %a, <8 x i32> %b) {
   1248 ; AVX1-LABEL: shuffle_v8i32_00001111:
   1249 ; AVX1:       # %bb.0:
   1250 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm1 = xmm0[0,0,0,0]
   1251 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   1252 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   1253 ; AVX1-NEXT:    retq
   1254 ;
   1255 ; AVX2OR512VL-LABEL: shuffle_v8i32_00001111:
   1256 ; AVX2OR512VL:       # %bb.0:
   1257 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,1,1]
   1258 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,0,1,1]
   1259 ; AVX2OR512VL-NEXT:    retq
   1260   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
   1261   ret <8 x i32> %shuffle
   1262 }
   1263 
   1264 define <8 x i32> @shuffle_v8i32_81a3c5e7(<8 x i32> %a, <8 x i32> %b) {
        ; Element-wise blend with no movement: even positions come from %b, odd positions from %a
        ; (mask 8,1,10,3,12,5,14,7). Every run is expected to emit a single vblendps.
   1265 ; ALL-LABEL: shuffle_v8i32_81a3c5e7:
   1266 ; ALL:       # %bb.0:
   1267 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1],ymm1[2],ymm0[3],ymm1[4],ymm0[5],ymm1[6],ymm0[7]
   1268 ; ALL-NEXT:    retq
   1269   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
   1270   ret <8 x i32> %shuffle
   1271 }
   1272 
   1273 define <8 x i32> @shuffle_v8i32_08080808(<8 x i32> %a, <8 x i32> %b) {
        ; Repeats the pair (%a[0], %b[0]) four times (mask 0,8,0,8,0,8,0,8).
        ; The AVX1 run forms the pattern in xmm and duplicates it with vinsertf128;
        ; the AVX2 and AVX512VL runs form the pair with vunpcklps and splat it with vbroadcastsd.
   1274 ; AVX1-LABEL: shuffle_v8i32_08080808:
   1275 ; AVX1:       # %bb.0:
   1276 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[0,0],xmm1[0,0]
   1277 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,2,1,3]
   1278 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1279 ; AVX1-NEXT:    retq
   1280 ;
   1281 ; AVX2OR512VL-LABEL: shuffle_v8i32_08080808:
   1282 ; AVX2OR512VL:       # %bb.0:
   1283 ; AVX2OR512VL-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1284 ; AVX2OR512VL-NEXT:    vbroadcastsd %xmm0, %ymm0
   1285 ; AVX2OR512VL-NEXT:    retq
   1286   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
   1287   ret <8 x i32> %shuffle
   1288 }
   1289 
   1290 define <8 x i32> @shuffle_v8i32_08084c4c(<8 x i32> %a, <8 x i32> %b) {
        ; In each 128-bit half, repeats the pair (first element of %a, first element of %b) twice
        ; (mask 0,8,0,8,4,12,4,12).
        ; The AVX1 run expects vshufps followed by vpermilps; the AVX2 and AVX512VL runs
        ; permute each input separately and merge the two with vblendps.
   1291 ; AVX1-LABEL: shuffle_v8i32_08084c4c:
   1292 ; AVX1:       # %bb.0:
   1293 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[0,0],ymm0[4,4],ymm1[4,4]
   1294 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   1295 ; AVX1-NEXT:    retq
   1296 ;
   1297 ; AVX2OR512VL-LABEL: shuffle_v8i32_08084c4c:
   1298 ; AVX2OR512VL:       # %bb.0:
   1299 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,0,4,4,6,4]
   1300 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,0,1,4,5,4,5]
   1301 ; AVX2OR512VL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1302 ; AVX2OR512VL-NEXT:    retq
   1303   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
   1304   ret <8 x i32> %shuffle
   1305 }
   1306 
   1307 define <8 x i32> @shuffle_v8i32_8823cc67(<8 x i32> %a, <8 x i32> %b) {
        ; In each 128-bit half: the first element of %b twice, then the upper two elements of %a
        ; (mask 8,8,2,3,12,12,6,7). Every run is expected to emit a single two-input vshufps.
   1308 ; ALL-LABEL: shuffle_v8i32_8823cc67:
   1309 ; ALL:       # %bb.0:
   1310 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,0],ymm0[2,3],ymm1[4,4],ymm0[6,7]
   1311 ; ALL-NEXT:    retq
   1312   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
   1313   ret <8 x i32> %shuffle
   1314 }
   1315 
   1316 define <8 x i32> @shuffle_v8i32_9832dc76(<8 x i32> %a, <8 x i32> %b) {
        ; In each 128-bit half: the low pair of %b swapped, then the high pair of %a swapped
        ; (mask 9,8,3,2,13,12,7,6). Every run is expected to emit a single two-input vshufps.
   1317 ; ALL-LABEL: shuffle_v8i32_9832dc76:
   1318 ; ALL:       # %bb.0:
   1319 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[3,2],ymm1[5,4],ymm0[7,6]
   1320 ; ALL-NEXT:    retq
   1321   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
   1322   ret <8 x i32> %shuffle
   1323 }
   1324 
   1325 define <8 x i32> @shuffle_v8i32_9810dc54(<8 x i32> %a, <8 x i32> %b) {
        ; In each 128-bit half: the low pair of %b swapped, then the low pair of %a swapped
        ; (mask 9,8,1,0,13,12,5,4). Every run is expected to emit a single two-input vshufps.
   1326 ; ALL-LABEL: shuffle_v8i32_9810dc54:
   1327 ; ALL:       # %bb.0:
   1328 ; ALL-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,0],ymm0[1,0],ymm1[5,4],ymm0[5,4]
   1329 ; ALL-NEXT:    retq
   1330   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
   1331   ret <8 x i32> %shuffle
   1332 }
   1333 
   1334 define <8 x i32> @shuffle_v8i32_08194c5d(<8 x i32> %a, <8 x i32> %b) {
        ; Interleaves the low two elements of each 128-bit half of %a with those of %b
        ; (mask 0,8,1,9,4,12,5,13). Every run is expected to emit a single ymm vunpcklps.
   1335 ; ALL-LABEL: shuffle_v8i32_08194c5d:
   1336 ; ALL:       # %bb.0:
   1337 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
   1338 ; ALL-NEXT:    retq
   1339   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
   1340   ret <8 x i32> %shuffle
   1341 }
   1342 
   1343 define <8 x i32> @shuffle_v8i32_2a3b6e7f(<8 x i32> %a, <8 x i32> %b) {
        ; Interleaves the high two elements of each 128-bit half of %a with those of %b
        ; (mask 2,10,3,11,6,14,7,15). Every run is expected to emit a single ymm vunpckhps.
   1344 ; ALL-LABEL: shuffle_v8i32_2a3b6e7f:
   1345 ; ALL:       # %bb.0:
   1346 ; ALL-NEXT:    vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
   1347 ; ALL-NEXT:    retq
   1348   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
   1349   ret <8 x i32> %shuffle
   1350 }
   1351 
   1352 define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
        ; Interleaves the low four elements of %a with the low four of %b
        ; (mask 0,8,1,9,2,10,3,11), so data crosses the 128-bit halves.
        ; The AVX and AVX2 runs unpack high/low in xmm and join with vinsertf128;
        ; the AVX512VL runs spread %a with vpmovzxdq and then use vpermi2d.
   1353 ; AVX1OR2-LABEL: shuffle_v8i32_08192a3b:
   1354 ; AVX1OR2:       # %bb.0:
   1355 ; AVX1OR2-NEXT:    vunpckhps {{.*#+}} xmm2 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   1356 ; AVX1OR2-NEXT:    vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
   1357 ; AVX1OR2-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1358 ; AVX1OR2-NEXT:    retq
   1359 ;
   1360 ; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
   1361 ; AVX512VL:       # %bb.0:
   1362 ; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1363 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11]
   1364 ; AVX512VL-NEXT:    vpermi2d %ymm1, %ymm2, %ymm0
   1365 ; AVX512VL-NEXT:    retq
   1366   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   1367   ret <8 x i32> %shuffle
   1368 }
   1369 
   1370 define <8 x i32> @shuffle_v8i32_08991abb(<8 x i32> %a, <8 x i32> %b) {
        ; Mask 0,8,9,9,1,10,11,11. %a supplies only result positions 0 and 4 (its elements 0 and 1);
        ; every other position comes from the low four elements of %b.
        ; The expected sequence differs per configuration: xmm shuffles plus vinsertf128 on AVX1,
        ; vpermd/vpermq plus vpblendd on AVX2, and vpmovzxdq plus vpermi2d on AVX512VL.
   1371 ; AVX1-LABEL: shuffle_v8i32_08991abb:
   1372 ; AVX1:       # %bb.0:
   1373 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm0[0,0],xmm1[0,0]
   1374 ; AVX1-NEXT:    vshufps {{.*#+}} xmm2 = xmm2[0,2],xmm1[1,1]
   1375 ; AVX1-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],xmm1[2,3]
   1376 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,3]
   1377 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   1378 ; AVX1-NEXT:    retq
   1379 ;
   1380 ; AVX2-LABEL: shuffle_v8i32_08991abb:
   1381 ; AVX2:       # %bb.0:
   1382 ; AVX2-NEXT:    vmovdqa {{.*#+}} ymm2 = <u,0,1,1,u,2,3,3>
   1383 ; AVX2-NEXT:    vpermd %ymm1, %ymm2, %ymm1
   1384 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1385 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
   1386 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1387 ; AVX2-NEXT:    retq
   1388 ;
   1389 ; AVX512VL-LABEL: shuffle_v8i32_08991abb:
   1390 ; AVX512VL:       # %bb.0:
   1391 ; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
   1392 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm0 = [8,0,1,1,10,2,3,3]
   1393 ; AVX512VL-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0
   1394 ; AVX512VL-NEXT:    retq
   1395   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
   1396   ret <8 x i32> %shuffle
   1397 }
   1398 
   1399 define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
        ; Even result positions receive %a[0..3] in order; odd positions keep the odd elements
        ; of %b in place (mask 0,9,1,11,2,13,3,15).
        ; The AVX1 run permutes %a per half, joins with vinsertf128 and blends;
        ; the AVX2 and AVX512VL runs spread %a with vpmovzxdq and finish with vpblendd.
   1400 ; AVX1-LABEL: shuffle_v8i32_091b2d3f:
   1401 ; AVX1:       # %bb.0:
   1402 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[0,1,1,3]
   1403 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,3]
   1404 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm2, %ymm0
   1405 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1406 ; AVX1-NEXT:    retq
   1407 ;
   1408 ; AVX2OR512VL-LABEL: shuffle_v8i32_091b2d3f:
   1409 ; AVX2OR512VL:       # %bb.0:
   1410 ; AVX2OR512VL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
   1411 ; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
   1412 ; AVX2OR512VL-NEXT:    retq
   1413   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   1414   ret <8 x i32> %shuffle
   1415 }
   1416 
   1417 define <8 x i32> @shuffle_v8i32_09ab1def(<8 x i32> %a, <8 x i32> %b) {
        ; Keeps %b in place except at result positions 0 and 4, which receive %a[0] and %a[1]
        ; (mask 0,9,10,11,1,13,14,15).
        ; Only the AVX512VL run with +fast-variable-shuffle is expected to use vpermi2d;
        ; the other runs first move %a's two elements into position and then blend with %b.
   1418 ; AVX1-LABEL: shuffle_v8i32_09ab1def:
   1419 ; AVX1:       # %bb.0:
   1420 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm2 = xmm0[1,1,3,3]
   1421 ; AVX1-NEXT:    vinsertf128 $1, %xmm2, %ymm0, %ymm0
   1422 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1423 ; AVX1-NEXT:    retq
   1424 ;
   1425 ; AVX2-LABEL: shuffle_v8i32_09ab1def:
   1426 ; AVX2:       # %bb.0:
   1427 ; AVX2-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1428 ; AVX2-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
   1429 ; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1430 ; AVX2-NEXT:    retq
   1431 ;
   1432 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_09ab1def:
   1433 ; AVX512VL-SLOW:       # %bb.0:
   1434 ; AVX512VL-SLOW-NEXT:    vpmovzxdq {{.*#+}} xmm0 = xmm0[0],zero,xmm0[1],zero
   1435 ; AVX512VL-SLOW-NEXT:    vpermq {{.*#+}} ymm0 = ymm0[0,1,1,3]
   1436 ; AVX512VL-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3],ymm0[4],ymm1[5,6,7]
   1437 ; AVX512VL-SLOW-NEXT:    retq
   1438 ;
   1439 ; AVX512VL-FAST-LABEL: shuffle_v8i32_09ab1def:
   1440 ; AVX512VL-FAST:       # %bb.0:
   1441 ; AVX512VL-FAST-NEXT:    vpmovzxdq {{.*#+}} xmm2 = xmm0[0],zero,xmm0[1],zero
   1442 ; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm0 = [8,1,2,3,10,5,6,7]
   1443 ; AVX512VL-FAST-NEXT:    vpermi2d %ymm2, %ymm1, %ymm0
   1444 ; AVX512VL-FAST-NEXT:    retq
   1445   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
   1446   ret <8 x i32> %shuffle
   1447 }
   1448 
   1449 define <8 x i32> @shuffle_v8i32_00014445(<8 x i32> %a, <8 x i32> %b) {
        ; Applies the in-lane pattern 0,0,0,1 to both 128-bit halves of %a; %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1450 ; ALL-LABEL: shuffle_v8i32_00014445:
   1451 ; ALL:       # %bb.0:
   1452 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,4,4,4,5]
   1453 ; ALL-NEXT:    retq
   1454   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
   1455   ret <8 x i32> %shuffle
   1456 }
   1457 
   1458 define <8 x i32> @shuffle_v8i32_00204464(<8 x i32> %a, <8 x i32> %b) {
        ; Applies the in-lane pattern 0,0,2,0 to both 128-bit halves of %a; %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1459 ; ALL-LABEL: shuffle_v8i32_00204464:
   1460 ; ALL:       # %bb.0:
   1461 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,4,6,4]
   1462 ; ALL-NEXT:    retq
   1463   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
   1464   ret <8 x i32> %shuffle
   1465 }
   1466 
   1467 define <8 x i32> @shuffle_v8i32_03004744(<8 x i32> %a, <8 x i32> %b) {
        ; Applies the in-lane pattern 0,3,0,0 to both 128-bit halves of %a; %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1468 ; ALL-LABEL: shuffle_v8i32_03004744:
   1469 ; ALL:       # %bb.0:
   1470 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,7,4,4]
   1471 ; ALL-NEXT:    retq
   1472   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
   1473   ret <8 x i32> %shuffle
   1474 }
   1475 
   1476 define <8 x i32> @shuffle_v8i32_10005444(<8 x i32> %a, <8 x i32> %b) {
        ; Applies the in-lane pattern 1,0,0,0 to both 128-bit halves of %a; %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1477 ; ALL-LABEL: shuffle_v8i32_10005444:
   1478 ; ALL:       # %bb.0:
   1479 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,5,4,4,4]
   1480 ; ALL-NEXT:    retq
   1481   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
   1482   ret <8 x i32> %shuffle
   1483 }
   1484 
   1485 define <8 x i32> @shuffle_v8i32_22006644(<8 x i32> %a, <8 x i32> %b) {
        ; Applies the in-lane pattern 2,2,0,0 to both 128-bit halves of %a; %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1486 ; ALL-LABEL: shuffle_v8i32_22006644:
   1487 ; ALL:       # %bb.0:
   1488 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,6,4,4]
   1489 ; ALL-NEXT:    retq
   1490   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
   1491   ret <8 x i32> %shuffle
   1492 }
   1493 
   1494 define <8 x i32> @shuffle_v8i32_33307774(<8 x i32> %a, <8 x i32> %b) {
        ; Applies the in-lane pattern 3,3,3,0 to both 128-bit halves of %a; %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1495 ; ALL-LABEL: shuffle_v8i32_33307774:
   1496 ; ALL:       # %bb.0:
   1497 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,7,7,4]
   1498 ; ALL-NEXT:    retq
   1499   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
   1500   ret <8 x i32> %shuffle
   1501 }
   1502 
   1503 define <8 x i32> @shuffle_v8i32_32107654(<8 x i32> %a, <8 x i32> %b) {
        ; Reverses the four elements inside each 128-bit half of %a (in-lane pattern 3,2,1,0); %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1504 ; ALL-LABEL: shuffle_v8i32_32107654:
   1505 ; ALL:       # %bb.0:
   1506 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1507 ; ALL-NEXT:    retq
   1508   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   1509   ret <8 x i32> %shuffle
   1510 }
   1511 
   1512 define <8 x i32> @shuffle_v8i32_00234467(<8 x i32> %a, <8 x i32> %b) {
        ; Applies the in-lane pattern 0,0,2,3 to both 128-bit halves of %a; %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1513 ; ALL-LABEL: shuffle_v8i32_00234467:
   1514 ; ALL:       # %bb.0:
   1515 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,4,4,6,7]
   1516 ; ALL-NEXT:    retq
   1517   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
   1518   ret <8 x i32> %shuffle
   1519 }
   1520 
   1521 define <8 x i32> @shuffle_v8i32_00224466(<8 x i32> %a, <8 x i32> %b) {
        ; Duplicates the even-indexed elements of %a (mask 0,0,2,2,4,4,6,6); %b is unused.
        ; The AVX1 run expects vmovsldup; the AVX2 and AVX512VL runs expect vpermilps with the same pattern.
   1522 ; AVX1-LABEL: shuffle_v8i32_00224466:
   1523 ; AVX1:       # %bb.0:
   1524 ; AVX1-NEXT:    vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
   1525 ; AVX1-NEXT:    retq
   1526 ;
   1527 ; AVX2OR512VL-LABEL: shuffle_v8i32_00224466:
   1528 ; AVX2OR512VL:       # %bb.0:
   1529 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
   1530 ; AVX2OR512VL-NEXT:    retq
   1531   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   1532   ret <8 x i32> %shuffle
   1533 }
   1534 
   1535 define <8 x i32> @shuffle_v8i32_10325476(<8 x i32> %a, <8 x i32> %b) {
        ; Swaps each adjacent pair of elements of %a (mask 1,0,3,2,5,4,7,6); %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1536 ; ALL-LABEL: shuffle_v8i32_10325476:
   1537 ; ALL:       # %bb.0:
   1538 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,5,4,7,6]
   1539 ; ALL-NEXT:    retq
   1540   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   1541   ret <8 x i32> %shuffle
   1542 }
   1543 
   1544 define <8 x i32> @shuffle_v8i32_11335577(<8 x i32> %a, <8 x i32> %b) {
        ; Duplicates the odd-indexed elements of %a (mask 1,1,3,3,5,5,7,7); %b is unused.
        ; The AVX1 run expects vmovshdup; the AVX2 and AVX512VL runs expect vpermilps with the same pattern.
   1545 ; AVX1-LABEL: shuffle_v8i32_11335577:
   1546 ; AVX1:       # %bb.0:
   1547 ; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
   1548 ; AVX1-NEXT:    retq
   1549 ;
   1550 ; AVX2OR512VL-LABEL: shuffle_v8i32_11335577:
   1551 ; AVX2OR512VL:       # %bb.0:
   1552 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
   1553 ; AVX2OR512VL-NEXT:    retq
   1554   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
   1555   ret <8 x i32> %shuffle
   1556 }
   1557 
   1558 define <8 x i32> @shuffle_v8i32_10235467(<8 x i32> %a, <8 x i32> %b) {
        ; Applies the in-lane pattern 1,0,2,3 to both 128-bit halves of %a; %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1559 ; ALL-LABEL: shuffle_v8i32_10235467:
   1560 ; ALL:       # %bb.0:
   1561 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,3,5,4,6,7]
   1562 ; ALL-NEXT:    retq
   1563   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1564   ret <8 x i32> %shuffle
   1565 }
   1566 
   1567 define <8 x i32> @shuffle_v8i32_10225466(<8 x i32> %a, <8 x i32> %b) {
        ; Applies the in-lane pattern 1,0,2,2 to both 128-bit halves of %a; %b is unused.
        ; Every run is expected to emit a single vpermilps.
   1568 ; ALL-LABEL: shuffle_v8i32_10225466:
   1569 ; ALL:       # %bb.0:
   1570 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,2,2,5,4,6,6]
   1571 ; ALL-NEXT:    retq
   1572   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
   1573   ret <8 x i32> %shuffle
   1574 }
   1575 
   1576 define <8 x i32> @shuffle_v8i32_00015444(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose two 128-bit halves use different patterns
        ; (low half 0,0,0,1; high half 5,4,4,4); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1577 ; AVX1-LABEL: shuffle_v8i32_00015444:
   1578 ; AVX1:       # %bb.0:
   1579 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,1,5,4,4,4]
   1580 ; AVX1-NEXT:    retq
   1581 ;
   1582 ; AVX2OR512VL-LABEL: shuffle_v8i32_00015444:
   1583 ; AVX2OR512VL:       # %bb.0:
   1584 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,0,1,5,4,4,4]
   1585 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1586 ; AVX2OR512VL-NEXT:    retq
   1587   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
   1588   ret <8 x i32> %shuffle
   1589 }
   1590 
   1591 define <8 x i32> @shuffle_v8i32_00204644(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose two 128-bit halves use different patterns
        ; (low half 0,0,2,0; high half 4,6,4,4); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1592 ; AVX1-LABEL: shuffle_v8i32_00204644:
   1593 ; AVX1:       # %bb.0:
   1594 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,0,4,6,4,4]
   1595 ; AVX1-NEXT:    retq
   1596 ;
   1597 ; AVX2OR512VL-LABEL: shuffle_v8i32_00204644:
   1598 ; AVX2OR512VL:       # %bb.0:
   1599 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,2,0,4,6,4,4]
   1600 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1601 ; AVX2OR512VL-NEXT:    retq
   1602   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
   1603   ret <8 x i32> %shuffle
   1604 }
   1605 
   1606 define <8 x i32> @shuffle_v8i32_03004474(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose two 128-bit halves use different patterns
        ; (low half 0,3,0,0; high half 4,4,7,4); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1607 ; AVX1-LABEL: shuffle_v8i32_03004474:
   1608 ; AVX1:       # %bb.0:
   1609 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,3,0,0,4,4,7,4]
   1610 ; AVX1-NEXT:    retq
   1611 ;
   1612 ; AVX2OR512VL-LABEL: shuffle_v8i32_03004474:
   1613 ; AVX2OR512VL:       # %bb.0:
   1614 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,3,0,0,4,4,7,4]
   1615 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1616 ; AVX2OR512VL-NEXT:    retq
   1617   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
   1618   ret <8 x i32> %shuffle
   1619 }
   1620 
   1621 define <8 x i32> @shuffle_v8i32_10004444(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose two 128-bit halves use different patterns
        ; (low half 1,0,0,0; high half 4,4,4,4); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1622 ; AVX1-LABEL: shuffle_v8i32_10004444:
   1623 ; AVX1:       # %bb.0:
   1624 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,0,0,4,4,4,4]
   1625 ; AVX1-NEXT:    retq
   1626 ;
   1627 ; AVX2OR512VL-LABEL: shuffle_v8i32_10004444:
   1628 ; AVX2OR512VL:       # %bb.0:
   1629 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [1,0,0,0,4,4,4,4]
   1630 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1631 ; AVX2OR512VL-NEXT:    retq
   1632   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   1633   ret <8 x i32> %shuffle
   1634 }
   1635 
   1636 define <8 x i32> @shuffle_v8i32_22006446(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose two 128-bit halves use different patterns
        ; (low half 2,2,0,0; high half 6,4,4,6); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1637 ; AVX1-LABEL: shuffle_v8i32_22006446:
   1638 ; AVX1:       # %bb.0:
   1639 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,2,0,0,6,4,4,6]
   1640 ; AVX1-NEXT:    retq
   1641 ;
   1642 ; AVX2OR512VL-LABEL: shuffle_v8i32_22006446:
   1643 ; AVX2OR512VL:       # %bb.0:
   1644 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [2,2,0,0,6,4,4,6]
   1645 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1646 ; AVX2OR512VL-NEXT:    retq
   1647   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
   1648   ret <8 x i32> %shuffle
   1649 }
   1650 
   1651 define <8 x i32> @shuffle_v8i32_33307474(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose two 128-bit halves use different patterns
        ; (low half 3,3,3,0; high half 7,4,7,4); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1652 ; AVX1-LABEL: shuffle_v8i32_33307474:
   1653 ; AVX1:       # %bb.0:
   1654 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,3,3,0,7,4,7,4]
   1655 ; AVX1-NEXT:    retq
   1656 ;
   1657 ; AVX2OR512VL-LABEL: shuffle_v8i32_33307474:
   1658 ; AVX2OR512VL:       # %bb.0:
   1659 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [3,3,3,0,7,4,7,4]
   1660 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1661 ; AVX2OR512VL-NEXT:    retq
   1662   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
   1663   ret <8 x i32> %shuffle
   1664 }
   1665 
   1666 define <8 x i32> @shuffle_v8i32_32104567(<8 x i32> %a, <8 x i32> %b) {
        ; Reverses the low four elements of %a and leaves the high four in place
        ; (mask 3,2,1,0,4,5,6,7); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1667 ; AVX1-LABEL: shuffle_v8i32_32104567:
   1668 ; AVX1:       # %bb.0:
   1669 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,4,5,6,7]
   1670 ; AVX1-NEXT:    retq
   1671 ;
   1672 ; AVX2OR512VL-LABEL: shuffle_v8i32_32104567:
   1673 ; AVX2OR512VL:       # %bb.0:
   1674 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [3,2,1,0,4,5,6,7]
   1675 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1676 ; AVX2OR512VL-NEXT:    retq
   1677   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   1678   ret <8 x i32> %shuffle
   1679 }
   1680 
   1681 define <8 x i32> @shuffle_v8i32_00236744(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose two 128-bit halves use different patterns
        ; (low half 0,0,2,3; high half 6,7,4,4); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1682 ; AVX1-LABEL: shuffle_v8i32_00236744:
   1683 ; AVX1:       # %bb.0:
   1684 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,3,6,7,4,4]
   1685 ; AVX1-NEXT:    retq
   1686 ;
   1687 ; AVX2OR512VL-LABEL: shuffle_v8i32_00236744:
   1688 ; AVX2OR512VL:       # %bb.0:
   1689 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,2,3,6,7,4,4]
   1690 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1691 ; AVX2OR512VL-NEXT:    retq
   1692   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
   1693   ret <8 x i32> %shuffle
   1694 }
   1695 
   1696 define <8 x i32> @shuffle_v8i32_00226644(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a whose two 128-bit halves use different patterns
        ; (low half 0,0,2,2; high half 6,6,4,4); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1697 ; AVX1-LABEL: shuffle_v8i32_00226644:
   1698 ; AVX1:       # %bb.0:
   1699 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,2,6,6,4,4]
   1700 ; AVX1-NEXT:    retq
   1701 ;
   1702 ; AVX2OR512VL-LABEL: shuffle_v8i32_00226644:
   1703 ; AVX2OR512VL:       # %bb.0:
   1704 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,0,2,2,6,6,4,4]
   1705 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1706 ; AVX2OR512VL-NEXT:    retq
   1707   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
   1708   ret <8 x i32> %shuffle
   1709 }
   1710 
   1711 define <8 x i32> @shuffle_v8i32_10324567(<8 x i32> %a, <8 x i32> %b) {
        ; Swaps adjacent pairs in the low four elements of %a and leaves the high four in place
        ; (mask 1,0,3,2,4,5,6,7); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1712 ; AVX1-LABEL: shuffle_v8i32_10324567:
   1713 ; AVX1:       # %bb.0:
   1714 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,6,7]
   1715 ; AVX1-NEXT:    retq
   1716 ;
   1717 ; AVX2OR512VL-LABEL: shuffle_v8i32_10324567:
   1718 ; AVX2OR512VL:       # %bb.0:
   1719 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [1,0,3,2,4,5,6,7]
   1720 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1721 ; AVX2OR512VL-NEXT:    retq
   1722   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
   1723   ret <8 x i32> %shuffle
   1724 }
   1725 
   1726 define <8 x i32> @shuffle_v8i32_11334567(<8 x i32> %a, <8 x i32> %b) {
        ; Duplicates the odd elements within the low four elements of %a and leaves the high four
        ; in place (mask 1,1,3,3,4,5,6,7); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1727 ; AVX1-LABEL: shuffle_v8i32_11334567:
   1728 ; AVX1:       # %bb.0:
   1729 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,4,5,6,7]
   1730 ; AVX1-NEXT:    retq
   1731 ;
   1732 ; AVX2OR512VL-LABEL: shuffle_v8i32_11334567:
   1733 ; AVX2OR512VL:       # %bb.0:
   1734 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [1,1,3,3,4,5,6,7]
   1735 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1736 ; AVX2OR512VL-NEXT:    retq
   1737   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
   1738   ret <8 x i32> %shuffle
   1739 }
   1740 
   1741 define <8 x i32> @shuffle_v8i32_01235467(<8 x i32> %a, <8 x i32> %b) {
        ; Leaves the low four elements of %a in place and swaps elements 4 and 5
        ; (mask 0,1,2,3,5,4,6,7); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1742 ; AVX1-LABEL: shuffle_v8i32_01235467:
   1743 ; AVX1:       # %bb.0:
   1744 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,7]
   1745 ; AVX1-NEXT:    retq
   1746 ;
   1747 ; AVX2OR512VL-LABEL: shuffle_v8i32_01235467:
   1748 ; AVX2OR512VL:       # %bb.0:
   1749 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,7]
   1750 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1751 ; AVX2OR512VL-NEXT:    retq
   1752   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1753   ret <8 x i32> %shuffle
   1754 }
   1755 
   1756 define <8 x i32> @shuffle_v8i32_01235466(<8 x i32> %a, <8 x i32> %b) {
        ; Leaves the low four elements of %a in place and applies 5,4,6,6 to the high four
        ; (mask 0,1,2,3,5,4,6,6); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1757 ; AVX1-LABEL: shuffle_v8i32_01235466:
   1758 ; AVX1:       # %bb.0:
   1759 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,5,4,6,6]
   1760 ; AVX1-NEXT:    retq
   1761 ;
   1762 ; AVX2OR512VL-LABEL: shuffle_v8i32_01235466:
   1763 ; AVX2OR512VL:       # %bb.0:
   1764 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = [0,1,2,3,5,4,6,6]
   1765 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1766 ; AVX2OR512VL-NEXT:    retq
   1767   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
   1768   ret <8 x i32> %shuffle
   1769 }
   1770 
   1771 define <8 x i32> @shuffle_v8i32_002u6u44(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a with result elements 3 and 5 left undef (mask 0,0,2,u,6,u,4,4); %b is unused.
        ; The undef positions are printed as 'u' in the expected output.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1772 ; AVX1-LABEL: shuffle_v8i32_002u6u44:
   1773 ; AVX1:       # %bb.0:
   1774 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,2,u,6,u,4,4]
   1775 ; AVX1-NEXT:    retq
   1776 ;
   1777 ; AVX2OR512VL-LABEL: shuffle_v8i32_002u6u44:
   1778 ; AVX2OR512VL:       # %bb.0:
   1779 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <0,0,2,u,6,u,4,4>
   1780 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1781 ; AVX2OR512VL-NEXT:    retq
   1782   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
   1783   ret <8 x i32> %shuffle
   1784 }
   1785 
   1786 define <8 x i32> @shuffle_v8i32_00uu66uu(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a that defines only the low pair of each 128-bit half
        ; (mask 0,0,u,u,6,6,u,u); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1787 ; AVX1-LABEL: shuffle_v8i32_00uu66uu:
   1788 ; AVX1:       # %bb.0:
   1789 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,u,u,6,6,u,u]
   1790 ; AVX1-NEXT:    retq
   1791 ;
   1792 ; AVX2OR512VL-LABEL: shuffle_v8i32_00uu66uu:
   1793 ; AVX2OR512VL:       # %bb.0:
   1794 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <0,0,u,u,6,6,u,u>
   1795 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1796 ; AVX2OR512VL-NEXT:    retq
   1797   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
   1798   ret <8 x i32> %shuffle
   1799 }
   1800 
   1801 define <8 x i32> @shuffle_v8i32_103245uu(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a with the last two result elements left undef
        ; (mask 1,0,3,2,4,5,u,u); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1802 ; AVX1-LABEL: shuffle_v8i32_103245uu:
   1803 ; AVX1:       # %bb.0:
   1804 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,0,3,2,4,5,u,u]
   1805 ; AVX1-NEXT:    retq
   1806 ;
   1807 ; AVX2OR512VL-LABEL: shuffle_v8i32_103245uu:
   1808 ; AVX2OR512VL:       # %bb.0:
   1809 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <1,0,3,2,4,5,u,u>
   1810 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1811 ; AVX2OR512VL-NEXT:    retq
   1812   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
   1813   ret <8 x i32> %shuffle
   1814 }
   1815 
   1816 define <8 x i32> @shuffle_v8i32_1133uu67(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a with result elements 4 and 5 left undef
        ; (mask 1,1,3,3,u,u,6,7); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1817 ; AVX1-LABEL: shuffle_v8i32_1133uu67:
   1818 ; AVX1:       # %bb.0:
   1819 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,1,3,3,u,u,6,7]
   1820 ; AVX1-NEXT:    retq
   1821 ;
   1822 ; AVX2OR512VL-LABEL: shuffle_v8i32_1133uu67:
   1823 ; AVX2OR512VL:       # %bb.0:
   1824 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <1,1,3,3,u,u,6,7>
   1825 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1826 ; AVX2OR512VL-NEXT:    retq
   1827   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
   1828   ret <8 x i32> %shuffle
   1829 }
   1830 
   1831 define <8 x i32> @shuffle_v8i32_0uu354uu(<8 x i32> %a, <8 x i32> %b) {
        ; In-lane shuffle of %a with four result elements left undef
        ; (mask 0,u,u,3,5,4,u,u); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1832 ; AVX1-LABEL: shuffle_v8i32_0uu354uu:
   1833 ; AVX1:       # %bb.0:
   1834 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,u,u,3,5,4,u,u]
   1835 ; AVX1-NEXT:    retq
   1836 ;
   1837 ; AVX2OR512VL-LABEL: shuffle_v8i32_0uu354uu:
   1838 ; AVX2OR512VL:       # %bb.0:
   1839 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <0,u,u,3,5,4,u,u>
   1840 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1841 ; AVX2OR512VL-NEXT:    retq
   1842   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
   1843   ret <8 x i32> %shuffle
   1844 }
   1845 
   1846 define <8 x i32> @shuffle_v8i32_uuu3uu66(<8 x i32> %a, <8 x i32> %b) {
        ; Mostly-undef in-lane shuffle of %a; only result elements 3, 6 and 7 are defined
        ; (mask u,u,u,3,u,u,6,6); %b is unused.
        ; The AVX1 run expects one vpermilps; the AVX2 and AVX512VL runs load the indices and use vpermps.
   1847 ; AVX1-LABEL: shuffle_v8i32_uuu3uu66:
   1848 ; AVX1:       # %bb.0:
   1849 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[u,u,u,3,u,u,6,6]
   1850 ; AVX1-NEXT:    retq
   1851 ;
   1852 ; AVX2OR512VL-LABEL: shuffle_v8i32_uuu3uu66:
   1853 ; AVX2OR512VL:       # %bb.0:
   1854 ; AVX2OR512VL-NEXT:    vmovaps {{.*#+}} ymm1 = <u,u,u,3,u,u,6,6>
   1855 ; AVX2OR512VL-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1856 ; AVX2OR512VL-NEXT:    retq
   1857   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
   1858   ret <8 x i32> %shuffle
   1859 }
   1860 
   1861 define <8 x i32> @shuffle_v8i32_6caa87e5(<8 x i32> %a, <8 x i32> %b) {
        ; Irregular two-input mask 6,12,10,10,8,7,14,5. Result positions 0, 5 and 7 come from
        ; the high half of %a; the remaining positions come from even-indexed elements of %b.
        ; The AVX1 and AVX2 runs permute each input separately and finish with the same vblendps;
        ; the AVX512VL runs use a single vpermi2d followed by a register copy into ymm0.
   1862 ; AVX1-LABEL: shuffle_v8i32_6caa87e5:
   1863 ; AVX1:       # %bb.0:
   1864 ; AVX1-NEXT:    vpermilpd {{.*#+}} ymm0 = ymm0[1,0,3,2]
   1865 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
   1866 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm2 = ymm1[2,3,0,1]
   1867 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm2[0,0],ymm1[2,2],ymm2[4,4],ymm1[6,6]
   1868 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
   1869 ; AVX1-NEXT:    retq
   1870 ;
   1871 ; AVX2-SLOW-LABEL: shuffle_v8i32_6caa87e5:
   1872 ; AVX2-SLOW:       # %bb.0:
   1873 ; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2]
   1874 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm1 = ymm1[0,0,2,2,4,4,6,6]
   1875 ; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm1 = ymm1[2,1,0,3]
   1876 ; AVX2-SLOW-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
   1877 ; AVX2-SLOW-NEXT:    retq
   1878 ;
   1879 ; AVX2-FAST-LABEL: shuffle_v8i32_6caa87e5:
   1880 ; AVX2-FAST:       # %bb.0:
   1881 ; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [4,4,2,2,0,0,6,6]
   1882 ; AVX2-FAST-NEXT:    vpermps %ymm1, %ymm2, %ymm1
   1883 ; AVX2-FAST-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[3,1,3,2]
   1884 ; AVX2-FAST-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1,2,3,4],ymm0[5],ymm1[6],ymm0[7]
   1885 ; AVX2-FAST-NEXT:    retq
   1886 ;
   1887 ; AVX512VL-LABEL: shuffle_v8i32_6caa87e5:
   1888 ; AVX512VL:       # %bb.0:
   1889 ; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [14,4,2,2,0,15,6,13]
   1890 ; AVX512VL-NEXT:    vpermi2d %ymm0, %ymm1, %ymm2
   1891 ; AVX512VL-NEXT:    vmovdqa %ymm2, %ymm0
   1892 ; AVX512VL-NEXT:    retq
   1893   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
   1894   ret <8 x i32> %shuffle
   1895 }
   1896 
   1897 define <8 x i32> @shuffle_v8i32_32103210(<8 x i32> %a, <8 x i32> %b) {
        ; Reverses the low four elements of %a and places that result in both 128-bit halves
        ; (mask 3,2,1,0,3,2,1,0); %b is unused.
        ; Every run reverses in xmm first; the AVX1 run then duplicates with vinsertf128,
        ; the AVX2 and AVX512VL runs with vpermpd.
   1898 ; AVX1-LABEL: shuffle_v8i32_32103210:
   1899 ; AVX1:       # %bb.0:
   1900 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1901 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   1902 ; AVX1-NEXT:    retq
   1903 ;
   1904 ; AVX2OR512VL-LABEL: shuffle_v8i32_32103210:
   1905 ; AVX2OR512VL:       # %bb.0:
   1906 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[3,2,1,0]
   1907 ; AVX2OR512VL-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,1,0,1]
   1908 ; AVX2OR512VL-NEXT:    retq
   1909   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 3, i32 2, i32 1, i32 0>
   1910   ret <8 x i32> %shuffle
   1911 }
   1912 
   1913 define <8 x i32> @shuffle_v8i32_76547654(<8 x i32> %a, <8 x i32> %b) { ; Reversed high four elements of %a, repeated in both 128-bit halves; %b is not selected by the mask.
   1914 ; AVX1-LABEL: shuffle_v8i32_76547654:
   1915 ; AVX1:       # %bb.0:
   1916 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1917 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
   1918 ; AVX1-NEXT:    retq
   1919 ;
   1920 ; AVX2-SLOW-LABEL: shuffle_v8i32_76547654:
   1921 ; AVX2-SLOW:       # %bb.0:
   1922 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1923 ; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
   1924 ; AVX2-SLOW-NEXT:    retq
   1925 ;
   1926 ; AVX2-FAST-LABEL: shuffle_v8i32_76547654:
   1927 ; AVX2-FAST:       # %bb.0:
   1928 ; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
   1929 ; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1930 ; AVX2-FAST-NEXT:    retq
   1931 ;
   1932 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_76547654:
   1933 ; AVX512VL-SLOW:       # %bb.0:
   1934 ; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1935 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,2,3]
   1936 ; AVX512VL-SLOW-NEXT:    retq
   1937 ;
   1938 ; AVX512VL-FAST-LABEL: shuffle_v8i32_76547654:
   1939 ; AVX512VL-FAST:       # %bb.0:
   1940 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,7,6,5,4]
   1941 ; AVX512VL-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1942 ; AVX512VL-FAST-NEXT:    retq
   1943   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 7, i32 6, i32 5, i32 4>
   1944   ret <8 x i32> %shuffle
   1945 }
   1946 
   1947 define <8 x i32> @shuffle_v8i32_76543210(<8 x i32> %a, <8 x i32> %b) { ; Full reversal of the eight elements of %a; %b is not selected by the mask.
   1948 ; AVX1-LABEL: shuffle_v8i32_76543210:
   1949 ; AVX1:       # %bb.0:
   1950 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1951 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,0,1]
   1952 ; AVX1-NEXT:    retq
   1953 ;
   1954 ; AVX2-SLOW-LABEL: shuffle_v8i32_76543210:
   1955 ; AVX2-SLOW:       # %bb.0:
   1956 ; AVX2-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1957 ; AVX2-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
   1958 ; AVX2-SLOW-NEXT:    retq
   1959 ;
   1960 ; AVX2-FAST-LABEL: shuffle_v8i32_76543210:
   1961 ; AVX2-FAST:       # %bb.0:
   1962 ; AVX2-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
   1963 ; AVX2-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1964 ; AVX2-FAST-NEXT:    retq
   1965 ;
   1966 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_76543210:
   1967 ; AVX512VL-SLOW:       # %bb.0:
   1968 ; AVX512VL-SLOW-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1969 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[2,3,0,1]
   1970 ; AVX512VL-SLOW-NEXT:    retq
   1971 ;
   1972 ; AVX512VL-FAST-LABEL: shuffle_v8i32_76543210:
   1973 ; AVX512VL-FAST:       # %bb.0:
   1974 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm1 = [7,6,5,4,3,2,1,0]
   1975 ; AVX512VL-FAST-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   1976 ; AVX512VL-FAST-NEXT:    retq
   1977   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
   1978   ret <8 x i32> %shuffle
   1979 }
   1980 
   1981 define <8 x i32> @shuffle_v8i32_3210ba98(<8 x i32> %a, <8 x i32> %b) { ; Reversed low half of %a in lanes 0-3, reversed low half of %b (mask indices 11..8) in lanes 4-7.
   1982 ; ALL-LABEL: shuffle_v8i32_3210ba98:
   1983 ; ALL:       # %bb.0:
   1984 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   1985 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1986 ; ALL-NEXT:    retq
   1987   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 11, i32 10, i32 9, i32 8>
   1988   ret <8 x i32> %shuffle
   1989 }
   1990 
   1991 define <8 x i32> @shuffle_v8i32_3210fedc(<8 x i32> %a, <8 x i32> %b) { ; Reversed low half of %a in lanes 0-3, reversed high half of %b (mask indices 15..12) in lanes 4-7.
   1992 ; ALL-LABEL: shuffle_v8i32_3210fedc:
   1993 ; ALL:       # %bb.0:
   1994 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   1995 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   1996 ; ALL-NEXT:    retq
   1997   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 15, i32 14, i32 13, i32 12>
   1998   ret <8 x i32> %shuffle
   1999 }
   2000 
   2001 define <8 x i32> @shuffle_v8i32_7654fedc(<8 x i32> %a, <8 x i32> %b) { ; Reversed high half of %a in lanes 0-3, reversed high half of %b (mask indices 15..12) in lanes 4-7.
   2002 ; AVX1OR2-LABEL: shuffle_v8i32_7654fedc:
   2003 ; AVX1OR2:       # %bb.0:
   2004 ; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   2005 ; AVX1OR2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   2006 ; AVX1OR2-NEXT:    retq
   2007 ;
   2008 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_7654fedc:
   2009 ; AVX512VL-SLOW:       # %bb.0:
   2010 ; AVX512VL-SLOW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   2011 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   2012 ; AVX512VL-SLOW-NEXT:    retq
   2013 ;
   2014 ; AVX512VL-FAST-LABEL: shuffle_v8i32_7654fedc:
   2015 ; AVX512VL-FAST:       # %bb.0:
   2016 ; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
   2017 ; AVX512VL-FAST-NEXT:    vpermt2d %ymm1, %ymm2, %ymm0
   2018 ; AVX512VL-FAST-NEXT:    retq
   2019   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 15, i32 14, i32 13, i32 12>
   2020   ret <8 x i32> %shuffle
   2021 }
   2022 
   2023 define <8 x i32> @shuffle_v8i32_fedc7654(<8 x i32> %a, <8 x i32> %b) { ; Reversed high half of %b (mask indices 15..12) in lanes 0-3, reversed high half of %a in lanes 4-7.
   2024 ; AVX1OR2-LABEL: shuffle_v8i32_fedc7654:
   2025 ; AVX1OR2:       # %bb.0:
   2026 ; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
   2027 ; AVX1OR2-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   2028 ; AVX1OR2-NEXT:    retq
   2029 ;
   2030 ; AVX512VL-SLOW-LABEL: shuffle_v8i32_fedc7654:
   2031 ; AVX512VL-SLOW:       # %bb.0:
   2032 ; AVX512VL-SLOW-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm1[2,3],ymm0[2,3]
   2033 ; AVX512VL-SLOW-NEXT:    vpshufd {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   2034 ; AVX512VL-SLOW-NEXT:    retq
   2035 ;
   2036 ; AVX512VL-FAST-LABEL: shuffle_v8i32_fedc7654:
   2037 ; AVX512VL-FAST:       # %bb.0:
   2038 ; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [7,6,5,4,15,14,13,12]
   2039 ; AVX512VL-FAST-NEXT:    vpermi2d %ymm0, %ymm1, %ymm2
   2040 ; AVX512VL-FAST-NEXT:    vmovdqa %ymm2, %ymm0
   2041 ; AVX512VL-FAST-NEXT:    retq
   2042   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 15, i32 14, i32 13, i32 12, i32 7, i32 6, i32 5, i32 4>
   2043   ret <8 x i32> %shuffle
   2044 }
   2045 
   2046 define <8 x i32> @shuffle_v8i32_ba987654(<8 x i32> %a, <8 x i32> %b) { ; Reversed low half of %b (mask indices 11..8) in lanes 0-3, reversed high half of %a in lanes 4-7.
   2047 ; ALL-LABEL: shuffle_v8i32_ba987654:
   2048 ; ALL:       # %bb.0:
   2049 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
   2050 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   2051 ; ALL-NEXT:    retq
   2052   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4>
   2053   ret <8 x i32> %shuffle
   2054 }
   2055 
   2056 define <8 x i32> @shuffle_v8i32_ba983210(<8 x i32> %a, <8 x i32> %b) { ; Reversed low half of %b (mask indices 11..8) in lanes 0-3, reversed low half of %a in lanes 4-7.
   2057 ; ALL-LABEL: shuffle_v8i32_ba983210:
   2058 ; ALL:       # %bb.0:
   2059 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
   2060 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
   2061 ; ALL-NEXT:    retq
   2062   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 10, i32 9, i32 8, i32 3, i32 2, i32 1, i32 0>
   2063   ret <8 x i32> %shuffle ; NOTE(review) expected asm above mirrors shuffle_v8i32_3210ba98 with the operands swapped; regenerate with utils/update_llc_test_checks.py to confirm
   2064 }
   2065 
   2066 define <8 x i32> @shuffle_v8i32_zuu8zuuc(<8 x i32> %a) { ; First shuffle operand is zeroinitializer, so lanes 0 and 4 are zero; lanes 3 and 7 take %a[0] and %a[4]; the rest are undef.
   2067 ; AVX1-LABEL: shuffle_v8i32_zuu8zuuc:
   2068 ; AVX1:       # %bb.0:
   2069 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   2070 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,1],ymm0[2,0],ymm1[4,5],ymm0[6,4]
   2071 ; AVX1-NEXT:    retq
   2072 ;
   2073 ; AVX2OR512VL-LABEL: shuffle_v8i32_zuu8zuuc:
   2074 ; AVX2OR512VL:       # %bb.0:
   2075 ; AVX2OR512VL-NEXT:    vpslldq {{.*#+}} ymm0 = zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[0,1,2,3],zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,zero,ymm0[16,17,18,19]
   2076 ; AVX2OR512VL-NEXT:    retq
   2077   %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 0, i32 undef, i32 undef, i32 8, i32 0, i32 undef, i32 undef, i32 12>
   2078   ret <8 x i32> %shuffle
   2079 }
   2080 
   2081 define <8 x i32> @shuffle_v8i32_9ubzdefz(<8 x i32> %a) { ; First shuffle operand is zeroinitializer; result is %a[1], undef, %a[3], zero, %a[5], %a[6], %a[7], zero.
   2082 ; AVX1-LABEL: shuffle_v8i32_9ubzdefz:
   2083 ; AVX1:       # %bb.0:
   2084 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   2085 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[3,0],ymm1[7,4],ymm0[7,4]
   2086 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   2087 ; AVX1-NEXT:    retq
   2088 ;
   2089 ; AVX2OR512VL-LABEL: shuffle_v8i32_9ubzdefz:
   2090 ; AVX2OR512VL:       # %bb.0:
   2091 ; AVX2OR512VL-NEXT:    vpsrldq {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],zero,zero,zero,zero,ymm0[20,21,22,23,24,25,26,27,28,29,30,31],zero,zero,zero,zero
   2092 ; AVX2OR512VL-NEXT:    retq
   2093   %shuffle = shufflevector <8 x i32> zeroinitializer, <8 x i32> %a, <8 x i32> <i32 9, i32 undef, i32 11, i32 0, i32 13, i32 14, i32 15, i32 0>
   2094   ret <8 x i32> %shuffle
   2095 }
   2096 
   2097 define <8 x i32> @shuffle_v8i32_80u1b4uu(<8 x i32> %a, <8 x i32> %b) { ; Interleaves low elements of %b and %a per 128-bit lane with some lanes undef. NOTE(review) the name says b (11) for lane 4 but the mask uses 12 (hex c); the name looks like a typo, the mask matches the expected vunpcklps.
   2098 ; ALL-LABEL: shuffle_v8i32_80u1b4uu:
   2099 ; ALL:       # %bb.0:
   2100 ; ALL-NEXT:    vunpcklps {{.*#+}} ymm0 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[4],ymm0[4],ymm1[5],ymm0[5]
   2101 ; ALL-NEXT:    retq
   2102   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 0, i32 undef, i32 1, i32 12, i32 4, i32 undef, i32 undef>
   2103   ret <8 x i32> %shuffle
   2104 }
   2105 
   2106 define <8 x i32> @shuffle_v8i32_uuuu1111(<8 x i32> %a, <8 x i32> %b) { ; Lanes 0-3 are undef; lanes 4-7 are all %a[1].
   2107 ; ALL-LABEL: shuffle_v8i32_uuuu1111:
   2108 ; ALL:       # %bb.0:
   2109 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   2110 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   2111 ; ALL-NEXT:    retq
   2112   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 1, i32 1, i32 1, i32 1>
   2113   ret <8 x i32> %shuffle
   2114 }
   2115 
   2116 define <8 x i32> @shuffle_v8i32_2222uuuu(<8 x i32> %a, <8 x i32> %b) { ; Lanes 0-3 are all %a[2]; lanes 4-7 are undef, so a 128-bit shuffle is expected to suffice.
   2117 ; ALL-LABEL: shuffle_v8i32_2222uuuu:
   2118 ; ALL:       # %bb.0:
   2119 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[2,2,2,2]
   2120 ; ALL-NEXT:    retq
   2121   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 2, i32 2, i32 2, i32 undef, i32 undef, i32 undef, i32 undef>
   2122   ret <8 x i32> %shuffle
   2123 }
   2124 
   2125 define <8 x i32> @shuffle_v8i32_2A3Buuuu(<8 x i32> %a, <8 x i32> %b) { ; Lanes 0-3 interleave %a[2], %b[2], %a[3], %b[3]; lanes 4-7 are undef.
   2126 ; ALL-LABEL: shuffle_v8i32_2A3Buuuu:
   2127 ; ALL:       # %bb.0:
   2128 ; ALL-NEXT:    vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
   2129 ; ALL-NEXT:    retq
   2130   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 undef, i32 undef, i32 undef, i32 undef>
   2131   ret <8 x i32> %shuffle
   2132 }
   2133 
   2134 define <8 x i32> @shuffle_v8i32_44444444(<8 x i32> %a, <8 x i32> %b) { ; Splat of %a[4] (first element of the high 128-bit half) to all eight lanes.
   2135 ; AVX1-LABEL: shuffle_v8i32_44444444:
   2136 ; AVX1:       # %bb.0:
   2137 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
   2138 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
   2139 ; AVX1-NEXT:    retq
   2140 ;
   2141 ; AVX2OR512VL-LABEL: shuffle_v8i32_44444444:
   2142 ; AVX2OR512VL:       # %bb.0:
   2143 ; AVX2OR512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2144 ; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
   2145 ; AVX2OR512VL-NEXT:    retq
   2146   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   2147   ret <8 x i32> %shuffle
   2148 }
   2149 
   2150 define <8 x i32> @shuffle_v8i32_44444444_bc(<8 x float> %a, <8 x float> %b) { ; Same element-4 splat as shuffle_v8i32_44444444, but the inputs arrive as <8 x float> and are bitcast to <8 x i32> first.
   2151 ; AVX1-LABEL: shuffle_v8i32_44444444_bc:
   2152 ; AVX1:       # %bb.0:
   2153 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,0,0,0,4,4,4,4]
   2154 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3,2,3]
   2155 ; AVX1-NEXT:    retq
   2156 ;
   2157 ; AVX2OR512VL-LABEL: shuffle_v8i32_44444444_bc:
   2158 ; AVX2OR512VL:       # %bb.0:
   2159 ; AVX2OR512VL-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2160 ; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
   2161 ; AVX2OR512VL-NEXT:    retq
   2162   %tmp0 = bitcast <8 x float> %a to <8 x i32>
   2163   %tmp1 = bitcast <8 x float> %b to <8 x i32>
   2164   %shuffle = shufflevector <8 x i32> %tmp0, <8 x i32> %tmp1, <8 x i32> <i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4, i32 4>
   2165   ret <8 x i32> %shuffle
   2166 }
   2167 
   2168 define <8 x i32> @shuffle_v8i32_5555uuuu(<8 x i32> %a, <8 x i32> %b) { ; Lanes 0-3 are all %a[5] (from the high 128-bit half); lanes 4-7 are undef.
   2169 ; ALL-LABEL: shuffle_v8i32_5555uuuu:
   2170 ; ALL:       # %bb.0:
   2171 ; ALL-NEXT:    vextractf128 $1, %ymm0, %xmm0
   2172 ; ALL-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[1,1,1,1]
   2173 ; ALL-NEXT:    retq
   2174   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 5, i32 5, i32 5, i32 5, i32 undef, i32 undef, i32 undef, i32 undef>
   2175   ret <8 x i32> %shuffle
   2176 }
   2177 
   2178 ; PR32453
   2179 define <8 x i32> @shuffle_v8i32_uuuuuu7u(<8 x i32> %a, <8 x i32> %b) nounwind { ; Only lane 6 is defined (it takes %a[7]); every other lane is undef.
   2180 ; AVX1-LABEL: shuffle_v8i32_uuuuuu7u:
   2181 ; AVX1:       # %bb.0:
   2182 ; AVX1-NEXT:    vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
   2183 ; AVX1-NEXT:    retq
   2184 ;
   2185 ; AVX2OR512VL-LABEL: shuffle_v8i32_uuuuuu7u:
   2186 ; AVX2OR512VL:       # %bb.0:
   2187 ; AVX2OR512VL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,1,3,3,4,5,7,7]
   2188 ; AVX2OR512VL-NEXT:    retq
   2189   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 7, i32 undef>
   2190   ret <8 x i32> %shuffle
   2191 }
   2192 
   2193 define <8 x float> @splat_mem_v8f32_2(float* %p) { ; Loads one float, inserts it into lane 0 of a <4 x float>, then splats lane 0 to eight lanes; a broadcast from memory is expected.
   2194 ; ALL-LABEL: splat_mem_v8f32_2:
   2195 ; ALL:       # %bb.0:
   2196 ; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
   2197 ; ALL-NEXT:    retq
   2198   %1 = load float, float* %p
   2199   %2 = insertelement <4 x float> undef, float %1, i32 0
   2200   %3 = shufflevector <4 x float> %2, <4 x float> undef, <8 x i32> zeroinitializer
   2201   ret <8 x float> %3
   2202 }
   2203 
   2204 define <8 x float> @splat_v8f32(<4 x float> %r) { ; Splats element 0 of the <4 x float> argument to all eight lanes of the wider result.
   2205 ; AVX1-LABEL: splat_v8f32:
   2206 ; AVX1:       # %bb.0:
   2207 ; AVX1-NEXT:    vpermilps {{.*#+}} xmm0 = xmm0[0,0,0,0]
   2208 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   2209 ; AVX1-NEXT:    retq
   2210 ;
   2211 ; AVX2OR512VL-LABEL: splat_v8f32:
   2212 ; AVX2OR512VL:       # %bb.0:
   2213 ; AVX2OR512VL-NEXT:    vbroadcastss %xmm0, %ymm0
   2214 ; AVX2OR512VL-NEXT:    retq
   2215   %1 = shufflevector <4 x float> %r, <4 x float> undef, <8 x i32> zeroinitializer
   2216   ret <8 x float> %1
   2217 }
   2218 
   2219 ;
   2220 ; Shuffle to logical bit shifts
   2221 ;
   2222 
   2223 define <8 x i32> @shuffle_v8i32_z0U2zUz6(<8 x i32> %a) { ; Second operand is zeroinitializer (mask index 8 picks zero); result is zero, %a[0], undef, %a[2], zero, undef, zero, %a[6], matching a 64-bit left shift by 32.
   2224 ; AVX1-LABEL: shuffle_v8i32_z0U2zUz6:
   2225 ; AVX1:       # %bb.0:
   2226 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   2227 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
   2228 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[2,0,3,1,6,4,7,5]
   2229 ; AVX1-NEXT:    retq
   2230 ;
   2231 ; AVX2OR512VL-LABEL: shuffle_v8i32_z0U2zUz6:
   2232 ; AVX2OR512VL:       # %bb.0:
   2233 ; AVX2OR512VL-NEXT:    vpsllq $32, %ymm0, %ymm0
   2234 ; AVX2OR512VL-NEXT:    retq
   2235   %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 8, i32 0, i32 undef, i32 2, i32 8, i32 undef, i32 8, i32 6>
   2236   ret <8 x i32> %shuffle
   2237 }
   2238 
   2239 define <8 x i32> @shuffle_v8i32_1U3z5zUU(<8 x i32> %a) { ; Second operand is zeroinitializer (mask index 8 picks zero); result is %a[1], undef, %a[3], zero, %a[5], zero, undef, undef, matching a 64-bit right shift by 32.
   2240 ; AVX1-LABEL: shuffle_v8i32_1U3z5zUU:
   2241 ; AVX1:       # %bb.0:
   2242 ; AVX1-NEXT:    vxorps %xmm1, %xmm1, %xmm1
   2243 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   2244 ; AVX1-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[0,2,1,3,4,6,5,7]
   2245 ; AVX1-NEXT:    retq
   2246 ;
   2247 ; AVX2OR512VL-LABEL: shuffle_v8i32_1U3z5zUU:
   2248 ; AVX2OR512VL:       # %bb.0:
   2249 ; AVX2OR512VL-NEXT:    vpsrlq $32, %ymm0, %ymm0
   2250 ; AVX2OR512VL-NEXT:    retq
   2251   %shuffle = shufflevector <8 x i32> %a, <8 x i32> zeroinitializer, <8 x i32> <i32 1, i32 undef, i32 3, i32 8, i32 5, i32 8, i32 undef, i32 undef>
   2252   ret <8 x i32> %shuffle
   2253 }
   2254 
   2255 define <8 x i32> @shuffle_v8i32_B012F456(<8 x i32> %a, <8 x i32> %b) { ; Per 128-bit lane, the last element of %b followed by the first three elements of %a (b3,a0,a1,a2 then b7,a4,a5,a6).
   2256 ; AVX1-LABEL: shuffle_v8i32_B012F456:
   2257 ; AVX1:       # %bb.0:
   2258 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[3,0],ymm0[0,0],ymm1[7,4],ymm0[4,4]
   2259 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[0,2],ymm0[1,2],ymm1[4,6],ymm0[5,6]
   2260 ; AVX1-NEXT:    retq
   2261 ;
   2262 ; AVX2OR512VL-LABEL: shuffle_v8i32_B012F456:
   2263 ; AVX2OR512VL:       # %bb.0:
   2264 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[12,13,14,15],ymm0[0,1,2,3,4,5,6,7,8,9,10,11],ymm1[28,29,30,31],ymm0[16,17,18,19,20,21,22,23,24,25,26,27]
   2265 ; AVX2OR512VL-NEXT:    retq
   2266   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 11, i32 0, i32 1, i32 2, i32 15, i32 4, i32 5, i32 6>
   2267   ret <8 x i32> %shuffle
   2268 }
   2269 
   2270 define <8 x i32> @shuffle_v8i32_1238567C(<8 x i32> %a, <8 x i32> %b) { ; Per 128-bit lane, the last three elements of %a followed by the first element of %b (a1,a2,a3,b0 then a5,a6,a7,b4).
   2271 ; AVX1-LABEL: shuffle_v8i32_1238567C:
   2272 ; AVX1:       # %bb.0:
   2273 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
   2274 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   2275 ; AVX1-NEXT:    retq
   2276 ;
   2277 ; AVX2OR512VL-LABEL: shuffle_v8i32_1238567C:
   2278 ; AVX2OR512VL:       # %bb.0:
   2279 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[4,5,6,7,8,9,10,11,12,13,14,15],ymm1[0,1,2,3],ymm0[20,21,22,23,24,25,26,27,28,29,30,31],ymm1[16,17,18,19]
   2280 ; AVX2OR512VL-NEXT:    retq
   2281   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 8, i32 5, i32 6, i32 7, i32 12>
   2282   ret <8 x i32> %shuffle
   2283 }
   2284 
   2285 define <8 x i32> @shuffle_v8i32_9AB0DEF4(<8 x i32> %a, <8 x i32> %b) { ; Per 128-bit lane, the last three elements of %b followed by the first element of %a (b1,b2,b3,a0 then b5,b6,b7,a4).
   2286 ; AVX1-LABEL: shuffle_v8i32_9AB0DEF4:
   2287 ; AVX1:       # %bb.0:
   2288 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,0],ymm1[3,0],ymm0[4,4],ymm1[7,4]
   2289 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,2],ymm0[2,0],ymm1[5,6],ymm0[6,4]
   2290 ; AVX1-NEXT:    retq
   2291 ;
   2292 ; AVX2OR512VL-LABEL: shuffle_v8i32_9AB0DEF4:
   2293 ; AVX2OR512VL:       # %bb.0:
   2294 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm1[4,5,6,7,8,9,10,11,12,13,14,15],ymm0[0,1,2,3],ymm1[20,21,22,23,24,25,26,27,28,29,30,31],ymm0[16,17,18,19]
   2295 ; AVX2OR512VL-NEXT:    retq
   2296   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 10, i32 11, i32 0, i32 13, i32 14, i32 15, i32 4>
   2297   ret <8 x i32> %shuffle
   2298 }
   2299 
   2300 define <8 x i32> @shuffle_v8i32_389A7CDE(<8 x i32> %a, <8 x i32> %b) { ; Per 128-bit lane, the last element of %a followed by the first three elements of %b (a3,b0,b1,b2 then a7,b4,b5,b6).
   2301 ; AVX1-LABEL: shuffle_v8i32_389A7CDE:
   2302 ; AVX1:       # %bb.0:
   2303 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[3,0],ymm1[0,0],ymm0[7,4],ymm1[4,4]
   2304 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[0,2],ymm1[1,2],ymm0[4,6],ymm1[5,6]
   2305 ; AVX1-NEXT:    retq
   2306 ;
   2307 ; AVX2OR512VL-LABEL: shuffle_v8i32_389A7CDE:
   2308 ; AVX2OR512VL:       # %bb.0:
   2309 ; AVX2OR512VL-NEXT:    vpalignr {{.*#+}} ymm0 = ymm0[12,13,14,15],ymm1[0,1,2,3,4,5,6,7,8,9,10,11],ymm0[28,29,30,31],ymm1[16,17,18,19,20,21,22,23,24,25,26,27]
   2310 ; AVX2OR512VL-NEXT:    retq
   2311   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 8, i32 9, i32 10, i32 7, i32 12, i32 13, i32 14>
   2312   ret <8 x i32> %shuffle
   2313 }
   2314 
   2315 define <8 x i32> @shuffle_v8i32_30127456(<8 x i32> %a, <8 x i32> %b) { ; Rotates the four elements of each 128-bit lane of %a by one position (3,0,1,2 per lane); %b is not selected by the mask.
   2316 ; ALL-LABEL: shuffle_v8i32_30127456:
   2317 ; ALL:       # %bb.0:
   2318 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[3,0,1,2,7,4,5,6]
   2319 ; ALL-NEXT:    retq
   2320   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 3, i32 0, i32 1, i32 2, i32 7, i32 4, i32 5, i32 6>
   2321   ret <8 x i32> %shuffle
   2322 }
   2323 
   2324 define <8 x i32> @shuffle_v8i32_12305674(<8 x i32> %a, <8 x i32> %b) { ; Rotates the four elements of each 128-bit lane of %a by one position in the other direction (1,2,3,0 per lane); %b is not selected by the mask.
   2325 ; ALL-LABEL: shuffle_v8i32_12305674:
   2326 ; ALL:       # %bb.0:
   2327 ; ALL-NEXT:    vpermilps {{.*#+}} ymm0 = ymm0[1,2,3,0,5,6,7,4]
   2328 ; ALL-NEXT:    retq
   2329   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 0, i32 5, i32 6, i32 7, i32 4>
   2330   ret <8 x i32> %shuffle
   2331 }
   2332 
   2333 define <8x float> @concat_v2f32_1(<2 x float>* %tmp64, <2 x float>* %tmp65) { ; Loads two <2 x float> values, widens each to <8 x float>, then combines them into lanes 0-3 of the result (lanes 4-7 undef).
   2334 ; ALL-LABEL: concat_v2f32_1:
   2335 ; ALL:       # %bb.0: # %entry
   2336 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   2337 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   2338 ; ALL-NEXT:    retq
   2339 entry:
   2340   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2341   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   2342   %tmp73 = shufflevector <2 x float> %tmp72, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   2343   %tmp75 = shufflevector <2 x float> %tmp74, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
   2344   %tmp76 = shufflevector <8 x float> %tmp73, <8 x float> %tmp75, <8 x i32> <i32 0, i32 1, i32 8, i32 9, i32 undef, i32 undef, i32 undef, i32 undef>
   2345   ret <8 x float> %tmp76
   2346 }
   2347 
   2348 define <8x float> @concat_v2f32_2(<2 x float>* %tmp64, <2 x float>* %tmp65) { ; Loads two <2 x float> values and concatenates them into lanes 0-3 of an <8 x float> with a single widening shuffle (lanes 4-7 undef).
   2349 ; ALL-LABEL: concat_v2f32_2:
   2350 ; ALL:       # %bb.0: # %entry
   2351 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   2352 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   2353 ; ALL-NEXT:    retq
   2354 entry:
   2355   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2356   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   2357   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   2358   ret <8 x float> %tmp76
   2359 }
   2360 
   2361 define <8x float> @concat_v2f32_3(<2 x float>* %tmp64, <2 x float>* %tmp65) { ; Loads two <2 x float> values, concatenates them into a <4 x float>, then widens that to <8 x float> (lanes 4-7 undef).
   2362 ; ALL-LABEL: concat_v2f32_3:
   2363 ; ALL:       # %bb.0: # %entry
   2364 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
   2365 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
   2366 ; ALL-NEXT:    retq
   2367 entry:
   2368   %tmp74 = load <2 x float>, <2 x float>* %tmp65, align 8
   2369   %tmp72 = load <2 x float>, <2 x float>* %tmp64, align 8
   2370   %tmp76 = shufflevector <2 x float> %tmp72, <2 x float> %tmp74, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2371   %res = shufflevector <4 x float> %tmp76, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef>
   2372   ret <8 x float> %res
   2373 }
   2374 
   2375 define <8 x i32> @insert_mem_and_zero_v8i32(i32* %ptr) { ; Loaded i32 goes in lane 0; lanes 1-7 come from the zeroinitializer operand, so a single zero-extending scalar load is expected.
   2376 ; ALL-LABEL: insert_mem_and_zero_v8i32:
   2377 ; ALL:       # %bb.0:
   2378 ; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
   2379 ; ALL-NEXT:    retq
   2380   %a = load i32, i32* %ptr
   2381   %v = insertelement <8 x i32> undef, i32 %a, i32 0
   2382   %shuffle = shufflevector <8 x i32> %v, <8 x i32> zeroinitializer, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
   2383   ret <8 x i32> %shuffle
   2384 }
   2385 
   2386 define <8 x i32> @concat_v8i32_0123CDEF(<8 x i32> %a, <8 x i32> %b) { ; Extracts the low half of %a and the high half of %b as <4 x i32> values and concatenates them; a single blend is expected.
   2387 ; ALL-LABEL: concat_v8i32_0123CDEF:
   2388 ; ALL:       # %bb.0:
   2389 ; ALL-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   2390 ; ALL-NEXT:    retq
   2391   %alo = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2392   %bhi = shufflevector <8 x i32> %b, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2393   %shuf = shufflevector <4 x i32> %alo, <4 x i32> %bhi, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
   2394   ret <8 x i32> %shuf
   2395 }
   2396 
   2397 define <8 x i32> @concat_v8i32_4567CDEF_bc(<8 x i32> %a0, <8 x i32> %a1) { ; Extracts the high halves of %a0 and %a1, bitcasts each to <2 x i64>, concatenates them as <4 x i64>, and bitcasts back to <8 x i32>.
   2398 ; AVX1OR2-LABEL: concat_v8i32_4567CDEF_bc:
   2399 ; AVX1OR2:       # %bb.0:
   2400 ; AVX1OR2-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   2401 ; AVX1OR2-NEXT:    retq
   2402 ;
   2403 ; AVX512VL-LABEL: concat_v8i32_4567CDEF_bc:
   2404 ; AVX512VL:       # %bb.0:
   2405 ; AVX512VL-NEXT:    vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   2406 ; AVX512VL-NEXT:    retq
   2407   %a0hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2408   %a1hi = shufflevector <8 x i32> %a0, <8 x i32> %a1, <4 x i32> <i32 12, i32 13, i32 14, i32 15>
   2409   %bc0hi = bitcast <4 x i32> %a0hi to <2 x i64>
   2410   %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
   2411   %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2412   %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x i32>
   2413   ret <8 x i32> %shuffle32
   2414 }
   2415 
   2416 define <8 x float> @concat_v8f32_4567CDEF_bc(<8 x float> %f0, <8 x float> %f1) { ; Float variant that concatenates the high halves of both inputs; %f0 is routed through <4 x i64> and %f1 through <8 x i32> before the 64-bit concat, and the result is bitcast back to <8 x float>.
   2417 ; ALL-LABEL: concat_v8f32_4567CDEF_bc:
   2418 ; ALL:       # %bb.0:
   2419 ; ALL-NEXT:    vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
   2420 ; ALL-NEXT:    retq
   2421   %a0 = bitcast <8 x float> %f0 to <4 x i64>
   2422   %a1 = bitcast <8 x float> %f1 to <8 x i32>
   2423   %a0hi = shufflevector <4 x i64> %a0, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
   2424   %a1hi = shufflevector <8 x i32> %a1, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
   2425   %bc0hi = bitcast <2 x i64> %a0hi to <2 x i64>
   2426   %bc1hi = bitcast <4 x i32> %a1hi to <2 x i64>
   2427   %shuffle64 = shufflevector <2 x i64> %bc0hi, <2 x i64> %bc1hi, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
   2428   %shuffle32 = bitcast <4 x i64> %shuffle64 to <8 x float>
   2429   ret <8 x float> %shuffle32
   2430 }
   2431 
   2432 define <8 x i32> @insert_dup_mem_v8i32(i32* %ptr) { ; Loads one i32, inserts it into lane 0 of a zero <4 x i32>, then splats lane 0 to eight lanes; a broadcast from memory is expected.
   2433 ; ALL-LABEL: insert_dup_mem_v8i32:
   2434 ; ALL:       # %bb.0:
   2435 ; ALL-NEXT:    vbroadcastss (%rdi), %ymm0
   2436 ; ALL-NEXT:    retq
   2437   %tmp = load i32, i32* %ptr, align 4
   2438   %tmp1 = insertelement <4 x i32> zeroinitializer, i32 %tmp, i32 0
   2439   %tmp2 = shufflevector <4 x i32> %tmp1, <4 x i32> undef, <8 x i32> zeroinitializer
   2440   ret <8 x i32> %tmp2
   2441 }
   2442 
   2443 define <8 x i32> @shuffle_v8i32_12345678(<8 x i32> %a, <8 x i32> %b) { ; Elements 1-7 of %a followed by element 0 of %b, i.e. a one-element shift across the concatenation of %a and %b.
   2444 ; AVX1-LABEL: shuffle_v8i32_12345678:
   2445 ; AVX1:       # %bb.0:
   2446 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
   2447 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2448 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
   2449 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   2450 ; AVX1-NEXT:    retq
   2451 ;
   2452 ; AVX2-LABEL: shuffle_v8i32_12345678:
   2453 ; AVX2:       # %bb.0:
   2454 ; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0],ymm0[1,2,3,4,5,6,7]
   2455 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [1,2,3,4,5,6,7,0]
   2456 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   2457 ; AVX2-NEXT:    retq
   2458 ;
   2459 ; AVX512VL-LABEL: shuffle_v8i32_12345678:
   2460 ; AVX512VL:       # %bb.0:
   2461 ; AVX512VL-NEXT:    valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7],ymm1[0]
   2462 ; AVX512VL-NEXT:    retq
   2463   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
   2464   ret <8 x i32> %shuffle
   2465 }
   2466 
   2467 define <8 x i32> @shuffle_v8i32_12345670(<8 x i32> %a) { ; Single-input rotation of %a by one element (elements 1-7 followed by element 0); the second shuffle operand is undef.
   2468 ; AVX1-LABEL: shuffle_v8i32_12345670:
   2469 ; AVX1:       # %bb.0:
   2470 ; AVX1-NEXT:    vperm2f128 {{.*#+}} ymm1 = ymm0[2,3,0,1]
   2471 ; AVX1-NEXT:    vshufps {{.*#+}} ymm1 = ymm1[0,0],ymm0[3,0],ymm1[4,4],ymm0[7,4]
   2472 ; AVX1-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,2],ymm1[2,0],ymm0[5,6],ymm1[6,4]
   2473 ; AVX1-NEXT:    retq
   2474 ;
   2475 ; AVX2-LABEL: shuffle_v8i32_12345670:
   2476 ; AVX2:       # %bb.0:
   2477 ; AVX2-NEXT:    vmovaps {{.*#+}} ymm1 = [1,2,3,4,5,6,7,0]
   2478 ; AVX2-NEXT:    vpermps %ymm0, %ymm1, %ymm0
   2479 ; AVX2-NEXT:    retq
   2480 ;
   2481 ; AVX512VL-LABEL: shuffle_v8i32_12345670:
   2482 ; AVX512VL:       # %bb.0:
   2483 ; AVX512VL-NEXT:    valignd {{.*#+}} ymm0 = ymm0[1,2,3,4,5,6,7,0]
   2484 ; AVX512VL-NEXT:    retq
   2485   %shuffle = shufflevector <8 x i32> %a, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0>
   2486   ret <8 x i32> %shuffle
   2487 }
   2488 
   2489 define <8 x float> @add_v8f32_02468ACE_13579BDF(<8 x float> %a, <8 x float> %b) { ; Adds the even-indexed elements of the %a,%b concatenation to the odd-indexed ones, so each result lane is the sum of one adjacent pair.
   2490 ; AVX1-LABEL: add_v8f32_02468ACE_13579BDF:
   2491 ; AVX1:       # %bb.0: # %entry
   2492 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2493 ; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[0,2],xmm2[0,2]
   2494 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
   2495 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2496 ; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm0[0,2],xmm4[0,2]
   2497 ; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
   2498 ; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
   2499 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
   2500 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3]
   2501 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   2502 ; AVX1-NEXT:    vaddps %ymm0, %ymm3, %ymm0
   2503 ; AVX1-NEXT:    retq
   2504 ;
   2505 ; AVX2-LABEL: add_v8f32_02468ACE_13579BDF:
   2506 ; AVX2:       # %bb.0: # %entry
   2507 ; AVX2-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
   2508 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
   2509 ; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   2510 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
   2511 ; AVX2-NEXT:    vaddps %ymm0, %ymm2, %ymm0
   2512 ; AVX2-NEXT:    retq
   2513 ;
   2514 ; AVX512VL-SLOW-LABEL: add_v8f32_02468ACE_13579BDF:
   2515 ; AVX512VL-SLOW:       # %bb.0: # %entry
   2516 ; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
   2517 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
   2518 ; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   2519 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
   2520 ; AVX512VL-SLOW-NEXT:    vaddps %ymm0, %ymm2, %ymm0
   2521 ; AVX512VL-SLOW-NEXT:    retq
   2522 ;
   2523 ; AVX512VL-FAST-LABEL: add_v8f32_02468ACE_13579BDF:
   2524 ; AVX512VL-FAST:       # %bb.0: # %entry
   2525 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
   2526 ; AVX512VL-FAST-NEXT:    vpermi2ps %ymm1, %ymm0, %ymm2
   2527 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15]
   2528 ; AVX512VL-FAST-NEXT:    vpermi2ps %ymm1, %ymm0, %ymm3
   2529 ; AVX512VL-FAST-NEXT:    vaddps %ymm3, %ymm2, %ymm0
   2530 ; AVX512VL-FAST-NEXT:    retq
   2531 entry:
   2532   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
   2533   %shuffle1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
   2534   %add = fadd <8 x float> %shuffle, %shuffle1
   2535   ret <8 x float> %add
   2536 }
   2537 
   2538 define <8 x float> @add_v8f32_8ACE0246_9BDF1357(<8 x float> %a, <8 x float> %b) {
        ; Adds adjacent float lanes pairwise: result lanes 0-3 are b0+b1, b2+b3, b4+b5, b6+b7
        ; and result lanes 4-7 are a0+a1, a2+a3, a4+a5, a6+a7.
        ; The function name spells the two shuffle masks in hex (8ACE0246 and 9BDF1357).
        ; The expected-assembly lines that follow are autogenerated (see the NOTE on the first
        ; line of this file); regenerate them with the script rather than editing by hand.
   2539 ; AVX1-LABEL: add_v8f32_8ACE0246_9BDF1357:
   2540 ; AVX1:       # %bb.0: # %entry
   2541 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2542 ; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm0[0,2],xmm2[0,2]
   2543 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
   2544 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
   2545 ; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm1[0,2],xmm4[0,2]
   2546 ; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
   2547 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
   2548 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   2549 ; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
   2550 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
   2551 ; AVX1-NEXT:    vaddps %ymm0, %ymm3, %ymm0
   2552 ; AVX1-NEXT:    retq
   2553 ;
   2554 ; AVX2-LABEL: add_v8f32_8ACE0246_9BDF1357:
   2555 ; AVX2:       # %bb.0: # %entry
   2556 ; AVX2-NEXT:    vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
   2557 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
   2558 ; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
   2559 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
   2560 ; AVX2-NEXT:    vaddps %ymm0, %ymm2, %ymm0
   2561 ; AVX2-NEXT:    retq
   2562 ;
   2563 ; AVX512VL-SLOW-LABEL: add_v8f32_8ACE0246_9BDF1357:
   2564 ; AVX512VL-SLOW:       # %bb.0: # %entry
   2565 ; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
   2566 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
   2567 ; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
   2568 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
   2569 ; AVX512VL-SLOW-NEXT:    vaddps %ymm0, %ymm2, %ymm0
   2570 ; AVX512VL-SLOW-NEXT:    retq
   2571 ;
   2572 ; AVX512VL-FAST-LABEL: add_v8f32_8ACE0246_9BDF1357:
   2573 ; AVX512VL-FAST:       # %bb.0: # %entry
   2574 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
   2575 ; AVX512VL-FAST-NEXT:    vpermi2ps %ymm0, %ymm1, %ymm2
   2576 ; AVX512VL-FAST-NEXT:    vmovaps {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15]
   2577 ; AVX512VL-FAST-NEXT:    vpermi2ps %ymm0, %ymm1, %ymm3
   2578 ; AVX512VL-FAST-NEXT:    vaddps %ymm3, %ymm2, %ymm0
   2579 ; AVX512VL-FAST-NEXT:    retq
   2580 entry:
          ; Mask indices 0-7 select from %a and 8-15 select from %b.
          ; Even lanes of %b (8,10,12,14) go to the low half, even lanes of %a (0,2,4,6) to the high half.
   2581   %shuffle = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6>
          ; Odd lanes in the same order: %b (9,11,13,15) first, then %a (1,3,5,7).
   2582   %shuffle1 = shufflevector <8 x float> %a, <8 x float> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7>
          ; Lane-wise sum of the even-lane and odd-lane vectors, i.e. each original adjacent pair is added.
   2583   %add = fadd <8 x float> %shuffle, %shuffle1
   2584   ret <8 x float> %add
   2585 }
   2586 
   2587 define <8 x i32> @add_v8i32_02468ACE_13579BDF(<8 x i32> %a, <8 x i32> %b) {
        ; Adds adjacent i32 lanes pairwise: result lanes 0-3 are a0+a1, a2+a3, a4+a5, a6+a7
        ; and result lanes 4-7 are b0+b1, b2+b3, b4+b5, b6+b7.
        ; The function name spells the two shuffle masks in hex (02468ACE and 13579BDF).
        ; The expected-assembly lines that follow are autogenerated (see the NOTE on the first
        ; line of this file); regenerate them with the script rather than editing by hand.
   2588 ; AVX1-LABEL: add_v8i32_02468ACE_13579BDF:
   2589 ; AVX1:       # %bb.0: # %entry
   2590 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm2
   2591 ; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm1[0,2],xmm2[0,2]
   2592 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
   2593 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm4
   2594 ; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm0[0,2],xmm4[0,2]
   2595 ; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
   2596 ; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm2[1,3]
   2597 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm1
   2598 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm4[1,3]
   2599 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],ymm1[4,5,6,7]
   2600 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2601 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
   2602 ; AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
   2603 ; AVX1-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
   2604 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2605 ; AVX1-NEXT:    retq
   2606 ;
   2607 ; AVX2-LABEL: add_v8i32_02468ACE_13579BDF:
   2608 ; AVX2:       # %bb.0: # %entry
   2609 ; AVX2-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
   2610 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
   2611 ; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   2612 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
   2613 ; AVX2-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
   2614 ; AVX2-NEXT:    retq
   2615 ;
   2616 ; AVX512VL-SLOW-LABEL: add_v8i32_02468ACE_13579BDF:
   2617 ; AVX512VL-SLOW:       # %bb.0: # %entry
   2618 ; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm2 = ymm0[0,2],ymm1[0,2],ymm0[4,6],ymm1[4,6]
   2619 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
   2620 ; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm0[1,3],ymm1[1,3],ymm0[5,7],ymm1[5,7]
   2621 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
   2622 ; AVX512VL-SLOW-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
   2623 ; AVX512VL-SLOW-NEXT:    retq
   2624 ;
   2625 ; AVX512VL-FAST-LABEL: add_v8i32_02468ACE_13579BDF:
   2626 ; AVX512VL-FAST:       # %bb.0: # %entry
   2627 ; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
   2628 ; AVX512VL-FAST-NEXT:    vpermi2d %ymm1, %ymm0, %ymm2
   2629 ; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15]
   2630 ; AVX512VL-FAST-NEXT:    vpermi2d %ymm1, %ymm0, %ymm3
   2631 ; AVX512VL-FAST-NEXT:    vpaddd %ymm3, %ymm2, %ymm0
   2632 ; AVX512VL-FAST-NEXT:    retq
   2633 entry:
          ; Mask indices 0-7 select from %a and 8-15 select from %b.
          ; Even lanes of %a (0,2,4,6) go to the low half, even lanes of %b (8,10,12,14) to the high half.
   2634   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 2, i32 4, i32 6, i32 8, i32 10, i32 12, i32 14>
          ; Odd lanes in the same order: %a (1,3,5,7) first, then %b (9,11,13,15).
   2635   %shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 1, i32 3, i32 5, i32 7, i32 9, i32 11, i32 13, i32 15>
          ; Lane-wise integer sum of the even-lane and odd-lane vectors, i.e. each original adjacent pair is added.
   2636   %add = add <8 x i32> %shuffle, %shuffle1
   2637   ret <8 x i32> %add
   2638 }
   2639 
   2640 define <8 x i32> @add_v8i32_8ACE0246_9BDF1357(<8 x i32> %a, <8 x i32> %b) {
        ; Adds adjacent i32 lanes pairwise: result lanes 0-3 are b0+b1, b2+b3, b4+b5, b6+b7
        ; and result lanes 4-7 are a0+a1, a2+a3, a4+a5, a6+a7.
        ; The function name spells the two shuffle masks in hex (8ACE0246 and 9BDF1357).
        ; The expected-assembly lines that follow are autogenerated (see the NOTE on the first
        ; line of this file); regenerate them with the script rather than editing by hand.
   2641 ; AVX1-LABEL: add_v8i32_8ACE0246_9BDF1357:
   2642 ; AVX1:       # %bb.0: # %entry
   2643 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm2
   2644 ; AVX1-NEXT:    vshufps {{.*#+}} xmm3 = xmm0[0,2],xmm2[0,2]
   2645 ; AVX1-NEXT:    vinsertf128 $1, %xmm3, %ymm0, %ymm3
   2646 ; AVX1-NEXT:    vextractf128 $1, %ymm1, %xmm4
   2647 ; AVX1-NEXT:    vshufps {{.*#+}} xmm5 = xmm1[0,2],xmm4[0,2]
   2648 ; AVX1-NEXT:    vblendps {{.*#+}} ymm3 = ymm5[0,1,2,3],ymm3[4,5,6,7]
   2649 ; AVX1-NEXT:    vshufps {{.*#+}} xmm0 = xmm0[1,3],xmm2[1,3]
   2650 ; AVX1-NEXT:    vinsertf128 $1, %xmm0, %ymm0, %ymm0
   2651 ; AVX1-NEXT:    vshufps {{.*#+}} xmm1 = xmm1[1,3],xmm4[1,3]
   2652 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm1[0,1,2,3],ymm0[4,5,6,7]
   2653 ; AVX1-NEXT:    vextractf128 $1, %ymm0, %xmm1
   2654 ; AVX1-NEXT:    vextractf128 $1, %ymm3, %xmm2
   2655 ; AVX1-NEXT:    vpaddd %xmm1, %xmm2, %xmm1
   2656 ; AVX1-NEXT:    vpaddd %xmm0, %xmm3, %xmm0
   2657 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
   2658 ; AVX1-NEXT:    retq
   2659 ;
   2660 ; AVX2-LABEL: add_v8i32_8ACE0246_9BDF1357:
   2661 ; AVX2:       # %bb.0: # %entry
   2662 ; AVX2-NEXT:    vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
   2663 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
   2664 ; AVX2-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
   2665 ; AVX2-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
   2666 ; AVX2-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
   2667 ; AVX2-NEXT:    retq
   2668 ;
   2669 ; AVX512VL-SLOW-LABEL: add_v8i32_8ACE0246_9BDF1357:
   2670 ; AVX512VL-SLOW:       # %bb.0: # %entry
   2671 ; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm2 = ymm1[0,2],ymm0[0,2],ymm1[4,6],ymm0[4,6]
   2672 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm2 = ymm2[0,2,1,3]
   2673 ; AVX512VL-SLOW-NEXT:    vshufps {{.*#+}} ymm0 = ymm1[1,3],ymm0[1,3],ymm1[5,7],ymm0[5,7]
   2674 ; AVX512VL-SLOW-NEXT:    vpermpd {{.*#+}} ymm0 = ymm0[0,2,1,3]
   2675 ; AVX512VL-SLOW-NEXT:    vpaddd %ymm0, %ymm2, %ymm0
   2676 ; AVX512VL-SLOW-NEXT:    retq
   2677 ;
   2678 ; AVX512VL-FAST-LABEL: add_v8i32_8ACE0246_9BDF1357:
   2679 ; AVX512VL-FAST:       # %bb.0: # %entry
   2680 ; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,2,4,6,8,10,12,14]
   2681 ; AVX512VL-FAST-NEXT:    vpermi2d %ymm0, %ymm1, %ymm2
   2682 ; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm3 = [1,3,5,7,9,11,13,15]
   2683 ; AVX512VL-FAST-NEXT:    vpermi2d %ymm0, %ymm1, %ymm3
   2684 ; AVX512VL-FAST-NEXT:    vpaddd %ymm3, %ymm2, %ymm0
   2685 ; AVX512VL-FAST-NEXT:    retq
   2686 entry:
          ; Mask indices 0-7 select from %a and 8-15 select from %b.
          ; Even lanes of %b (8,10,12,14) go to the low half, even lanes of %a (0,2,4,6) to the high half.
   2687   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 8, i32 10, i32 12, i32 14, i32 0, i32 2, i32 4, i32 6>
          ; Odd lanes in the same order: %b (9,11,13,15) first, then %a (1,3,5,7).
   2688   %shuffle1 = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 9, i32 11, i32 13, i32 15, i32 1, i32 3, i32 5, i32 7>
          ; Lane-wise integer sum of the even-lane and odd-lane vectors, i.e. each original adjacent pair is added.
   2689   %add = add <8 x i32> %shuffle, %shuffle1
   2690   ret <8 x i32> %add
   2691 }
   2692