Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mcpu=x86-64 -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F
      3 ; RUN: llc < %s -mtriple=i386-unknown-linux-gnu -mattr=+avx512f | FileCheck %s --check-prefix=ALL --check-prefix=AVX512F-32
      4 
      5 target triple = "x86_64-unknown-unknown"
        ; Module triple. The first llc run line above passes no -mtriple and so
        ; relies on this value; the second run line overrides it with
        ; -mtriple=i386-unknown-linux-gnu.
      6 
      7 define <8 x double> @shuffle_v8f64_00000000(<8 x double> %a, <8 x double> %b) {
        ; Splat: every result lane takes element 0 of %a; %b is never selected.
        ; Both runs expect a single vbroadcastsd.
      8 ; AVX512F-LABEL: shuffle_v8f64_00000000:
      9 ; AVX512F:       # BB#0:
     10 ; AVX512F-NEXT:    vbroadcastsd %xmm0, %zmm0
     11 ; AVX512F-NEXT:    retq
     12 ;
     13 ; AVX512F-32-LABEL: shuffle_v8f64_00000000:
     14 ; AVX512F-32:       # BB#0:
     15 ; AVX512F-32-NEXT:    vbroadcastsd %xmm0, %zmm0
     16 ; AVX512F-32-NEXT:    retl
     17   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     18   ret <8 x double> %shuffle
     19 }
     20 
     21 define <8 x double> @shuffle_v8f64_00000010(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: result lane 6 takes element 1, every other
        ; lane takes element 0; %b is never selected. Both runs expect an index
        ; constant followed by vpermpd. In the i386 run the same indices are
        ; printed as 16 values, each index followed by a 0.
     22 ; AVX512F-LABEL: shuffle_v8f64_00000010:
     23 ; AVX512F:       # BB#0:
     24 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
     25 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     26 ; AVX512F-NEXT:    retq
     27 ;
     28 ; AVX512F-32-LABEL: shuffle_v8f64_00000010:
     29 ; AVX512F-32:       # BB#0:
     30 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
     31 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     32 ; AVX512F-32-NEXT:    retl
     33   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
     34   ret <8 x double> %shuffle
     35 }
     36 
     37 define <8 x double> @shuffle_v8f64_00000200(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: result lane 5 takes element 2, every other
        ; lane takes element 0; %b is never selected. Both runs expect an index
        ; constant followed by vpermpd.
     38 ; AVX512F-LABEL: shuffle_v8f64_00000200:
     39 ; AVX512F:       # BB#0:
     40 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
     41 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     42 ; AVX512F-NEXT:    retq
     43 ;
     44 ; AVX512F-32-LABEL: shuffle_v8f64_00000200:
     45 ; AVX512F-32:       # BB#0:
     46 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
     47 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     48 ; AVX512F-32-NEXT:    retl
     49   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
     50   ret <8 x double> %shuffle
     51 }
     52 
     53 define <8 x double> @shuffle_v8f64_00003000(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: result lane 4 takes element 3, every other
        ; lane takes element 0; %b is never selected. Both runs expect an index
        ; constant followed by vpermpd.
     54 ; AVX512F-LABEL: shuffle_v8f64_00003000:
     55 ; AVX512F:       # BB#0:
     56 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
     57 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     58 ; AVX512F-NEXT:    retq
     59 ;
     60 ; AVX512F-32-LABEL: shuffle_v8f64_00003000:
     61 ; AVX512F-32:       # BB#0:
     62 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
     63 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     64 ; AVX512F-32-NEXT:    retl
     65   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
     66   ret <8 x double> %shuffle
     67 }
     68 
     69 define <8 x double> @shuffle_v8f64_00040000(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: result lane 3 takes element 4, every other
        ; lane takes element 0; %b is never selected. Both runs expect an index
        ; constant followed by vpermpd.
     70 ; AVX512F-LABEL: shuffle_v8f64_00040000:
     71 ; AVX512F:       # BB#0:
     72 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
     73 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     74 ; AVX512F-NEXT:    retq
     75 ;
     76 ; AVX512F-32-LABEL: shuffle_v8f64_00040000:
     77 ; AVX512F-32:       # BB#0:
     78 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
     79 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     80 ; AVX512F-32-NEXT:    retl
     81   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
     82   ret <8 x double> %shuffle
     83 }
     84 
     85 define <8 x double> @shuffle_v8f64_00500000(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: result lane 2 takes element 5, every other
        ; lane takes element 0; %b is never selected. Both runs expect an index
        ; constant followed by vpermpd.
     86 ; AVX512F-LABEL: shuffle_v8f64_00500000:
     87 ; AVX512F:       # BB#0:
     88 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
     89 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     90 ; AVX512F-NEXT:    retq
     91 ;
     92 ; AVX512F-32-LABEL: shuffle_v8f64_00500000:
     93 ; AVX512F-32:       # BB#0:
     94 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
     95 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     96 ; AVX512F-32-NEXT:    retl
     97   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
     98   ret <8 x double> %shuffle
     99 }
    100 
    101 define <8 x double> @shuffle_v8f64_06000000(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: result lane 1 takes element 6, every other
        ; lane takes element 0; %b is never selected. Both runs expect an index
        ; constant followed by vpermpd.
    102 ; AVX512F-LABEL: shuffle_v8f64_06000000:
    103 ; AVX512F:       # BB#0:
    104 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
    105 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    106 ; AVX512F-NEXT:    retq
    107 ;
    108 ; AVX512F-32-LABEL: shuffle_v8f64_06000000:
    109 ; AVX512F-32:       # BB#0:
    110 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
    111 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    112 ; AVX512F-32-NEXT:    retl
    113   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    114   ret <8 x double> %shuffle
    115 }
    116 
    117 define <8 x double> @shuffle_v8f64_70000000(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: result lane 0 takes element 7, every other
        ; lane takes element 0; %b is never selected. The expected code here
        ; builds the vpermpd index vector in registers (zeroed vector, movl $7,
        ; element insert, vinserti32x4) rather than with a single vmovdqa64.
    118 ; AVX512F-LABEL: shuffle_v8f64_70000000:
    119 ; AVX512F:       # BB#0:
    120 ; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    121 ; AVX512F-NEXT:    movl $7, %eax
    122 ; AVX512F-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm2
    123 ; AVX512F-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm1
    124 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    125 ; AVX512F-NEXT:    retq
    126 ;
    127 ; AVX512F-32-LABEL: shuffle_v8f64_70000000:
    128 ; AVX512F-32:       # BB#0:
    129 ; AVX512F-32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    130 ; AVX512F-32-NEXT:    movl $7, %eax
    131 ; AVX512F-32-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
    132 ; AVX512F-32-NEXT:    vpxord %zmm2, %zmm2, %zmm2
    133 ; AVX512F-32-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
    134 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    135 ; AVX512F-32-NEXT:    retl
    136   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    137   ret <8 x double> %shuffle
    138 }
    139 
    140 define <8 x double> @shuffle_v8f64_01014545(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: elements 0-1 are repeated across result
        ; lanes 0-3 and elements 4-5 across lanes 4-7; %b is never selected.
        ; Both runs expect a single vshuff64x2 with no index constant.
    141 ; AVX512F-LABEL: shuffle_v8f64_01014545:
    142 ; AVX512F:       # BB#0:
    143 ; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
    144 ; AVX512F-NEXT:    retq
    145 ;
    146 ; AVX512F-32-LABEL: shuffle_v8f64_01014545:
    147 ; AVX512F-32:       # BB#0:
    148 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
    149 ; AVX512F-32-NEXT:    retl
    150   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    151   ret <8 x double> %shuffle
    152 }
    153 
    154 define <8 x double> @shuffle_v8f64_00112233(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: each of elements 0..3 is written to two
        ; adjacent result lanes; %b is never selected. Both runs expect an index
        ; constant followed by vpermpd.
    155 ; AVX512F-LABEL: shuffle_v8f64_00112233:
    156 ; AVX512F:       # BB#0:
    157 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
    158 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    159 ; AVX512F-NEXT:    retq
    160 ;
    161 ; AVX512F-32-LABEL: shuffle_v8f64_00112233:
    162 ; AVX512F-32:       # BB#0:
    163 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
    164 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    165 ; AVX512F-32-NEXT:    retl
    166   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
    167   ret <8 x double> %shuffle
    168 }
    169 
    170 define <8 x double> @shuffle_v8f64_00001111(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a: result lanes 0-3 take element 0 and lanes
        ; 4-7 take element 1; %b is never selected. Both runs expect an index
        ; constant followed by vpermpd.
    171 ; AVX512F-LABEL: shuffle_v8f64_00001111:
    172 ; AVX512F:       # BB#0:
    173 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
    174 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    175 ; AVX512F-NEXT:    retq
    176 ;
    177 ; AVX512F-32-LABEL: shuffle_v8f64_00001111:
    178 ; AVX512F-32:       # BB#0:
    179 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
    180 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    181 ; AVX512F-32-NEXT:    retl
    182   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
    183   ret <8 x double> %shuffle
    184 }
    185 
    186 define <8 x double> @shuffle_v8f64_81a3c5e7(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): even result
        ; lanes take %b's element at the same position, odd lanes take %a's.
        ; The expected vpermt2pd writes into zmm1, so its index constant
        ; [0,9,2,11,4,13,6,15] is the mask with the two inputs' roles exchanged,
        ; and a final vmovaps copies the result to zmm0.
    187 ;
    188 ; AVX512F-LABEL: shuffle_v8f64_81a3c5e7:
    189 ; AVX512F:       # BB#0:
    190 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,9,2,11,4,13,6,15]
    191 ; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    192 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    193 ; AVX512F-NEXT:    retq
    194 ;
    195 ; AVX512F-32-LABEL: shuffle_v8f64_81a3c5e7:
    196 ; AVX512F-32:       # BB#0:
    197 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,2,0,11,0,4,0,13,0,6,0,15,0]
    198 ; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    199 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    200 ; AVX512F-32-NEXT:    retl
    201   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
    202   ret <8 x double> %shuffle
    203 }
    204 
    205 define <8 x double> @shuffle_v8f64_08080808(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask index 8 is element 0 of %b): the result
        ; alternates element 0 of %a with element 0 of %b. Both runs expect an
        ; index constant equal to the mask followed by vpermt2pd into zmm0.
    206 ;
    207 ; AVX512F-LABEL: shuffle_v8f64_08080808:
    208 ; AVX512F:       # BB#0:
    209 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
    210 ; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    211 ; AVX512F-NEXT:    retq
    212 ;
    213 ; AVX512F-32-LABEL: shuffle_v8f64_08080808:
    214 ; AVX512F-32:       # BB#0:
    215 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
    216 ; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    217 ; AVX512F-32-NEXT:    retl
    218   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
    219   ret <8 x double> %shuffle
    220 }
    221 
    222 define <8 x double> @shuffle_v8f64_08084c4c(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): result lanes
        ; 0-3 alternate element 0 of %a and of %b; lanes 4-7 alternate element 4
        ; of %a and of %b. Both runs expect an index constant equal to the mask
        ; followed by vpermt2pd into zmm0.
    223 ;
    224 ; AVX512F-LABEL: shuffle_v8f64_08084c4c:
    225 ; AVX512F:       # BB#0:
    226 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
    227 ; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    228 ; AVX512F-NEXT:    retq
    229 ;
    230 ; AVX512F-32-LABEL: shuffle_v8f64_08084c4c:
    231 ; AVX512F-32:       # BB#0:
    232 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
    233 ; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    234 ; AVX512F-32-NEXT:    retl
    235   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
    236   ret <8 x double> %shuffle
    237 }
    238 
    239 define <8 x double> @shuffle_v8f64_8823cc67(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): lanes 0-1 take
        ; element 0 of %b, lanes 2-3 take elements 2-3 of %a, lanes 4-5 take
        ; element 4 of %b, lanes 6-7 take elements 6-7 of %a. The expected
        ; vpermt2pd writes into zmm1, so its index constant is the mask with
        ; the two inputs' roles exchanged; a vmovaps then copies to zmm0.
    240 ;
    241 ; AVX512F-LABEL: shuffle_v8f64_8823cc67:
    242 ; AVX512F:       # BB#0:
    243 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
    244 ; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    245 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    246 ; AVX512F-NEXT:    retq
    247 ;
    248 ; AVX512F-32-LABEL: shuffle_v8f64_8823cc67:
    249 ; AVX512F-32:       # BB#0:
    250 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
    251 ; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    252 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    253 ; AVX512F-32-NEXT:    retl
    254   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
    255   ret <8 x double> %shuffle
    256 }
    257 
    258 define <8 x double> @shuffle_v8f64_9832dc76(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): the result is
        ; %b[1], %b[0], %a[3], %a[2], %b[5], %b[4], %a[7], %a[6]. The expected
        ; vpermt2pd writes into zmm1, so its index constant is the mask with
        ; the two inputs' roles exchanged; a vmovaps then copies to zmm0.
    259 ;
    260 ; AVX512F-LABEL: shuffle_v8f64_9832dc76:
    261 ; AVX512F:       # BB#0:
    262 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
    263 ; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    264 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    265 ; AVX512F-NEXT:    retq
    266 ;
    267 ; AVX512F-32-LABEL: shuffle_v8f64_9832dc76:
    268 ; AVX512F-32:       # BB#0:
    269 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
    270 ; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    271 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    272 ; AVX512F-32-NEXT:    retl
    273   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
    274   ret <8 x double> %shuffle
    275 }
    276 
    277 define <8 x double> @shuffle_v8f64_9810dc54(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): the result is
        ; %b[1], %b[0], %a[1], %a[0], %b[5], %b[4], %a[5], %a[4]. The expected
        ; vpermt2pd writes into zmm1, so its index constant is the mask with
        ; the two inputs' roles exchanged; a vmovaps then copies to zmm0.
    278 ;
    279 ; AVX512F-LABEL: shuffle_v8f64_9810dc54:
    280 ; AVX512F:       # BB#0:
    281 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
    282 ; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    283 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    284 ; AVX512F-NEXT:    retq
    285 ;
    286 ; AVX512F-32-LABEL: shuffle_v8f64_9810dc54:
    287 ; AVX512F-32:       # BB#0:
    288 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
    289 ; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    290 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    291 ; AVX512F-32-NEXT:    retl
    292   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
    293   ret <8 x double> %shuffle
    294 }
    295 
    296 define <8 x double> @shuffle_v8f64_08194c5d(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): interleaves
        ; elements 0-1 of %a with elements 0-1 of %b in lanes 0-3, and elements
        ; 4-5 of both in lanes 4-7. Both runs expect an index constant equal to
        ; the mask followed by vpermt2pd into zmm0.
    297 ;
    298 ; AVX512F-LABEL: shuffle_v8f64_08194c5d:
    299 ; AVX512F:       # BB#0:
    300 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
    301 ; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    302 ; AVX512F-NEXT:    retq
    303 ;
    304 ; AVX512F-32-LABEL: shuffle_v8f64_08194c5d:
    305 ; AVX512F-32:       # BB#0:
    306 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
    307 ; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    308 ; AVX512F-32-NEXT:    retl
    309   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
    310   ret <8 x double> %shuffle
    311 }
    312 
    313 define <8 x double> @shuffle_v8f64_2a3b6e7f(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): interleaves
        ; elements 2-3 of %a with elements 2-3 of %b in lanes 0-3, and elements
        ; 6-7 of both in lanes 4-7. Both runs expect an index constant equal to
        ; the mask followed by vpermt2pd into zmm0.
    314 ;
    315 ; AVX512F-LABEL: shuffle_v8f64_2a3b6e7f:
    316 ; AVX512F:       # BB#0:
    317 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
    318 ; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    319 ; AVX512F-NEXT:    retq
    320 ;
    321 ; AVX512F-32-LABEL: shuffle_v8f64_2a3b6e7f:
    322 ; AVX512F-32:       # BB#0:
    323 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
    324 ; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    325 ; AVX512F-32-NEXT:    retl
    326   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
    327   ret <8 x double> %shuffle
    328 }
    329 
    330 define <8 x double> @shuffle_v8f64_08192a3b(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): interleaves
        ; elements 0-3 of %a with elements 0-3 of %b across all eight lanes.
        ; Both runs expect an index constant equal to the mask followed by
        ; vpermt2pd into zmm0.
    331 ;
    332 ; AVX512F-LABEL: shuffle_v8f64_08192a3b:
    333 ; AVX512F:       # BB#0:
    334 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
    335 ; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    336 ; AVX512F-NEXT:    retq
    337 ;
    338 ; AVX512F-32-LABEL: shuffle_v8f64_08192a3b:
    339 ; AVX512F-32:       # BB#0:
    340 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
    341 ; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    342 ; AVX512F-32-NEXT:    retl
    343   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    344   ret <8 x double> %shuffle
    345 }
    346 
    347 define <8 x double> @shuffle_v8f64_08991abb(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): the result is
        ; %a[0], %b[0], %b[1], %b[1], %a[1], %b[2], %b[3], %b[3]. The expected
        ; vpermt2pd writes into zmm1, so its index constant is the mask with
        ; the two inputs' roles exchanged; a vmovaps then copies to zmm0.
    348 ;
    349 ; AVX512F-LABEL: shuffle_v8f64_08991abb:
    350 ; AVX512F:       # BB#0:
    351 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
    352 ; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    353 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    354 ; AVX512F-NEXT:    retq
    355 ;
    356 ; AVX512F-32-LABEL: shuffle_v8f64_08991abb:
    357 ; AVX512F-32:       # BB#0:
    358 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
    359 ; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    360 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    361 ; AVX512F-32-NEXT:    retl
    362   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
    363   ret <8 x double> %shuffle
    364 }
    365 
    366 define <8 x double> @shuffle_v8f64_091b2d3f(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): even result
        ; lanes take elements 0..3 of %a in order; odd lanes take elements 1, 3,
        ; 5, 7 of %b. Both runs expect an index constant equal to the mask
        ; followed by vpermt2pd into zmm0.
    367 ;
    368 ; AVX512F-LABEL: shuffle_v8f64_091b2d3f:
    369 ; AVX512F:       # BB#0:
    370 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
    371 ; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    372 ; AVX512F-NEXT:    retq
    373 ;
    374 ; AVX512F-32-LABEL: shuffle_v8f64_091b2d3f:
    375 ; AVX512F-32:       # BB#0:
    376 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
    377 ; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
    378 ; AVX512F-32-NEXT:    retl
    379   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
    380   ret <8 x double> %shuffle
    381 }
    382 
    383 define <8 x double> @shuffle_v8f64_09ab1def(<8 x double> %a, <8 x double> %b) {
        ; Two-input shuffle (mask indices 8..15 select from %b): lane 0 takes
        ; element 0 of %a and lane 4 takes element 1 of %a; every other lane
        ; takes %b's element at the same position. The expected vpermt2pd writes
        ; into zmm1, so its index constant is the mask with the two inputs'
        ; roles exchanged; a vmovaps then copies to zmm0.
    384 ;
    385 ; AVX512F-LABEL: shuffle_v8f64_09ab1def:
    386 ; AVX512F:       # BB#0:
    387 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
    388 ; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    389 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    390 ; AVX512F-NEXT:    retq
    391 ;
    392 ; AVX512F-32-LABEL: shuffle_v8f64_09ab1def:
    393 ; AVX512F-32:       # BB#0:
    394 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
    395 ; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
    396 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    397 ; AVX512F-32-NEXT:    retl
    398   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
    399   ret <8 x double> %shuffle
    400 }
    401 
    402 define <8 x double> @shuffle_v8f64_00014445(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): the pattern
        ; [0,0,0,1] is applied to elements 0-3 and again, offset by 4, to
        ; elements 4-7. Both runs expect an index constant followed by vpermpd.
    403 ;
    404 ; AVX512F-LABEL: shuffle_v8f64_00014445:
    405 ; AVX512F:       # BB#0:
    406 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,4,4,4,5]
    407 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    408 ; AVX512F-NEXT:    retq
    409 ;
    410 ; AVX512F-32-LABEL: shuffle_v8f64_00014445:
    411 ; AVX512F-32:       # BB#0:
    412 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,4,0,4,0,4,0,5,0]
    413 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    414 ; AVX512F-32-NEXT:    retl
    415   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
    416   ret <8 x double> %shuffle
    417 }
    418 
    419 define <8 x double> @shuffle_v8f64_00204464(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): the pattern
        ; [0,0,2,0] is applied to elements 0-3 and again, offset by 4, to
        ; elements 4-7. Both runs expect an index constant followed by vpermpd.
    420 ;
    421 ; AVX512F-LABEL: shuffle_v8f64_00204464:
    422 ; AVX512F:       # BB#0:
    423 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,4,6,4]
    424 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    425 ; AVX512F-NEXT:    retq
    426 ;
    427 ; AVX512F-32-LABEL: shuffle_v8f64_00204464:
    428 ; AVX512F-32:       # BB#0:
    429 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,4,0,6,0,4,0]
    430 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    431 ; AVX512F-32-NEXT:    retl
    432   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
    433   ret <8 x double> %shuffle
    434 }
    435 
    436 define <8 x double> @shuffle_v8f64_03004744(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): the pattern
        ; [0,3,0,0] is applied to elements 0-3 and again, offset by 4, to
        ; elements 4-7. Both runs expect an index constant followed by vpermpd.
    437 ;
    438 ; AVX512F-LABEL: shuffle_v8f64_03004744:
    439 ; AVX512F:       # BB#0:
    440 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,7,4,4]
    441 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    442 ; AVX512F-NEXT:    retq
    443 ;
    444 ; AVX512F-32-LABEL: shuffle_v8f64_03004744:
    445 ; AVX512F-32:       # BB#0:
    446 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,7,0,4,0,4,0]
    447 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    448 ; AVX512F-32-NEXT:    retl
    449   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
    450   ret <8 x double> %shuffle
    451 }
    452 
    453 define <8 x double> @shuffle_v8f64_10005444(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): the pattern
        ; [1,0,0,0] is applied to elements 0-3 and again, offset by 4, to
        ; elements 4-7. Both runs expect an index constant followed by vpermpd.
    454 ;
    455 ; AVX512F-LABEL: shuffle_v8f64_10005444:
    456 ; AVX512F:       # BB#0:
    457 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,5,4,4,4]
    458 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    459 ; AVX512F-NEXT:    retq
    460 ;
    461 ; AVX512F-32-LABEL: shuffle_v8f64_10005444:
    462 ; AVX512F-32:       # BB#0:
    463 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,5,0,4,0,4,0,4,0]
    464 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    465 ; AVX512F-32-NEXT:    retl
    466   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
    467   ret <8 x double> %shuffle
    468 }
    469 
    470 define <8 x double> @shuffle_v8f64_22006644(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): the pattern
        ; [2,2,0,0] is applied to elements 0-3 and again, offset by 4, to
        ; elements 4-7. Both runs expect an index constant followed by vpermpd.
    471 ;
    472 ; AVX512F-LABEL: shuffle_v8f64_22006644:
    473 ; AVX512F:       # BB#0:
    474 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,6,4,4]
    475 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    476 ; AVX512F-NEXT:    retq
    477 ;
    478 ; AVX512F-32-LABEL: shuffle_v8f64_22006644:
    479 ; AVX512F-32:       # BB#0:
    480 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,6,0,4,0,4,0]
    481 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    482 ; AVX512F-32-NEXT:    retl
    483   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
    484   ret <8 x double> %shuffle
    485 }
    486 
    487 define <8 x double> @shuffle_v8f64_33307774(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): the pattern
        ; [3,3,3,0] is applied to elements 0-3 and again, offset by 4, to
        ; elements 4-7. Both runs expect an index constant followed by vpermpd.
    488 ;
    489 ; AVX512F-LABEL: shuffle_v8f64_33307774:
    490 ; AVX512F:       # BB#0:
    491 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,7,7,4]
    492 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    493 ; AVX512F-NEXT:    retq
    494 ;
    495 ; AVX512F-32-LABEL: shuffle_v8f64_33307774:
    496 ; AVX512F-32:       # BB#0:
    497 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,7,0,7,0,4,0]
    498 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    499 ; AVX512F-32-NEXT:    retl
    500   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
    501   ret <8 x double> %shuffle
    502 }
    503 
    504 define <8 x double> @shuffle_v8f64_32107654(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): reverses the order
        ; of elements 0-3 and, separately, of elements 4-7. Both runs expect an
        ; index constant followed by vpermpd.
    505 ;
    506 ; AVX512F-LABEL: shuffle_v8f64_32107654:
    507 ; AVX512F:       # BB#0:
    508 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,7,6,5,4]
    509 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    510 ; AVX512F-NEXT:    retq
    511 ;
    512 ; AVX512F-32-LABEL: shuffle_v8f64_32107654:
    513 ; AVX512F-32:       # BB#0:
    514 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
    515 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    516 ; AVX512F-32-NEXT:    retl
    517   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
    518   ret <8 x double> %shuffle
    519 }
    520 
    521 define <8 x double> @shuffle_v8f64_00234467(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): lanes 1 and 5 are
        ; overwritten with elements 0 and 4 respectively; every other lane keeps
        ; its own element. Both runs expect an index constant followed by
        ; vpermpd.
    522 ;
    523 ; AVX512F-LABEL: shuffle_v8f64_00234467:
    524 ; AVX512F:       # BB#0:
    525 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,4,4,6,7]
    526 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    527 ; AVX512F-NEXT:    retq
    528 ;
    529 ; AVX512F-32-LABEL: shuffle_v8f64_00234467:
    530 ; AVX512F-32:       # BB#0:
    531 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,4,0,4,0,6,0,7,0]
    532 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    533 ; AVX512F-32-NEXT:    retl
    534   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
    535   ret <8 x double> %shuffle
    536 }
    537 
    538 define <8 x double> @shuffle_v8f64_00224466(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): each even-indexed
        ; element (0, 2, 4, 6) fills both lanes of its adjacent pair. Both runs
        ; expect an index constant followed by vpermpd.
    539 ;
    540 ; AVX512F-LABEL: shuffle_v8f64_00224466:
    541 ; AVX512F:       # BB#0:
    542 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,4,4,6,6]
    543 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    544 ; AVX512F-NEXT:    retq
    545 ;
    546 ; AVX512F-32-LABEL: shuffle_v8f64_00224466:
    547 ; AVX512F-32:       # BB#0:
    548 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
    549 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    550 ; AVX512F-32-NEXT:    retl
    551   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
    552   ret <8 x double> %shuffle
    553 }
    554 
    555 define <8 x double> @shuffle_v8f64_10325476(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): swaps the two
        ; elements of every adjacent pair (0<->1, 2<->3, 4<->5, 6<->7). Both
        ; runs expect an index constant followed by vpermpd.
    556 ;
    557 ; AVX512F-LABEL: shuffle_v8f64_10325476:
    558 ; AVX512F:       # BB#0:
    559 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,5,4,7,6]
    560 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    561 ; AVX512F-NEXT:    retq
    562 ;
    563 ; AVX512F-32-LABEL: shuffle_v8f64_10325476:
    564 ; AVX512F-32:       # BB#0:
    565 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,5,0,4,0,7,0,6,0]
    566 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    567 ; AVX512F-32-NEXT:    retl
    568   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
    569   ret <8 x double> %shuffle
    570 }
    571 
    572 define <8 x double> @shuffle_v8f64_11335577(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): each odd-indexed
        ; element (1, 3, 5, 7) fills both lanes of its adjacent pair. Both runs
        ; expect an index constant followed by vpermpd.
    573 ;
    574 ; AVX512F-LABEL: shuffle_v8f64_11335577:
    575 ; AVX512F:       # BB#0:
    576 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,5,5,7,7]
    577 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    578 ; AVX512F-NEXT:    retq
    579 ;
    580 ; AVX512F-32-LABEL: shuffle_v8f64_11335577:
    581 ; AVX512F-32:       # BB#0:
    582 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,5,0,5,0,7,0,7,0]
    583 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    584 ; AVX512F-32-NEXT:    retl
    585   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
    586   ret <8 x double> %shuffle
    587 }
    588 
    589 define <8 x double> @shuffle_v8f64_10235467(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): swaps elements 0
        ; and 1 and elements 4 and 5; lanes 2, 3, 6 and 7 keep their own
        ; element. Both runs expect an index constant followed by vpermpd.
    590 ;
    591 ; AVX512F-LABEL: shuffle_v8f64_10235467:
    592 ; AVX512F:       # BB#0:
    593 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,2,3,5,4,6,7]
    594 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    595 ; AVX512F-NEXT:    retq
    596 ;
    597 ; AVX512F-32-LABEL: shuffle_v8f64_10235467:
    598 ; AVX512F-32:       # BB#0:
    599 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,3,0,5,0,4,0,6,0,7,0]
    600 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    601 ; AVX512F-32-NEXT:    retl
    602   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
    603   ret <8 x double> %shuffle
    604 }
    605 
    606 define <8 x double> @shuffle_v8f64_10225466(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): the pattern
        ; [1,0,2,2] is applied to elements 0-3 and again, offset by 4, to
        ; elements 4-7. Both runs expect an index constant followed by vpermpd.
    607 ;
    608 ; AVX512F-LABEL: shuffle_v8f64_10225466:
    609 ; AVX512F:       # BB#0:
    610 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,2,2,5,4,6,6]
    611 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    612 ; AVX512F-NEXT:    retq
    613 ;
    614 ; AVX512F-32-LABEL: shuffle_v8f64_10225466:
    615 ; AVX512F-32:       # BB#0:
    616 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,2,0,5,0,4,0,6,0,6,0]
    617 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    618 ; AVX512F-32-NEXT:    retl
    619   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
    620   ret <8 x double> %shuffle
    621 }
    622 
    623 define <8 x double> @shuffle_v8f64_00015444(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): lanes 0-3 use the
        ; pattern [0,0,0,1] on elements 0-3, while lanes 4-7 use a different
        ; pattern, [5,4,4,4], on elements 4-7. Both runs expect an index
        ; constant followed by vpermpd.
    624 ;
    625 ; AVX512F-LABEL: shuffle_v8f64_00015444:
    626 ; AVX512F:       # BB#0:
    627 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
    628 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    629 ; AVX512F-NEXT:    retq
    630 ;
    631 ; AVX512F-32-LABEL: shuffle_v8f64_00015444:
    632 ; AVX512F-32:       # BB#0:
    633 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
    634 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    635 ; AVX512F-32-NEXT:    retl
    636   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
    637   ret <8 x double> %shuffle
    638 }
    639 
    640 define <8 x double> @shuffle_v8f64_00204644(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): lanes 0-3 use the
        ; pattern [0,0,2,0] on elements 0-3, while lanes 4-7 use a different
        ; pattern, [4,6,4,4], on elements 4-7. Both runs expect an index
        ; constant followed by vpermpd.
    641 ;
    642 ; AVX512F-LABEL: shuffle_v8f64_00204644:
    643 ; AVX512F:       # BB#0:
    644 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
    645 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    646 ; AVX512F-NEXT:    retq
    647 ;
    648 ; AVX512F-32-LABEL: shuffle_v8f64_00204644:
    649 ; AVX512F-32:       # BB#0:
    650 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
    651 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    652 ; AVX512F-32-NEXT:    retl
    653   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
    654   ret <8 x double> %shuffle
    655 }
    656 
    657 define <8 x double> @shuffle_v8f64_03004474(<8 x double> %a, <8 x double> %b) {
        ; Single-input shuffle of %a (%b is never selected): lanes 0-3 use the
        ; pattern [0,3,0,0] on elements 0-3, while lanes 4-7 use a different
        ; pattern, [4,4,7,4], on elements 4-7. Both runs expect an index
        ; constant followed by vpermpd.
    658 ;
    659 ; AVX512F-LABEL: shuffle_v8f64_03004474:
    660 ; AVX512F:       # BB#0:
    661 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
    662 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    663 ; AVX512F-NEXT:    retq
    664 ;
    665 ; AVX512F-32-LABEL: shuffle_v8f64_03004474:
    666 ; AVX512F-32:       # BB#0:
    667 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
    668 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
    669 ; AVX512F-32-NEXT:    retl
    670   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
    671   ret <8 x double> %shuffle
    672 }
    673 
     674 define <8 x double> @shuffle_v8f64_10004444(<8 x double> %a, <8 x double> %b) {
     675 ; Permutes %a only (mask <1,0,0,0,4,4,4,4>, every index below 8): expects the mask loaded as a constant into zmm1 and one vpermpd.
     676 ; AVX512F-LABEL: shuffle_v8f64_10004444:
     677 ; AVX512F:       # BB#0:
     678 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
     679 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     680 ; AVX512F-NEXT:    retq
     681 ;
     682 ; AVX512F-32-LABEL: shuffle_v8f64_10004444:
     683 ; AVX512F-32:       # BB#0:
     684 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
     685 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     686 ; AVX512F-32-NEXT:    retl
     687   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
     688   ret <8 x double> %shuffle
     689 }
    690 
     691 define <8 x double> @shuffle_v8f64_22006446(<8 x double> %a, <8 x double> %b) {
     692 ; Permutes %a only (mask <2,2,0,0,6,4,4,6>, every index below 8): expects the mask loaded as a constant into zmm1 and one vpermpd.
     693 ; AVX512F-LABEL: shuffle_v8f64_22006446:
     694 ; AVX512F:       # BB#0:
     695 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
     696 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     697 ; AVX512F-NEXT:    retq
     698 ;
     699 ; AVX512F-32-LABEL: shuffle_v8f64_22006446:
     700 ; AVX512F-32:       # BB#0:
     701 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
     702 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     703 ; AVX512F-32-NEXT:    retl
     704   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
     705   ret <8 x double> %shuffle
     706 }
    707 
     708 define <8 x double> @shuffle_v8f64_33307474(<8 x double> %a, <8 x double> %b) {
     709 ; Permutes %a only (mask <3,3,3,0,7,4,7,4>, every index below 8): expects the mask loaded as a constant into zmm1 and one vpermpd.
     710 ; AVX512F-LABEL: shuffle_v8f64_33307474:
     711 ; AVX512F:       # BB#0:
     712 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
     713 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     714 ; AVX512F-NEXT:    retq
     715 ;
     716 ; AVX512F-32-LABEL: shuffle_v8f64_33307474:
     717 ; AVX512F-32:       # BB#0:
     718 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
     719 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     720 ; AVX512F-32-NEXT:    retl
     721   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
     722   ret <8 x double> %shuffle
     723 }
    724 
     725 define <8 x double> @shuffle_v8f64_32104567(<8 x double> %a, <8 x double> %b) {
     726 ; Permutes %a only (mask <3,2,1,0,4,5,6,7>: first four lanes reversed, last four in place): expects the mask loaded as a constant into zmm1 and one vpermpd.
     727 ; AVX512F-LABEL: shuffle_v8f64_32104567:
     728 ; AVX512F:       # BB#0:
     729 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
     730 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     731 ; AVX512F-NEXT:    retq
     732 ;
     733 ; AVX512F-32-LABEL: shuffle_v8f64_32104567:
     734 ; AVX512F-32:       # BB#0:
     735 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
     736 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     737 ; AVX512F-32-NEXT:    retl
     738   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
     739   ret <8 x double> %shuffle
     740 }
    741 
     742 define <8 x double> @shuffle_v8f64_00236744(<8 x double> %a, <8 x double> %b) {
     743 ; Permutes %a only (mask <0,0,2,3,6,7,4,4>, every index below 8): expects the mask loaded as a constant into zmm1 and one vpermpd.
     744 ; AVX512F-LABEL: shuffle_v8f64_00236744:
     745 ; AVX512F:       # BB#0:
     746 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
     747 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     748 ; AVX512F-NEXT:    retq
     749 ;
     750 ; AVX512F-32-LABEL: shuffle_v8f64_00236744:
     751 ; AVX512F-32:       # BB#0:
     752 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
     753 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     754 ; AVX512F-32-NEXT:    retl
     755   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
     756   ret <8 x double> %shuffle
     757 }
    758 
     759 define <8 x double> @shuffle_v8f64_00226644(<8 x double> %a, <8 x double> %b) {
     760 ; Permutes %a only (mask <0,0,2,2,6,6,4,4>, every index below 8): expects the mask loaded as a constant into zmm1 and one vpermpd.
     761 ; AVX512F-LABEL: shuffle_v8f64_00226644:
     762 ; AVX512F:       # BB#0:
     763 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
     764 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     765 ; AVX512F-NEXT:    retq
     766 ;
     767 ; AVX512F-32-LABEL: shuffle_v8f64_00226644:
     768 ; AVX512F-32:       # BB#0:
     769 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
     770 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     771 ; AVX512F-32-NEXT:    retl
     772   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
     773   ret <8 x double> %shuffle
     774 }
    775 
     776 define <8 x double> @shuffle_v8f64_10324567(<8 x double> %a, <8 x double> %b) {
     777 ; Permutes %a only (mask <1,0,3,2,4,5,6,7>: pairs swapped in the first four lanes, last four in place): expects the mask loaded as a constant into zmm1 and one vpermpd.
     778 ; AVX512F-LABEL: shuffle_v8f64_10324567:
     779 ; AVX512F:       # BB#0:
     780 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,4,5,6,7]
     781 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     782 ; AVX512F-NEXT:    retq
     783 ;
     784 ; AVX512F-32-LABEL: shuffle_v8f64_10324567:
     785 ; AVX512F-32:       # BB#0:
     786 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,4,0,5,0,6,0,7,0]
     787 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     788 ; AVX512F-32-NEXT:    retl
     789   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
     790   ret <8 x double> %shuffle
     791 }
    792 
     793 define <8 x double> @shuffle_v8f64_11334567(<8 x double> %a, <8 x double> %b) {
     794 ; Permutes %a only (mask <1,1,3,3,4,5,6,7>, every index below 8): expects the mask loaded as a constant into zmm1 and one vpermpd.
     795 ; AVX512F-LABEL: shuffle_v8f64_11334567:
     796 ; AVX512F:       # BB#0:
     797 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,4,5,6,7]
     798 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     799 ; AVX512F-NEXT:    retq
     800 ;
     801 ; AVX512F-32-LABEL: shuffle_v8f64_11334567:
     802 ; AVX512F-32:       # BB#0:
     803 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,4,0,5,0,6,0,7,0]
     804 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     805 ; AVX512F-32-NEXT:    retl
     806   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
     807   ret <8 x double> %shuffle
     808 }
    809 
     810 define <8 x double> @shuffle_v8f64_01235467(<8 x double> %a, <8 x double> %b) {
     811 ; Permutes %a only (mask <0,1,2,3,5,4,6,7>: only lanes 4 and 5 trade places): expects the mask loaded as a constant into zmm1 and one vpermpd.
     812 ; AVX512F-LABEL: shuffle_v8f64_01235467:
     813 ; AVX512F:       # BB#0:
     814 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,7]
     815 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     816 ; AVX512F-NEXT:    retq
     817 ;
     818 ; AVX512F-32-LABEL: shuffle_v8f64_01235467:
     819 ; AVX512F-32:       # BB#0:
     820 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,7,0]
     821 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     822 ; AVX512F-32-NEXT:    retl
     823   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
     824   ret <8 x double> %shuffle
     825 }
    826 
     827 define <8 x double> @shuffle_v8f64_01235466(<8 x double> %a, <8 x double> %b) {
     828 ; Permutes %a only (mask <0,1,2,3,5,4,6,6>, every index below 8): expects the mask loaded as a constant into zmm1 and one vpermpd.
     829 ; AVX512F-LABEL: shuffle_v8f64_01235466:
     830 ; AVX512F:       # BB#0:
     831 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,6]
     832 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     833 ; AVX512F-NEXT:    retq
     834 ;
     835 ; AVX512F-32-LABEL: shuffle_v8f64_01235466:
     836 ; AVX512F-32:       # BB#0:
     837 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,6,0]
     838 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     839 ; AVX512F-32-NEXT:    retl
     840   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
     841   ret <8 x double> %shuffle
     842 }
    843 
     844 define <8 x double> @shuffle_v8f64_002u6u44(<8 x double> %a, <8 x double> %b) {
     845 ; Permutes %a only with a partly undef mask <0,0,2,undef,6,undef,4,4>: expects vpermpd with the index constant in zmm1, where the undef lanes are printed as u.
     846 ; AVX512F-LABEL: shuffle_v8f64_002u6u44:
     847 ; AVX512F:       # BB#0:
     848 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
     849 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     850 ; AVX512F-NEXT:    retq
     851 ;
     852 ; AVX512F-32-LABEL: shuffle_v8f64_002u6u44:
     853 ; AVX512F-32:       # BB#0:
     854 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
     855 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     856 ; AVX512F-32-NEXT:    retl
     857   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
     858   ret <8 x double> %shuffle
     859 }
    860 
     861 define <8 x double> @shuffle_v8f64_00uu66uu(<8 x double> %a, <8 x double> %b) {
     862 ; Permutes %a only with a partly undef mask <0,0,undef,undef,6,6,undef,undef>: expects vpermpd with the index constant in zmm1, where the undef lanes are printed as u.
     863 ; AVX512F-LABEL: shuffle_v8f64_00uu66uu:
     864 ; AVX512F:       # BB#0:
     865 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
     866 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     867 ; AVX512F-NEXT:    retq
     868 ;
     869 ; AVX512F-32-LABEL: shuffle_v8f64_00uu66uu:
     870 ; AVX512F-32:       # BB#0:
     871 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
     872 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     873 ; AVX512F-32-NEXT:    retl
     874   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
     875   ret <8 x double> %shuffle
     876 }
    877 
     878 define <8 x double> @shuffle_v8f64_103245uu(<8 x double> %a, <8 x double> %b) {
     879 ; Permutes %a only with a partly undef mask <1,0,3,2,4,5,undef,undef>: expects vpermpd with the index constant in zmm1, where the undef lanes are printed as u.
     880 ; AVX512F-LABEL: shuffle_v8f64_103245uu:
     881 ; AVX512F:       # BB#0:
     882 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,0,3,2,4,5,u,u>
     883 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     884 ; AVX512F-NEXT:    retq
     885 ;
     886 ; AVX512F-32-LABEL: shuffle_v8f64_103245uu:
     887 ; AVX512F-32:       # BB#0:
     888 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u>
     889 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     890 ; AVX512F-32-NEXT:    retl
     891   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
     892   ret <8 x double> %shuffle
     893 }
    894 
     895 define <8 x double> @shuffle_v8f64_1133uu67(<8 x double> %a, <8 x double> %b) {
     896 ; Permutes %a only with a partly undef mask <1,1,3,3,undef,undef,6,7>: expects vpermpd with the index constant in zmm1, where the undef lanes are printed as u.
     897 ; AVX512F-LABEL: shuffle_v8f64_1133uu67:
     898 ; AVX512F:       # BB#0:
     899 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,1,3,3,u,u,6,7>
     900 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     901 ; AVX512F-NEXT:    retq
     902 ;
     903 ; AVX512F-32-LABEL: shuffle_v8f64_1133uu67:
     904 ; AVX512F-32:       # BB#0:
     905 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0>
     906 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     907 ; AVX512F-32-NEXT:    retl
     908   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
     909   ret <8 x double> %shuffle
     910 }
    911 
     912 define <8 x double> @shuffle_v8f64_0uu354uu(<8 x double> %a, <8 x double> %b) {
     913 ; Permutes %a only with a partly undef mask <0,undef,undef,3,5,4,undef,undef>: expects vpermpd with the index constant in zmm1, where the undef lanes are printed as u.
     914 ; AVX512F-LABEL: shuffle_v8f64_0uu354uu:
     915 ; AVX512F:       # BB#0:
     916 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,u,u,3,5,4,u,u>
     917 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     918 ; AVX512F-NEXT:    retq
     919 ;
     920 ; AVX512F-32-LABEL: shuffle_v8f64_0uu354uu:
     921 ; AVX512F-32:       # BB#0:
     922 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u>
     923 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     924 ; AVX512F-32-NEXT:    retl
     925   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
     926   ret <8 x double> %shuffle
     927 }
    928 
     929 define <8 x double> @shuffle_v8f64_uuu3uu66(<8 x double> %a, <8 x double> %b) {
     930 ; Permutes %a only with a mostly undef mask <undef,undef,undef,3,undef,undef,6,6>: expects vpermpd with the index constant in zmm1, where the undef lanes are printed as u.
     931 ; AVX512F-LABEL: shuffle_v8f64_uuu3uu66:
     932 ; AVX512F:       # BB#0:
     933 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <u,u,u,3,u,u,6,6>
     934 ; AVX512F-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     935 ; AVX512F-NEXT:    retq
     936 ;
     937 ; AVX512F-32-LABEL: shuffle_v8f64_uuu3uu66:
     938 ; AVX512F-32:       # BB#0:
     939 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0>
     940 ; AVX512F-32-NEXT:    vpermpd %zmm0, %zmm1, %zmm0
     941 ; AVX512F-32-NEXT:    retl
     942   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
     943   ret <8 x double> %shuffle
     944 }
    945 
     946 define <8 x double> @shuffle_v8f64_c348cda0(<8 x double> %a, <8 x double> %b) {
     947 ; Two-source shuffle, mask <12,3,4,8,12,13,10,0> (0-7 pick from %a, 8-15 from %b): expects vpermt2pd writing zmm1 with index constant [4,11,12,0,4,5,2,8] (each IR index shifted by 8 modulo 16), then a copy of zmm1 to zmm0.
     948 ; AVX512F-LABEL: shuffle_v8f64_c348cda0:
     949 ; AVX512F:       # BB#0:
     950 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [4,11,12,0,4,5,2,8]
     951 ; AVX512F-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
     952 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
     953 ; AVX512F-NEXT:    retq
     954 ;
     955 ; AVX512F-32-LABEL: shuffle_v8f64_c348cda0:
     956 ; AVX512F-32:       # BB#0:
     957 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [4,0,11,0,12,0,0,0,4,0,5,0,2,0,8,0]
     958 ; AVX512F-32-NEXT:    vpermt2pd %zmm0, %zmm2, %zmm1
     959 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
     960 ; AVX512F-32-NEXT:    retl
     961   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 12, i32 3, i32 4, i32 8, i32 12, i32 13, i32 10, i32 0>
     962   ret <8 x double> %shuffle
     963 }
    964 
     965 define <8 x double> @shuffle_v8f64_f511235a(<8 x double> %a, <8 x double> %b) {
     966 ; Two-source shuffle, mask <15,5,1,1,2,3,5,10> (0-7 pick from %a, 8-15 from %b): expects vpermt2pd writing zmm0 directly, with an index constant equal to the IR mask.
     967 ; AVX512F-LABEL: shuffle_v8f64_f511235a:
     968 ; AVX512F:       # BB#0:
     969 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,5,1,1,2,3,5,10]
     970 ; AVX512F-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
     971 ; AVX512F-NEXT:    retq
     972 ;
     973 ; AVX512F-32-LABEL: shuffle_v8f64_f511235a:
     974 ; AVX512F-32:       # BB#0:
     975 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [15,0,5,0,1,0,1,0,2,0,3,0,5,0,10,0]
     976 ; AVX512F-32-NEXT:    vpermt2pd %zmm1, %zmm2, %zmm0
     977 ; AVX512F-32-NEXT:    retl
     978   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32> <i32 15, i32 5, i32 1, i32 1, i32 2, i32 3, i32 5, i32 10>
     979   ret <8 x double> %shuffle
     980 }
    981 
     982 define <8 x i64> @shuffle_v8i64_00000000(<8 x i64> %a, <8 x i64> %b) {
     983 ; Splat of element 0 of %a across all eight i64 lanes (all-zero mask): expects a single vpbroadcastq from xmm0 into zmm0, with no index constant.
     984 ; AVX512F-LABEL: shuffle_v8i64_00000000:
     985 ; AVX512F:       # BB#0:
     986 ; AVX512F-NEXT:    vpbroadcastq %xmm0, %zmm0
     987 ; AVX512F-NEXT:    retq
     988 ;
     989 ; AVX512F-32-LABEL: shuffle_v8i64_00000000:
     990 ; AVX512F-32:       # BB#0:
     991 ; AVX512F-32-NEXT:    vpbroadcastq %xmm0, %zmm0
     992 ; AVX512F-32-NEXT:    retl
     993   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
     994   ret <8 x i64> %shuffle
     995 }
    996 
     997 define <8 x i64> @shuffle_v8i64_00000010(<8 x i64> %a, <8 x i64> %b) {
     998 ; Permutes %a only (mask <0,0,0,0,0,0,1,0>: element 0 everywhere except lane 6, which takes element 1): expects the mask loaded as a constant into zmm1 and one vpermq.
     999 ; AVX512F-LABEL: shuffle_v8i64_00000010:
    1000 ; AVX512F:       # BB#0:
    1001 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0]
    1002 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1003 ; AVX512F-NEXT:    retq
    1004 ;
    1005 ; AVX512F-32-LABEL: shuffle_v8i64_00000010:
    1006 ; AVX512F-32:       # BB#0:
    1007 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0]
    1008 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1009 ; AVX512F-32-NEXT:    retl
    1010   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 1, i32 0>
    1011   ret <8 x i64> %shuffle
    1012 }
   1013 
    1014 define <8 x i64> @shuffle_v8i64_00000200(<8 x i64> %a, <8 x i64> %b) {
    1015 ; Permutes %a only (mask <0,0,0,0,0,2,0,0>: element 0 everywhere except lane 5, which takes element 2): expects the mask loaded as a constant into zmm1 and one vpermq.
    1016 ; AVX512F-LABEL: shuffle_v8i64_00000200:
    1017 ; AVX512F:       # BB#0:
    1018 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,2,0,0]
    1019 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1020 ; AVX512F-NEXT:    retq
    1021 ;
    1022 ; AVX512F-32-LABEL: shuffle_v8i64_00000200:
    1023 ; AVX512F-32:       # BB#0:
    1024 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,0,0,2,0,0,0,0,0]
    1025 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1026 ; AVX512F-32-NEXT:    retl
    1027   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 2, i32 0, i32 0>
    1028   ret <8 x i64> %shuffle
    1029 }
   1030 
    1031 define <8 x i64> @shuffle_v8i64_00003000(<8 x i64> %a, <8 x i64> %b) {
    1032 ; Permutes %a only (mask <0,0,0,0,3,0,0,0>: element 0 everywhere except lane 4, which takes element 3): expects the mask loaded as a constant into zmm1 and one vpermq.
    1033 ; AVX512F-LABEL: shuffle_v8i64_00003000:
    1034 ; AVX512F:       # BB#0:
    1035 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,3,0,0,0]
    1036 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1037 ; AVX512F-NEXT:    retq
    1038 ;
    1039 ; AVX512F-32-LABEL: shuffle_v8i64_00003000:
    1040 ; AVX512F-32:       # BB#0:
    1041 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,3,0,0,0,0,0,0,0]
    1042 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1043 ; AVX512F-32-NEXT:    retl
    1044   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 3, i32 0, i32 0, i32 0>
    1045   ret <8 x i64> %shuffle
    1046 }
   1047 
    1048 define <8 x i64> @shuffle_v8i64_00040000(<8 x i64> %a, <8 x i64> %b) {
    1049 ; Permutes %a only (mask <0,0,0,4,0,0,0,0>: element 0 everywhere except lane 3, which takes element 4): expects the mask loaded as a constant into zmm1 and one vpermq.
    1050 ; AVX512F-LABEL: shuffle_v8i64_00040000:
    1051 ; AVX512F:       # BB#0:
    1052 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,4,0,0,0,0]
    1053 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1054 ; AVX512F-NEXT:    retq
    1055 ;
    1056 ; AVX512F-32-LABEL: shuffle_v8i64_00040000:
    1057 ; AVX512F-32:       # BB#0:
    1058 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,4,0,0,0,0,0,0,0,0,0]
    1059 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1060 ; AVX512F-32-NEXT:    retl
    1061   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 4, i32 0, i32 0, i32 0, i32 0>
    1062   ret <8 x i64> %shuffle
    1063 }
   1064 
    1065 define <8 x i64> @shuffle_v8i64_00500000(<8 x i64> %a, <8 x i64> %b) {
    1066 ; Permutes %a only (mask <0,0,5,0,0,0,0,0>: element 0 everywhere except lane 2, which takes element 5): expects the mask loaded as a constant into zmm1 and one vpermq.
    1067 ; AVX512F-LABEL: shuffle_v8i64_00500000:
    1068 ; AVX512F:       # BB#0:
    1069 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,5,0,0,0,0,0]
    1070 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1071 ; AVX512F-NEXT:    retq
    1072 ;
    1073 ; AVX512F-32-LABEL: shuffle_v8i64_00500000:
    1074 ; AVX512F-32:       # BB#0:
    1075 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,5,0,0,0,0,0,0,0,0,0,0,0]
    1076 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1077 ; AVX512F-32-NEXT:    retl
    1078   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 5, i32 0, i32 0, i32 0, i32 0, i32 0>
    1079   ret <8 x i64> %shuffle
    1080 }
   1081 
    1082 define <8 x i64> @shuffle_v8i64_06000000(<8 x i64> %a, <8 x i64> %b) {
    1083 ; Permutes %a only (mask <0,6,0,0,0,0,0,0>: element 0 everywhere except lane 1, which takes element 6): expects the mask loaded as a constant into zmm1 and one vpermq.
    1084 ; AVX512F-LABEL: shuffle_v8i64_06000000:
    1085 ; AVX512F:       # BB#0:
    1086 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,6,0,0,0,0,0,0]
    1087 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1088 ; AVX512F-NEXT:    retq
    1089 ;
    1090 ; AVX512F-32-LABEL: shuffle_v8i64_06000000:
    1091 ; AVX512F-32:       # BB#0:
    1092 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,6,0,0,0,0,0,0,0,0,0,0,0,0,0]
    1093 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1094 ; AVX512F-32-NEXT:    retl
    1095   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 6, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    1096   ret <8 x i64> %shuffle
    1097 }
   1098 
    1099 define <8 x i64> @shuffle_v8i64_70000000(<8 x i64> %a, <8 x i64> %b) {
    1100 ; Permutes %a only (mask <7,0,0,0,0,0,0,0>). Here the expected code builds the index vector with instructions (zeroed register, movl $7, vpinsrq or vpinsrd into element 0, vinserti32x4) rather than loading a constant, then applies vpermq.
    1101 ; AVX512F-LABEL: shuffle_v8i64_70000000:
    1102 ; AVX512F:       # BB#0:
    1103 ; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
    1104 ; AVX512F-NEXT:    movl $7, %eax
    1105 ; AVX512F-NEXT:    vpinsrq $0, %rax, %xmm1, %xmm2
    1106 ; AVX512F-NEXT:    vinserti32x4 $0, %xmm2, %zmm1, %zmm1
    1107 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1108 ; AVX512F-NEXT:    retq
    1109 ;
    1110 ; AVX512F-32-LABEL: shuffle_v8i64_70000000:
    1111 ; AVX512F-32:       # BB#0:
    1112 ; AVX512F-32-NEXT:    vpxor %xmm1, %xmm1, %xmm1
    1113 ; AVX512F-32-NEXT:    movl $7, %eax
    1114 ; AVX512F-32-NEXT:    vpinsrd $0, %eax, %xmm1, %xmm1
    1115 ; AVX512F-32-NEXT:    vpxord %zmm2, %zmm2, %zmm2
    1116 ; AVX512F-32-NEXT:    vinserti32x4 $0, %xmm1, %zmm2, %zmm1
    1117 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1118 ; AVX512F-32-NEXT:    retl
    1119   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 7, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
    1120   ret <8 x i64> %shuffle
    1121 }
   1122 
    1123 define <8 x i64> @shuffle_v8i64_01014545(<8 x i64> %a, <8 x i64> %b) {
    1124 ; AVX512F-LABEL: shuffle_v8i64_01014545:
    1125 ; AVX512F:       # BB#0:
    1126 ; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
    1127 ; AVX512F-NEXT:    retq
    1128 ;
    1129 ; AVX512F-32-LABEL: shuffle_v8i64_01014545:
    1130 ; AVX512F-32:       # BB#0:
    1131 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,0,1,4,5,4,5]
    1132 ; AVX512F-32-NEXT:    retl
    1133 ; Permutes %a only (mask <0,1,0,1,4,5,4,5>, whole element pairs repeated): the expectations above are a single vshufi64x2 on zmm0 with no index-vector load.
    1134   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 4, i32 5>
    1135   ret <8 x i64> %shuffle
    1136 }
   1137 
    1138 define <8 x i64> @shuffle_v8i64_00112233(<8 x i64> %a, <8 x i64> %b) {
    1139 ; Permutes %a only (mask <0,0,1,1,2,2,3,3>: each of elements 0-3 duplicated): expects the mask loaded as a constant into zmm1 and one vpermq.
    1140 ; AVX512F-LABEL: shuffle_v8i64_00112233:
    1141 ; AVX512F:       # BB#0:
    1142 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,1,2,2,3,3]
    1143 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1144 ; AVX512F-NEXT:    retq
    1145 ;
    1146 ; AVX512F-32-LABEL: shuffle_v8i64_00112233:
    1147 ; AVX512F-32:       # BB#0:
    1148 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,0,1,0,2,0,2,0,3,0,3,0]
    1149 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1150 ; AVX512F-32-NEXT:    retl
    1151   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 1, i32 1, i32 2, i32 2, i32 3, i32 3>
    1152   ret <8 x i64> %shuffle
    1153 }
   1154 
    1155 define <8 x i64> @shuffle_v8i64_00001111(<8 x i64> %a, <8 x i64> %b) {
    1156 ; Permutes %a only (mask <0,0,0,0,1,1,1,1>: element 0 in the first four lanes, element 1 in the last four): expects the mask loaded as a constant into zmm1 and one vpermq.
    1157 ; AVX512F-LABEL: shuffle_v8i64_00001111:
    1158 ; AVX512F:       # BB#0:
    1159 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,1,1,1,1]
    1160 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1161 ; AVX512F-NEXT:    retq
    1162 ;
    1163 ; AVX512F-32-LABEL: shuffle_v8i64_00001111:
    1164 ; AVX512F-32:       # BB#0:
    1165 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,0,0,1,0,1,0,1,0,1,0]
    1166 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
    1167 ; AVX512F-32-NEXT:    retl
    1168   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 1, i32 1, i32 1, i32 1>
    1169   ret <8 x i64> %shuffle
    1170 }
   1171 
    1172 define <8 x i64> @shuffle_v8i64_81a3c5e7(<8 x i64> %a, <8 x i64> %b) {
    1173 ; Two-source shuffle, mask <8,1,10,3,12,5,14,7> (0-7 pick from %a, 8-15 from %b): expects vpermt2q writing zmm1 with index constant [0,9,2,11,4,13,6,15] (each IR index shifted by 8 modulo 16), then a copy of zmm1 to zmm0.
    1174 ; AVX512F-LABEL: shuffle_v8i64_81a3c5e7:
    1175 ; AVX512F:       # BB#0:
    1176 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,9,2,11,4,13,6,15]
    1177 ; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1178 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    1179 ; AVX512F-NEXT:    retq
    1180 ;
    1181 ; AVX512F-32-LABEL: shuffle_v8i64_81a3c5e7:
    1182 ; AVX512F-32:       # BB#0:
    1183 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,2,0,11,0,4,0,13,0,6,0,15,0]
    1184 ; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1185 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    1186 ; AVX512F-32-NEXT:    retl
    1187   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 1, i32 10, i32 3, i32 12, i32 5, i32 14, i32 7>
    1188   ret <8 x i64> %shuffle
    1189 }
   1190 
    1191 define <8 x i64> @shuffle_v8i64_08080808(<8 x i64> %a, <8 x i64> %b) {
    1192 ; Two-source shuffle, mask <0,8,0,8,0,8,0,8> (element 0 of %a alternating with element 0 of %b): expects vpermt2q writing zmm0 directly, with an index constant equal to the IR mask.
    1193 ; AVX512F-LABEL: shuffle_v8i64_08080808:
    1194 ; AVX512F:       # BB#0:
    1195 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,0,8,0,8]
    1196 ; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1197 ; AVX512F-NEXT:    retq
    1198 ;
    1199 ; AVX512F-32-LABEL: shuffle_v8i64_08080808:
    1200 ; AVX512F-32:       # BB#0:
    1201 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,0,0,8,0,0,0,8,0]
    1202 ; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1203 ; AVX512F-32-NEXT:    retl
    1204   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 0, i32 8, i32 0, i32 8>
    1205   ret <8 x i64> %shuffle
    1206 }
   1207 
    1208 define <8 x i64> @shuffle_v8i64_08084c4c(<8 x i64> %a, <8 x i64> %b) {
    1209 ; Two-source shuffle, mask <0,8,0,8,4,12,4,12> (0-7 pick from %a, 8-15 from %b): expects vpermt2q writing zmm0 directly, with an index constant equal to the IR mask.
    1210 ; AVX512F-LABEL: shuffle_v8i64_08084c4c:
    1211 ; AVX512F:       # BB#0:
    1212 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,0,8,4,12,4,12]
    1213 ; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1214 ; AVX512F-NEXT:    retq
    1215 ;
    1216 ; AVX512F-32-LABEL: shuffle_v8i64_08084c4c:
    1217 ; AVX512F-32:       # BB#0:
    1218 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,0,0,8,0,4,0,12,0,4,0,12,0]
    1219 ; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1220 ; AVX512F-32-NEXT:    retl
    1221   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 0, i32 8, i32 4, i32 12, i32 4, i32 12>
    1222   ret <8 x i64> %shuffle
    1223 }
   1224 
    1225 define <8 x i64> @shuffle_v8i64_8823cc67(<8 x i64> %a, <8 x i64> %b) {
    1226 ; Two-source shuffle, mask <8,8,2,3,12,12,6,7> (0-7 pick from %a, 8-15 from %b): expects vpermt2q writing zmm1 with index constant [0,0,10,11,4,4,14,15] (each IR index shifted by 8 modulo 16), then a copy of zmm1 to zmm0.
    1227 ; AVX512F-LABEL: shuffle_v8i64_8823cc67:
    1228 ; AVX512F:       # BB#0:
    1229 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,10,11,4,4,14,15]
    1230 ; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1231 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    1232 ; AVX512F-NEXT:    retq
    1233 ;
    1234 ; AVX512F-32-LABEL: shuffle_v8i64_8823cc67:
    1235 ; AVX512F-32:       # BB#0:
    1236 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,0,0,10,0,11,0,4,0,4,0,14,0,15,0]
    1237 ; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1238 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    1239 ; AVX512F-32-NEXT:    retl
    1240   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 8, i32 8, i32 2, i32 3, i32 12, i32 12, i32 6, i32 7>
    1241   ret <8 x i64> %shuffle
    1242 }
   1243 
    1244 define <8 x i64> @shuffle_v8i64_9832dc76(<8 x i64> %a, <8 x i64> %b) {
    1245 ; Two-source shuffle, mask <9,8,3,2,13,12,7,6> (0-7 pick from %a, 8-15 from %b): expects vpermt2q writing zmm1 with index constant [1,0,11,10,5,4,15,14] (each IR index shifted by 8 modulo 16), then a copy of zmm1 to zmm0.
    1246 ; AVX512F-LABEL: shuffle_v8i64_9832dc76:
    1247 ; AVX512F:       # BB#0:
    1248 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,11,10,5,4,15,14]
    1249 ; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1250 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    1251 ; AVX512F-NEXT:    retq
    1252 ;
    1253 ; AVX512F-32-LABEL: shuffle_v8i64_9832dc76:
    1254 ; AVX512F-32:       # BB#0:
    1255 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,11,0,10,0,5,0,4,0,15,0,14,0]
    1256 ; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1257 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    1258 ; AVX512F-32-NEXT:    retl
    1259   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 3, i32 2, i32 13, i32 12, i32 7, i32 6>
    1260   ret <8 x i64> %shuffle
    1261 }
   1262 
    1263 define <8 x i64> @shuffle_v8i64_9810dc54(<8 x i64> %a, <8 x i64> %b) {
    1264 ; Two-source shuffle, mask <9,8,1,0,13,12,5,4> (0-7 pick from %a, 8-15 from %b): expects vpermt2q writing zmm1 with index constant [1,0,9,8,5,4,13,12] (each IR index shifted by 8 modulo 16), then a copy of zmm1 to zmm0.
    1265 ; AVX512F-LABEL: shuffle_v8i64_9810dc54:
    1266 ; AVX512F:       # BB#0:
    1267 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,9,8,5,4,13,12]
    1268 ; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1269 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    1270 ; AVX512F-NEXT:    retq
    1271 ;
    1272 ; AVX512F-32-LABEL: shuffle_v8i64_9810dc54:
    1273 ; AVX512F-32:       # BB#0:
    1274 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [1,0,0,0,9,0,8,0,5,0,4,0,13,0,12,0]
    1275 ; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1276 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    1277 ; AVX512F-32-NEXT:    retl
    1278   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 9, i32 8, i32 1, i32 0, i32 13, i32 12, i32 5, i32 4>
    1279   ret <8 x i64> %shuffle
    1280 }
   1281 
    1282 define <8 x i64> @shuffle_v8i64_08194c5d(<8 x i64> %a, <8 x i64> %b) {
    1283 ; Two-source shuffle, mask <0,8,1,9,4,12,5,13> (0-7 pick from %a, 8-15 from %b): expects vpermt2q writing zmm0 directly, with an index constant equal to the IR mask.
    1284 ; AVX512F-LABEL: shuffle_v8i64_08194c5d:
    1285 ; AVX512F:       # BB#0:
    1286 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,4,12,5,13]
    1287 ; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1288 ; AVX512F-NEXT:    retq
    1289 ;
    1290 ; AVX512F-32-LABEL: shuffle_v8i64_08194c5d:
    1291 ; AVX512F-32:       # BB#0:
    1292 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,4,0,12,0,5,0,13,0]
    1293 ; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1294 ; AVX512F-32-NEXT:    retl
    1295   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
    1296   ret <8 x i64> %shuffle
    1297 }
   1298 
    1299 define <8 x i64> @shuffle_v8i64_2a3b6e7f(<8 x i64> %a, <8 x i64> %b) {
    1300 ; Two-source shuffle, mask <2,10,3,11,6,14,7,15> (0-7 pick from %a, 8-15 from %b): expects vpermt2q writing zmm0 directly, with an index constant equal to the IR mask.
    1301 ; AVX512F-LABEL: shuffle_v8i64_2a3b6e7f:
    1302 ; AVX512F:       # BB#0:
    1303 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,10,3,11,6,14,7,15]
    1304 ; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1305 ; AVX512F-NEXT:    retq
    1306 ;
    1307 ; AVX512F-32-LABEL: shuffle_v8i64_2a3b6e7f:
    1308 ; AVX512F-32:       # BB#0:
    1309 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [2,0,10,0,3,0,11,0,6,0,14,0,7,0,15,0]
    1310 ; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1311 ; AVX512F-32-NEXT:    retl
    1312   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
    1313   ret <8 x i64> %shuffle
    1314 }
   1315 
    1316 define <8 x i64> @shuffle_v8i64_08192a3b(<8 x i64> %a, <8 x i64> %b) {
    1317 ; Two-source shuffle, mask <0,8,1,9,2,10,3,11> (elements 0-3 of %a interleaved with elements 0-3 of %b): expects vpermt2q writing zmm0 directly, with an index constant equal to the IR mask.
    1318 ; AVX512F-LABEL: shuffle_v8i64_08192a3b:
    1319 ; AVX512F:       # BB#0:
    1320 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,8,1,9,2,10,3,11]
    1321 ; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1322 ; AVX512F-NEXT:    retq
    1323 ;
    1324 ; AVX512F-32-LABEL: shuffle_v8i64_08192a3b:
    1325 ; AVX512F-32:       # BB#0:
    1326 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,8,0,1,0,9,0,2,0,10,0,3,0,11,0]
    1327 ; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
    1328 ; AVX512F-32-NEXT:    retl
    1329   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
    1330   ret <8 x i64> %shuffle
    1331 }
   1332 
    1333 define <8 x i64> @shuffle_v8i64_08991abb(<8 x i64> %a, <8 x i64> %b) {
    1334 ; Two-source shuffle, mask <0,8,9,9,1,10,11,11> (0-7 pick from %a, 8-15 from %b): expects vpermt2q writing zmm1 with index constant [8,0,1,1,9,2,3,3] (each IR index shifted by 8 modulo 16), then a copy of zmm1 to zmm0.
    1335 ; AVX512F-LABEL: shuffle_v8i64_08991abb:
    1336 ; AVX512F:       # BB#0:
    1337 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,1,9,2,3,3]
    1338 ; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1339 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
    1340 ; AVX512F-NEXT:    retq
    1341 ;
    1342 ; AVX512F-32-LABEL: shuffle_v8i64_08991abb:
    1343 ; AVX512F-32:       # BB#0:
    1344 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,0,0,1,0,1,0,9,0,2,0,3,0,3,0]
    1345 ; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
    1346 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
    1347 ; AVX512F-32-NEXT:    retl
    1348   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 8, i32 9, i32 9, i32 1, i32 10, i32 11, i32 11>
    1349   ret <8 x i64> %shuffle
    1350 }
   1351 
   1352 define <8 x i64> @shuffle_v8i64_091b2d3f(<8 x i64> %a, <8 x i64> %b) {
   1353 ; Two-source shuffle a[0],b[1],a[1],b[3],a[2],b[5],a[3],b[7] (low half of %a alternating with odd elements of %b); both runs expect one vpermt2q with a constant index vector.
   1354 ; AVX512F-LABEL: shuffle_v8i64_091b2d3f:
   1355 ; AVX512F:       # BB#0:
   1356 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,9,1,11,2,13,3,15]
   1357 ; AVX512F-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
   1358 ; AVX512F-NEXT:    retq
   1359 ;
   1360 ; AVX512F-32-LABEL: shuffle_v8i64_091b2d3f:
   1361 ; AVX512F-32:       # BB#0:
   1362 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [0,0,9,0,1,0,11,0,2,0,13,0,3,0,15,0]
   1363 ; AVX512F-32-NEXT:    vpermt2q %zmm1, %zmm2, %zmm0
   1364 ; AVX512F-32-NEXT:    retl
   1365   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   1366   ret <8 x i64> %shuffle
   1367 }
   1368 
   1369 define <8 x i64> @shuffle_v8i64_09ab1def(<8 x i64> %a, <8 x i64> %b) {
   1370 ; Mostly-%b shuffle with a[0] and a[1] inserted at positions 0 and 4; the checks expect the commuted form (index constant has the a/b index ranges swapped, vpermt2q produces its result in zmm1) followed by a vmovaps copy into zmm0.
   1371 ; AVX512F-LABEL: shuffle_v8i64_09ab1def:
   1372 ; AVX512F:       # BB#0:
   1373 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,1,2,3,9,5,6,7]
   1374 ; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
   1375 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
   1376 ; AVX512F-NEXT:    retq
   1377 ;
   1378 ; AVX512F-32-LABEL: shuffle_v8i64_09ab1def:
   1379 ; AVX512F-32:       # BB#0:
   1380 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [8,0,1,0,2,0,3,0,9,0,5,0,6,0,7,0]
   1381 ; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
   1382 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
   1383 ; AVX512F-32-NEXT:    retl
   1384   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 9, i32 10, i32 11, i32 1, i32 13, i32 14, i32 15>
   1385   ret <8 x i64> %shuffle
   1386 }
   1387 
   1388 define <8 x i64> @shuffle_v8i64_00014445(<8 x i64> %a, <8 x i64> %b) {
   1389 ; Single-source permute of %a (%b is unused) with mask 0,0,0,1,4,4,4,5, i.e. the same 0,0,0,1 pattern in each 4-element half; both runs expect vpermq with a constant index vector.
   1390 ; AVX512F-LABEL: shuffle_v8i64_00014445:
   1391 ; AVX512F:       # BB#0:
   1392 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,4,4,4,5]
   1393 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1394 ; AVX512F-NEXT:    retq
   1395 ;
   1396 ; AVX512F-32-LABEL: shuffle_v8i64_00014445:
   1397 ; AVX512F-32:       # BB#0:
   1398 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,4,0,4,0,4,0,5,0]
   1399 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1400 ; AVX512F-32-NEXT:    retl
   1401   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 4, i32 4, i32 4, i32 5>
   1402   ret <8 x i64> %shuffle
   1403 }
   1404 
   1405 define <8 x i64> @shuffle_v8i64_00204464(<8 x i64> %a, <8 x i64> %b) {
   1406 ; Single-source permute of %a (%b is unused) with mask 0,0,2,0,4,4,6,4, i.e. the same 0,0,2,0 pattern in each 4-element half; both runs expect vpermq with a constant index vector.
   1407 ; AVX512F-LABEL: shuffle_v8i64_00204464:
   1408 ; AVX512F:       # BB#0:
   1409 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,4,6,4]
   1410 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1411 ; AVX512F-NEXT:    retq
   1412 ;
   1413 ; AVX512F-32-LABEL: shuffle_v8i64_00204464:
   1414 ; AVX512F-32:       # BB#0:
   1415 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,4,0,6,0,4,0]
   1416 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1417 ; AVX512F-32-NEXT:    retl
   1418   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 4, i32 6, i32 4>
   1419   ret <8 x i64> %shuffle
   1420 }
   1421 
   1422 define <8 x i64> @shuffle_v8i64_03004744(<8 x i64> %a, <8 x i64> %b) {
   1423 ; Single-source permute of %a (%b is unused) with mask 0,3,0,0,4,7,4,4, i.e. the same 0,3,0,0 pattern in each 4-element half; both runs expect vpermq with a constant index vector.
   1424 ; AVX512F-LABEL: shuffle_v8i64_03004744:
   1425 ; AVX512F:       # BB#0:
   1426 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,7,4,4]
   1427 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1428 ; AVX512F-NEXT:    retq
   1429 ;
   1430 ; AVX512F-32-LABEL: shuffle_v8i64_03004744:
   1431 ; AVX512F-32:       # BB#0:
   1432 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,7,0,4,0,4,0]
   1433 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1434 ; AVX512F-32-NEXT:    retl
   1435   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 7, i32 4, i32 4>
   1436   ret <8 x i64> %shuffle
   1437 }
   1438 
   1439 define <8 x i64> @shuffle_v8i64_10005444(<8 x i64> %a, <8 x i64> %b) {
   1440 ; Single-source permute of %a (%b is unused) with mask 1,0,0,0,5,4,4,4, i.e. the same 1,0,0,0 pattern in each 4-element half; both runs expect vpermq with a constant index vector.
   1441 ; AVX512F-LABEL: shuffle_v8i64_10005444:
   1442 ; AVX512F:       # BB#0:
   1443 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,5,4,4,4]
   1444 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1445 ; AVX512F-NEXT:    retq
   1446 ;
   1447 ; AVX512F-32-LABEL: shuffle_v8i64_10005444:
   1448 ; AVX512F-32:       # BB#0:
   1449 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,5,0,4,0,4,0,4,0]
   1450 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1451 ; AVX512F-32-NEXT:    retl
   1452   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 5, i32 4, i32 4, i32 4>
   1453   ret <8 x i64> %shuffle
   1454 }
   1455 
   1456 define <8 x i64> @shuffle_v8i64_22006644(<8 x i64> %a, <8 x i64> %b) {
   1457 ; Single-source permute of %a (%b is unused) with mask 2,2,0,0,6,6,4,4, i.e. the same 2,2,0,0 pattern in each 4-element half; both runs expect vpermq with a constant index vector.
   1458 ; AVX512F-LABEL: shuffle_v8i64_22006644:
   1459 ; AVX512F:       # BB#0:
   1460 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,6,4,4]
   1461 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1462 ; AVX512F-NEXT:    retq
   1463 ;
   1464 ; AVX512F-32-LABEL: shuffle_v8i64_22006644:
   1465 ; AVX512F-32:       # BB#0:
   1466 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,6,0,4,0,4,0]
   1467 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1468 ; AVX512F-32-NEXT:    retl
   1469   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 6, i32 4, i32 4>
   1470   ret <8 x i64> %shuffle
   1471 }
   1472 
   1473 define <8 x i64> @shuffle_v8i64_33307774(<8 x i64> %a, <8 x i64> %b) {
   1474 ; Single-source permute of %a (%b is unused) with mask 3,3,3,0,7,7,7,4, i.e. the same 3,3,3,0 pattern in each 4-element half; both runs expect vpermq with a constant index vector.
   1475 ; AVX512F-LABEL: shuffle_v8i64_33307774:
   1476 ; AVX512F:       # BB#0:
   1477 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,7,7,4]
   1478 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1479 ; AVX512F-NEXT:    retq
   1480 ;
   1481 ; AVX512F-32-LABEL: shuffle_v8i64_33307774:
   1482 ; AVX512F-32:       # BB#0:
   1483 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,7,0,7,0,4,0]
   1484 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1485 ; AVX512F-32-NEXT:    retl
   1486   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 7, i32 7, i32 4>
   1487   ret <8 x i64> %shuffle
   1488 }
   1489 
   1490 define <8 x i64> @shuffle_v8i64_32107654(<8 x i64> %a, <8 x i64> %b) {
   1491 ; Single-source permute of %a (%b is unused) that reverses each 4-element half (mask 3,2,1,0,7,6,5,4); both runs expect vpermq with a constant index vector.
   1492 ; AVX512F-LABEL: shuffle_v8i64_32107654:
   1493 ; AVX512F:       # BB#0:
   1494 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,7,6,5,4]
   1495 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1496 ; AVX512F-NEXT:    retq
   1497 ;
   1498 ; AVX512F-32-LABEL: shuffle_v8i64_32107654:
   1499 ; AVX512F-32:       # BB#0:
   1500 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,7,0,6,0,5,0,4,0]
   1501 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1502 ; AVX512F-32-NEXT:    retl
   1503   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
   1504   ret <8 x i64> %shuffle
   1505 }
   1506 
   1507 define <8 x i64> @shuffle_v8i64_00234467(<8 x i64> %a, <8 x i64> %b) {
   1508 ; Single-source permute of %a (%b is unused) with mask 0,0,2,3,4,4,6,7, i.e. the same 0,0,2,3 pattern in each 4-element half; both runs expect vpermq with a constant index vector.
   1509 ; AVX512F-LABEL: shuffle_v8i64_00234467:
   1510 ; AVX512F:       # BB#0:
   1511 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,4,4,6,7]
   1512 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1513 ; AVX512F-NEXT:    retq
   1514 ;
   1515 ; AVX512F-32-LABEL: shuffle_v8i64_00234467:
   1516 ; AVX512F-32:       # BB#0:
   1517 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,4,0,4,0,6,0,7,0]
   1518 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1519 ; AVX512F-32-NEXT:    retl
   1520   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 4, i32 4, i32 6, i32 7>
   1521   ret <8 x i64> %shuffle
   1522 }
   1523 
   1524 define <8 x i64> @shuffle_v8i64_00224466(<8 x i64> %a, <8 x i64> %b) {
   1525 ; Single-source permute of %a (%b is unused) duplicating every even element (mask 0,0,2,2,4,4,6,6); both runs expect vpermq with a constant index vector.
   1526 ; AVX512F-LABEL: shuffle_v8i64_00224466:
   1527 ; AVX512F:       # BB#0:
   1528 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,4,4,6,6]
   1529 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1530 ; AVX512F-NEXT:    retq
   1531 ;
   1532 ; AVX512F-32-LABEL: shuffle_v8i64_00224466:
   1533 ; AVX512F-32:       # BB#0:
   1534 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,4,0,4,0,6,0,6,0]
   1535 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1536 ; AVX512F-32-NEXT:    retl
   1537   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
   1538   ret <8 x i64> %shuffle
   1539 }
   1540 
   1541 define <8 x i64> @shuffle_v8i64_10325476(<8 x i64> %a, <8 x i64> %b) {
   1542 ; Single-source permute of %a (%b is unused) swapping each adjacent pair of elements (mask 1,0,3,2,5,4,7,6); both runs expect vpermq with a constant index vector.
   1543 ; AVX512F-LABEL: shuffle_v8i64_10325476:
   1544 ; AVX512F:       # BB#0:
   1545 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,5,4,7,6]
   1546 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1547 ; AVX512F-NEXT:    retq
   1548 ;
   1549 ; AVX512F-32-LABEL: shuffle_v8i64_10325476:
   1550 ; AVX512F-32:       # BB#0:
   1551 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,5,0,4,0,7,0,6,0]
   1552 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1553 ; AVX512F-32-NEXT:    retl
   1554   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
   1555   ret <8 x i64> %shuffle
   1556 }
   1557 
   1558 define <8 x i64> @shuffle_v8i64_11335577(<8 x i64> %a, <8 x i64> %b) {
   1559 ; Single-source permute of %a (%b is unused) duplicating every odd element (mask 1,1,3,3,5,5,7,7); both runs expect vpermq with a constant index vector.
   1560 ; AVX512F-LABEL: shuffle_v8i64_11335577:
   1561 ; AVX512F:       # BB#0:
   1562 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,5,5,7,7]
   1563 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1564 ; AVX512F-NEXT:    retq
   1565 ;
   1566 ; AVX512F-32-LABEL: shuffle_v8i64_11335577:
   1567 ; AVX512F-32:       # BB#0:
   1568 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,5,0,5,0,7,0,7,0]
   1569 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1570 ; AVX512F-32-NEXT:    retl
   1571   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
   1572   ret <8 x i64> %shuffle
   1573 }
   1574 
   1575 define <8 x i64> @shuffle_v8i64_10235467(<8 x i64> %a, <8 x i64> %b) {
   1576 ; Single-source permute of %a (%b is unused) with mask 1,0,2,3,5,4,6,7, i.e. the same 1,0,2,3 pattern in each 4-element half; both runs expect vpermq with a constant index vector.
   1577 ; AVX512F-LABEL: shuffle_v8i64_10235467:
   1578 ; AVX512F:       # BB#0:
   1579 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,2,3,5,4,6,7]
   1580 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1581 ; AVX512F-NEXT:    retq
   1582 ;
   1583 ; AVX512F-32-LABEL: shuffle_v8i64_10235467:
   1584 ; AVX512F-32:       # BB#0:
   1585 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,3,0,5,0,4,0,6,0,7,0]
   1586 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1587 ; AVX512F-32-NEXT:    retl
   1588   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1589   ret <8 x i64> %shuffle
   1590 }
   1591 
   1592 define <8 x i64> @shuffle_v8i64_10225466(<8 x i64> %a, <8 x i64> %b) {
   1593 ; Single-source permute of %a (%b is unused) with mask 1,0,2,2,5,4,6,6, i.e. the same 1,0,2,2 pattern in each 4-element half; both runs expect vpermq with a constant index vector.
   1594 ; AVX512F-LABEL: shuffle_v8i64_10225466:
   1595 ; AVX512F:       # BB#0:
   1596 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,2,2,5,4,6,6]
   1597 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1598 ; AVX512F-NEXT:    retq
   1599 ;
   1600 ; AVX512F-32-LABEL: shuffle_v8i64_10225466:
   1601 ; AVX512F-32:       # BB#0:
   1602 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,2,0,2,0,5,0,4,0,6,0,6,0]
   1603 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1604 ; AVX512F-32-NEXT:    retl
   1605   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 2, i32 2, i32 5, i32 4, i32 6, i32 6>
   1606   ret <8 x i64> %shuffle
   1607 }
   1608 
   1609 define <8 x i64> @shuffle_v8i64_00015444(<8 x i64> %a, <8 x i64> %b) {
   1610 ; Single-source permute of %a (%b is unused) with mask 0,0,0,1,5,4,4,4; the two 4-element halves use different patterns, and both runs expect vpermq with a constant index vector.
   1611 ; AVX512F-LABEL: shuffle_v8i64_00015444:
   1612 ; AVX512F:       # BB#0:
   1613 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,1,5,4,4,4]
   1614 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1615 ; AVX512F-NEXT:    retq
   1616 ;
   1617 ; AVX512F-32-LABEL: shuffle_v8i64_00015444:
   1618 ; AVX512F-32:       # BB#0:
   1619 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,0,0,1,0,5,0,4,0,4,0,4,0]
   1620 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1621 ; AVX512F-32-NEXT:    retl
   1622   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 0, i32 1, i32 5, i32 4, i32 4, i32 4>
   1623   ret <8 x i64> %shuffle
   1624 }
   1625 
   1626 define <8 x i64> @shuffle_v8i64_00204644(<8 x i64> %a, <8 x i64> %b) {
   1627 ; Single-source permute of %a (%b is unused) with mask 0,0,2,0,4,6,4,4; the two 4-element halves use different patterns, and both runs expect vpermq with a constant index vector.
   1628 ; AVX512F-LABEL: shuffle_v8i64_00204644:
   1629 ; AVX512F:       # BB#0:
   1630 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,0,4,6,4,4]
   1631 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1632 ; AVX512F-NEXT:    retq
   1633 ;
   1634 ; AVX512F-32-LABEL: shuffle_v8i64_00204644:
   1635 ; AVX512F-32:       # BB#0:
   1636 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,0,0,4,0,6,0,4,0,4,0]
   1637 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1638 ; AVX512F-32-NEXT:    retl
   1639   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 0, i32 4, i32 6, i32 4, i32 4>
   1640   ret <8 x i64> %shuffle
   1641 }
   1642 
   1643 define <8 x i64> @shuffle_v8i64_03004474(<8 x i64> %a, <8 x i64> %b) {
   1644 ; Single-source permute of %a (%b is unused) with mask 0,3,0,0,4,4,7,4; the two 4-element halves use different patterns, and both runs expect vpermq with a constant index vector.
   1645 ; AVX512F-LABEL: shuffle_v8i64_03004474:
   1646 ; AVX512F:       # BB#0:
   1647 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,3,0,0,4,4,7,4]
   1648 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1649 ; AVX512F-NEXT:    retq
   1650 ;
   1651 ; AVX512F-32-LABEL: shuffle_v8i64_03004474:
   1652 ; AVX512F-32:       # BB#0:
   1653 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,3,0,0,0,0,0,4,0,4,0,7,0,4,0]
   1654 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1655 ; AVX512F-32-NEXT:    retl
   1656   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 4, i32 7, i32 4>
   1657   ret <8 x i64> %shuffle
   1658 }
   1659 
   1660 define <8 x i64> @shuffle_v8i64_10004444(<8 x i64> %a, <8 x i64> %b) {
   1661 ; Single-source permute of %a (%b is unused) with mask 1,0,0,0,4,4,4,4; the two 4-element halves use different patterns, and both runs expect vpermq with a constant index vector.
   1662 ; AVX512F-LABEL: shuffle_v8i64_10004444:
   1663 ; AVX512F:       # BB#0:
   1664 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,4,4,4,4]
   1665 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1666 ; AVX512F-NEXT:    retq
   1667 ;
   1668 ; AVX512F-32-LABEL: shuffle_v8i64_10004444:
   1669 ; AVX512F-32:       # BB#0:
   1670 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,0,0,0,0,4,0,4,0,4,0,4,0]
   1671 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1672 ; AVX512F-32-NEXT:    retl
   1673   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
   1674   ret <8 x i64> %shuffle
   1675 }
   1676 
   1677 define <8 x i64> @shuffle_v8i64_22006446(<8 x i64> %a, <8 x i64> %b) {
   1678 ; Single-source permute of %a (%b is unused) with mask 2,2,0,0,6,4,4,6; the two 4-element halves use different patterns, and both runs expect vpermq with a constant index vector.
   1679 ; AVX512F-LABEL: shuffle_v8i64_22006446:
   1680 ; AVX512F:       # BB#0:
   1681 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,2,0,0,6,4,4,6]
   1682 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1683 ; AVX512F-NEXT:    retq
   1684 ;
   1685 ; AVX512F-32-LABEL: shuffle_v8i64_22006446:
   1686 ; AVX512F-32:       # BB#0:
   1687 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [2,0,2,0,0,0,0,0,6,0,4,0,4,0,6,0]
   1688 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1689 ; AVX512F-32-NEXT:    retl
   1690   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 2, i32 2, i32 0, i32 0, i32 6, i32 4, i32 4, i32 6>
   1691   ret <8 x i64> %shuffle
   1692 }
   1693 
   1694 define <8 x i64> @shuffle_v8i64_33307474(<8 x i64> %a, <8 x i64> %b) {
   1695 ; Single-source permute of %a (%b is unused) with mask 3,3,3,0,7,4,7,4; the two 4-element halves use different patterns, and both runs expect vpermq with a constant index vector.
   1696 ; AVX512F-LABEL: shuffle_v8i64_33307474:
   1697 ; AVX512F:       # BB#0:
   1698 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,3,3,0,7,4,7,4]
   1699 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1700 ; AVX512F-NEXT:    retq
   1701 ;
   1702 ; AVX512F-32-LABEL: shuffle_v8i64_33307474:
   1703 ; AVX512F-32:       # BB#0:
   1704 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,3,0,3,0,0,0,7,0,4,0,7,0,4,0]
   1705 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1706 ; AVX512F-32-NEXT:    retl
   1707   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 3, i32 3, i32 0, i32 7, i32 4, i32 7, i32 4>
   1708   ret <8 x i64> %shuffle
   1709 }
   1710 
   1711 define <8 x i64> @shuffle_v8i64_32104567(<8 x i64> %a, <8 x i64> %b) {
   1712 ; Single-source permute of %a (%b is unused) that reverses the low four elements and keeps the high four in place (mask 3,2,1,0,4,5,6,7); both runs expect vpermq with a constant index vector.
   1713 ; AVX512F-LABEL: shuffle_v8i64_32104567:
   1714 ; AVX512F:       # BB#0:
   1715 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,2,1,0,4,5,6,7]
   1716 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1717 ; AVX512F-NEXT:    retq
   1718 ;
   1719 ; AVX512F-32-LABEL: shuffle_v8i64_32104567:
   1720 ; AVX512F-32:       # BB#0:
   1721 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [3,0,2,0,1,0,0,0,4,0,5,0,6,0,7,0]
   1722 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1723 ; AVX512F-32-NEXT:    retl
   1724   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7>
   1725   ret <8 x i64> %shuffle
   1726 }
   1727 
   1728 define <8 x i64> @shuffle_v8i64_00236744(<8 x i64> %a, <8 x i64> %b) {
   1729 ; Single-source permute of %a (%b is unused) with mask 0,0,2,3,6,7,4,4; the two 4-element halves use different patterns, and both runs expect vpermq with a constant index vector.
   1730 ; AVX512F-LABEL: shuffle_v8i64_00236744:
   1731 ; AVX512F:       # BB#0:
   1732 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,3,6,7,4,4]
   1733 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1734 ; AVX512F-NEXT:    retq
   1735 ;
   1736 ; AVX512F-32-LABEL: shuffle_v8i64_00236744:
   1737 ; AVX512F-32:       # BB#0:
   1738 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,3,0,6,0,7,0,4,0,4,0]
   1739 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1740 ; AVX512F-32-NEXT:    retl
   1741   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 6, i32 7, i32 4, i32 4>
   1742   ret <8 x i64> %shuffle
   1743 }
   1744 
   1745 define <8 x i64> @shuffle_v8i64_00226644(<8 x i64> %a, <8 x i64> %b) {
   1746 ; Single-source permute of %a (%b is unused) with mask 0,0,2,2,6,6,4,4; the two 4-element halves use different patterns, and both runs expect vpermq with a constant index vector.
   1747 ; AVX512F-LABEL: shuffle_v8i64_00226644:
   1748 ; AVX512F:       # BB#0:
   1749 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,2,2,6,6,4,4]
   1750 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1751 ; AVX512F-NEXT:    retq
   1752 ;
   1753 ; AVX512F-32-LABEL: shuffle_v8i64_00226644:
   1754 ; AVX512F-32:       # BB#0:
   1755 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,0,0,2,0,2,0,6,0,6,0,4,0,4,0]
   1756 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1757 ; AVX512F-32-NEXT:    retl
   1758   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 6, i32 6, i32 4, i32 4>
   1759   ret <8 x i64> %shuffle
   1760 }
   1761 
   1762 define <8 x i64> @shuffle_v8i64_10324567(<8 x i64> %a, <8 x i64> %b) {
   1763 ; Single-source permute of %a (%b is unused) swapping adjacent pairs in the low four elements and keeping the high four in place (mask 1,0,3,2,4,5,6,7); both runs expect vpermq with a constant index vector.
   1764 ; AVX512F-LABEL: shuffle_v8i64_10324567:
   1765 ; AVX512F:       # BB#0:
   1766 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,3,2,4,5,6,7]
   1767 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1768 ; AVX512F-NEXT:    retq
   1769 ;
   1770 ; AVX512F-32-LABEL: shuffle_v8i64_10324567:
   1771 ; AVX512F-32:       # BB#0:
   1772 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,0,0,3,0,2,0,4,0,5,0,6,0,7,0]
   1773 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1774 ; AVX512F-32-NEXT:    retl
   1775   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
   1776   ret <8 x i64> %shuffle
   1777 }
   1778 
   1779 define <8 x i64> @shuffle_v8i64_11334567(<8 x i64> %a, <8 x i64> %b) {
   1780 ; Single-source permute of %a (%b is unused) duplicating odd elements in the low four and keeping the high four in place (mask 1,1,3,3,4,5,6,7); both runs expect vpermq with a constant index vector.
   1781 ; AVX512F-LABEL: shuffle_v8i64_11334567:
   1782 ; AVX512F:       # BB#0:
   1783 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,1,3,3,4,5,6,7]
   1784 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1785 ; AVX512F-NEXT:    retq
   1786 ;
   1787 ; AVX512F-32-LABEL: shuffle_v8i64_11334567:
   1788 ; AVX512F-32:       # BB#0:
   1789 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [1,0,1,0,3,0,3,0,4,0,5,0,6,0,7,0]
   1790 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1791 ; AVX512F-32-NEXT:    retl
   1792   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7>
   1793   ret <8 x i64> %shuffle
   1794 }
   1795 
   1796 define <8 x i64> @shuffle_v8i64_01235467(<8 x i64> %a, <8 x i64> %b) {
   1797 ; Single-source permute of %a (%b is unused) that only swaps elements 4 and 5 (mask 0,1,2,3,5,4,6,7); both runs expect vpermq with a constant index vector.
   1798 ; AVX512F-LABEL: shuffle_v8i64_01235467:
   1799 ; AVX512F:       # BB#0:
   1800 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,7]
   1801 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1802 ; AVX512F-NEXT:    retq
   1803 ;
   1804 ; AVX512F-32-LABEL: shuffle_v8i64_01235467:
   1805 ; AVX512F-32:       # BB#0:
   1806 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,7,0]
   1807 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1808 ; AVX512F-32-NEXT:    retl
   1809   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 7>
   1810   ret <8 x i64> %shuffle
   1811 }
   1812 
   1813 define <8 x i64> @shuffle_v8i64_01235466(<8 x i64> %a, <8 x i64> %b) {
   1814 ; Single-source permute of %a (%b is unused) keeping the low four elements and using 5,4,6,6 for the high four (mask 0,1,2,3,5,4,6,6); both runs expect vpermq with a constant index vector.
   1815 ; AVX512F-LABEL: shuffle_v8i64_01235466:
   1816 ; AVX512F:       # BB#0:
   1817 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,1,2,3,5,4,6,6]
   1818 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1819 ; AVX512F-NEXT:    retq
   1820 ;
   1821 ; AVX512F-32-LABEL: shuffle_v8i64_01235466:
   1822 ; AVX512F-32:       # BB#0:
   1823 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = [0,0,1,0,2,0,3,0,5,0,4,0,6,0,6,0]
   1824 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1825 ; AVX512F-32-NEXT:    retl
   1826   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 6, i32 6>
   1827   ret <8 x i64> %shuffle
   1828 }
   1829 
   1830 define <8 x i64> @shuffle_v8i64_002u6u44(<8 x i64> %a, <8 x i64> %b) {
   1831 ; Single-source permute of %a (%b is unused) with undef lanes at positions 3 and 5 (mask 0,0,2,u,6,u,4,4); the checks expect vpermq with the undef lanes printed as 'u' in the index constant.
   1832 ; AVX512F-LABEL: shuffle_v8i64_002u6u44:
   1833 ; AVX512F:       # BB#0:
   1834 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,2,u,6,u,4,4>
   1835 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1836 ; AVX512F-NEXT:    retq
   1837 ;
   1838 ; AVX512F-32-LABEL: shuffle_v8i64_002u6u44:
   1839 ; AVX512F-32:       # BB#0:
   1840 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,2,0,u,u,6,0,u,u,4,0,4,0>
   1841 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1842 ; AVX512F-32-NEXT:    retl
   1843   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 2, i32 undef, i32 6, i32 undef, i32 4, i32 4>
   1844   ret <8 x i64> %shuffle
   1845 }
   1846 
   1847 define <8 x i64> @shuffle_v8i64_00uu66uu(<8 x i64> %a, <8 x i64> %b) {
   1848 ; Single-source permute of %a (%b is unused) with undef lanes at positions 2,3,6,7 (mask 0,0,u,u,6,6,u,u); the checks expect vpermq with the undef lanes printed as 'u' in the index constant.
   1849 ; AVX512F-LABEL: shuffle_v8i64_00uu66uu:
   1850 ; AVX512F:       # BB#0:
   1851 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,6,6,u,u>
   1852 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1853 ; AVX512F-NEXT:    retq
   1854 ;
   1855 ; AVX512F-32-LABEL: shuffle_v8i64_00uu66uu:
   1856 ; AVX512F-32:       # BB#0:
   1857 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,0,0,u,u,u,u,6,0,6,0,u,u,u,u>
   1858 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1859 ; AVX512F-32-NEXT:    retl
   1860   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 0, i32 undef, i32 undef, i32 6, i32 6, i32 undef, i32 undef>
   1861   ret <8 x i64> %shuffle
   1862 }
   1863 
   1864 define <8 x i64> @shuffle_v8i64_103245uu(<8 x i64> %a, <8 x i64> %b) {
   1865 ; Single-source permute of %a (%b is unused) with the last two lanes undef (mask 1,0,3,2,4,5,u,u); the checks expect vpermq with the undef lanes printed as 'u' in the index constant.
   1866 ; AVX512F-LABEL: shuffle_v8i64_103245uu:
   1867 ; AVX512F:       # BB#0:
   1868 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,0,3,2,4,5,u,u>
   1869 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1870 ; AVX512F-NEXT:    retq
   1871 ;
   1872 ; AVX512F-32-LABEL: shuffle_v8i64_103245uu:
   1873 ; AVX512F-32:       # BB#0:
   1874 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,0,0,0,3,0,2,0,4,0,5,0,u,u,u,u>
   1875 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1876 ; AVX512F-32-NEXT:    retl
   1877   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 undef, i32 undef>
   1878   ret <8 x i64> %shuffle
   1879 }
   1880 
   1881 define <8 x i64> @shuffle_v8i64_1133uu67(<8 x i64> %a, <8 x i64> %b) {
   1882 ; Single-source permute of %a (%b is unused) with undef lanes at positions 4 and 5 (mask 1,1,3,3,u,u,6,7); the checks expect vpermq with the undef lanes printed as 'u' in the index constant.
   1883 ; AVX512F-LABEL: shuffle_v8i64_1133uu67:
   1884 ; AVX512F:       # BB#0:
   1885 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,1,3,3,u,u,6,7>
   1886 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1887 ; AVX512F-NEXT:    retq
   1888 ;
   1889 ; AVX512F-32-LABEL: shuffle_v8i64_1133uu67:
   1890 ; AVX512F-32:       # BB#0:
   1891 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <1,0,1,0,3,0,3,0,u,u,u,u,6,0,7,0>
   1892 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1893 ; AVX512F-32-NEXT:    retl
   1894   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 undef, i32 undef, i32 6, i32 7>
   1895   ret <8 x i64> %shuffle
   1896 }
   1897 
   1898 define <8 x i64> @shuffle_v8i64_0uu354uu(<8 x i64> %a, <8 x i64> %b) {
   1899 ; Single-source permute of %a (%b is unused) with undef lanes at positions 1,2,6,7 (mask 0,u,u,3,5,4,u,u); the checks expect vpermq with the undef lanes printed as 'u' in the index constant.
   1900 ; AVX512F-LABEL: shuffle_v8i64_0uu354uu:
   1901 ; AVX512F:       # BB#0:
   1902 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,u,u,3,5,4,u,u>
   1903 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1904 ; AVX512F-NEXT:    retq
   1905 ;
   1906 ; AVX512F-32-LABEL: shuffle_v8i64_0uu354uu:
   1907 ; AVX512F-32:       # BB#0:
   1908 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <0,0,u,u,u,u,3,0,5,0,4,0,u,u,u,u>
   1909 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1910 ; AVX512F-32-NEXT:    retl
   1911   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 0, i32 undef, i32 undef, i32 3, i32 5, i32 4, i32 undef, i32 undef>
   1912   ret <8 x i64> %shuffle
   1913 }
   1914 
   1915 define <8 x i64> @shuffle_v8i64_uuu3uu66(<8 x i64> %a, <8 x i64> %b) {
   1916 ; Single-source permute of %a (%b is unused) where only lanes 3, 6 and 7 are defined (mask u,u,u,3,u,u,6,6); the checks expect vpermq with the undef lanes printed as 'u' in the index constant.
   1917 ; AVX512F-LABEL: shuffle_v8i64_uuu3uu66:
   1918 ; AVX512F:       # BB#0:
   1919 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <u,u,u,3,u,u,6,6>
   1920 ; AVX512F-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1921 ; AVX512F-NEXT:    retq
   1922 ;
   1923 ; AVX512F-32-LABEL: shuffle_v8i64_uuu3uu66:
   1924 ; AVX512F-32:       # BB#0:
   1925 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm1 = <u,u,u,u,u,u,3,0,u,u,u,u,6,0,6,0>
   1926 ; AVX512F-32-NEXT:    vpermq %zmm0, %zmm1, %zmm0
   1927 ; AVX512F-32-NEXT:    retl
   1928   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 undef, i32 undef, i32 undef, i32 3, i32 undef, i32 undef, i32 6, i32 6>
   1929   ret <8 x i64> %shuffle
   1930 }
   1931 
   1932 define <8 x i64> @shuffle_v8i64_6caa87e5(<8 x i64> %a, <8 x i64> %b) {
   1933 ; Irregular two-source shuffle a[6],b[4],b[2],b[2],b[0],a[7],b[6],a[5]; the checks expect the commuted form (index constant has the a/b index ranges swapped, vpermt2q produces its result in zmm1) followed by a vmovaps copy into zmm0.
   1934 ; AVX512F-LABEL: shuffle_v8i64_6caa87e5:
   1935 ; AVX512F:       # BB#0:
   1936 ; AVX512F-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [14,4,2,2,0,15,6,13]
   1937 ; AVX512F-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
   1938 ; AVX512F-NEXT:    vmovaps %zmm1, %zmm0
   1939 ; AVX512F-NEXT:    retq
   1940 ;
   1941 ; AVX512F-32-LABEL: shuffle_v8i64_6caa87e5:
   1942 ; AVX512F-32:       # BB#0:
   1943 ; AVX512F-32-NEXT:    vmovdqa64 {{.*#+}} zmm2 = [14,0,4,0,2,0,2,0,0,0,15,0,6,0,13,0]
   1944 ; AVX512F-32-NEXT:    vpermt2q %zmm0, %zmm2, %zmm1
   1945 ; AVX512F-32-NEXT:    vmovaps %zmm1, %zmm0
   1946 ; AVX512F-32-NEXT:    retl
   1947   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32> <i32 6, i32 12, i32 10, i32 10, i32 8, i32 7, i32 14, i32 5>
   1948   ret <8 x i64> %shuffle
   1949 }
   1950 
   1951 define <8 x double> @shuffle_v8f64_082a4c6e(<8 x double> %a, <8 x double> %b) {
   1952 ; Mask 0,8,2,10,4,12,6,14 interleaves the even elements of %a and %b; both runs expect a single vunpcklpd with no index constant.
   1953 ; AVX512F-LABEL: shuffle_v8f64_082a4c6e:
   1954 ; AVX512F:       # BB#0:
   1955 ; AVX512F-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1956 ; AVX512F-NEXT:    retq
   1957 ;
   1958 ; AVX512F-32-LABEL: shuffle_v8f64_082a4c6e:
   1959 ; AVX512F-32:       # BB#0:
   1960 ; AVX512F-32-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1961 ; AVX512F-32-NEXT:    retl
   1962   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   1963   ret <8 x double> %shuffle
   1964 }
   1965 
   1966 define <8 x double> @shuffle_v8f64_0z2z4z6z(<8 x double> %a, <8 x double> %b) {
   1967 ; Even elements of %a interleaved with zeros (the second shuffle source is zeroinitializer, %b is unused); both runs expect vpxord to zero zmm1 followed by vunpcklpd.
   1968 ; AVX512F-LABEL: shuffle_v8f64_0z2z4z6z:
   1969 ; AVX512F:       # BB#0:
   1970 ; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
   1971 ; AVX512F-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1972 ; AVX512F-NEXT:    retq
   1973 ;
   1974 ; AVX512F-32-LABEL: shuffle_v8f64_0z2z4z6z:
   1975 ; AVX512F-32:       # BB#0:
   1976 ; AVX512F-32-NEXT:    vpxord %zmm1, %zmm1, %zmm1
   1977 ; AVX512F-32-NEXT:    vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1978 ; AVX512F-32-NEXT:    retl
   1979   %shuffle = shufflevector <8 x double> %a, <8 x double> zeroinitializer, <8 x i32><i32 0, i32 8, i32 2, i32 8, i32 4, i32 8, i32 6, i32 8>
   1980   ret <8 x double> %shuffle
   1981 }
   1982 
   1983 define <8 x i64> @shuffle_v8i64_082a4c6e(<8 x i64> %a, <8 x i64> %b) {
   1984 ; Integer variant of the even-element interleave (mask 0,8,2,10,4,12,6,14); both runs expect a single vpunpcklqdq with no index constant.
   1985 ; AVX512F-LABEL: shuffle_v8i64_082a4c6e:
   1986 ; AVX512F:       # BB#0:
   1987 ; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1988 ; AVX512F-NEXT:    retq
   1989 ;
   1990 ; AVX512F-32-LABEL: shuffle_v8i64_082a4c6e:
   1991 ; AVX512F-32:       # BB#0:
   1992 ; AVX512F-32-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   1993 ; AVX512F-32-NEXT:    retl
   1994   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
   1995   ret <8 x i64> %shuffle
   1996 }
   1997 
   1998 define <8 x i64> @shuffle_v8i64_z8zazcze(<8 x i64> %a, <8 x i64> %b) {
   1999 ; Zeros interleaved with the even elements of %b (the first shuffle source is zeroinitializer, so mask indices 7,5,3,1 all read zero; %a is unused); both runs expect vpxord to zero zmm0 followed by vpunpcklqdq.
   2000 ; AVX512F-LABEL: shuffle_v8i64_z8zazcze:
   2001 ; AVX512F:       # BB#0:
   2002 ; AVX512F-NEXT:    vpxord %zmm0, %zmm0, %zmm0
   2003 ; AVX512F-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   2004 ; AVX512F-NEXT:    retq
   2005 ;
   2006 ; AVX512F-32-LABEL: shuffle_v8i64_z8zazcze:
   2007 ; AVX512F-32:       # BB#0:
   2008 ; AVX512F-32-NEXT:    vpxord %zmm0, %zmm0, %zmm0
   2009 ; AVX512F-32-NEXT:    vpunpcklqdq {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
   2010 ; AVX512F-32-NEXT:    retl
   2011   %shuffle = shufflevector <8 x i64> zeroinitializer, <8 x i64> %b, <8 x i32><i32 7, i32 8, i32 5, i32 10, i32 3, i32 12, i32 1, i32 14>
   2012   ret <8 x i64> %shuffle
   2013 }
   2014 
   2015 define <8 x double> @shuffle_v8f64_193b5d7f(<8 x double> %a, <8 x double> %b) {
   2016 ; Mask 1,9,3,11,5,13,7,15 interleaves the odd elements of %a and %b; both runs expect a single vunpckhpd with no index constant.
   2017 ; AVX512F-LABEL: shuffle_v8f64_193b5d7f:
   2018 ; AVX512F:       # BB#0:
   2019 ; AVX512F-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   2020 ; AVX512F-NEXT:    retq
   2021 ;
   2022 ; AVX512F-32-LABEL: shuffle_v8f64_193b5d7f:
   2023 ; AVX512F-32:       # BB#0:
   2024 ; AVX512F-32-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   2025 ; AVX512F-32-NEXT:    retl
   2026   %shuffle = shufflevector <8 x double> %a, <8 x double> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   2027   ret <8 x double> %shuffle
   2028 }
   2029 
   2030 define <8 x double> @shuffle_v8f64_z9zbzdzf(<8 x double> %a, <8 x double> %b) {
   2031 ; Even result lanes read element 0 of the zeroinitializer first operand, odd result lanes take %b elements 1,3,5,7; the checks expect a zeroing vpxord followed by vunpckhpd. %a is unused.
   2032 ; AVX512F-LABEL: shuffle_v8f64_z9zbzdzf:
   2033 ; AVX512F:       # BB#0:
   2034 ; AVX512F-NEXT:    vpxord %zmm0, %zmm0, %zmm0
   2035 ; AVX512F-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   2036 ; AVX512F-NEXT:    retq
   2037 ;
   2038 ; AVX512F-32-LABEL: shuffle_v8f64_z9zbzdzf:
   2039 ; AVX512F-32:       # BB#0:
   2040 ; AVX512F-32-NEXT:    vpxord %zmm0, %zmm0, %zmm0
   2041 ; AVX512F-32-NEXT:    vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   2042 ; AVX512F-32-NEXT:    retl
   2043   %shuffle = shufflevector <8 x double> zeroinitializer, <8 x double> %b, <8 x i32><i32 0, i32 9, i32 0, i32 11, i32 0, i32 13, i32 0, i32 15>
   2044   ret <8 x double> %shuffle
   2045 }
   2046 
   2047 define <8 x i64> @shuffle_v8i64_193b5d7f(<8 x i64> %a, <8 x i64> %b) {
   2048 ; Interleaves the odd-indexed elements of %a with the odd-indexed elements of %b (mask indices 8..15 select from %b); both configurations expect a single vpunpckhqdq.
   2049 ; AVX512F-LABEL: shuffle_v8i64_193b5d7f:
   2050 ; AVX512F:       # BB#0:
   2051 ; AVX512F-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   2052 ; AVX512F-NEXT:    retq
   2053 ;
   2054 ; AVX512F-32-LABEL: shuffle_v8i64_193b5d7f:
   2055 ; AVX512F-32:       # BB#0:
   2056 ; AVX512F-32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   2057 ; AVX512F-32-NEXT:    retl
   2058   %shuffle = shufflevector <8 x i64> %a, <8 x i64> %b, <8 x i32><i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
   2059   ret <8 x i64> %shuffle
   2060 }
   2061 
   2062 define <8 x i64> @shuffle_v8i64_1z3z5z7z(<8 x i64> %a, <8 x i64> %b) {
   2063 ; Even result lanes take %a elements 1,3,5,7, odd result lanes read the zeroinitializer second operand (indices 8 and 15); the checks expect a zeroing vpxord followed by vpunpckhqdq. %b is unused.
   2064 ; AVX512F-LABEL: shuffle_v8i64_1z3z5z7z:
   2065 ; AVX512F:       # BB#0:
   2066 ; AVX512F-NEXT:    vpxord %zmm1, %zmm1, %zmm1
   2067 ; AVX512F-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   2068 ; AVX512F-NEXT:    retq
   2069 ;
   2070 ; AVX512F-32-LABEL: shuffle_v8i64_1z3z5z7z:
   2071 ; AVX512F-32:       # BB#0:
   2072 ; AVX512F-32-NEXT:    vpxord %zmm1, %zmm1, %zmm1
   2073 ; AVX512F-32-NEXT:    vpunpckhqdq {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
   2074 ; AVX512F-32-NEXT:    retl
   2075   %shuffle = shufflevector <8 x i64> %a, <8 x i64> zeroinitializer, <8 x i32><i32 1, i32 8, i32 3, i32 15, i32 5, i32 8, i32 7, i32 15>
   2076   ret <8 x i64> %shuffle
   2077 }
   2078 
   2079 define <8 x double> @test_vshuff64x2_512(<8 x double> %x, <8 x double> %x1) nounwind { ; unmasked register-register shuffle of adjacent element pairs; checks expect one vshuff64x2
   2080 ; AVX512F-LABEL: test_vshuff64x2_512:
   2081 ; AVX512F:       # BB#0:
   2082 ; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
   2083 ; AVX512F-NEXT:    retq
   2084 ;
   2085 ; AVX512F-32-LABEL: test_vshuff64x2_512:
   2086 ; AVX512F-32:       # BB#0:
   2087 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
   2088 ; AVX512F-32-NEXT:    retl
   2089   %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9> ; result = %x[0,1], %x[4,5], %x1[2,3], %x1[0,1]
   2090   ret <8 x double> %res
   2091 }
   2092 
   2093 define <8 x double> @test_vshuff64x2_512_maskz(<8 x double> %x, <8 x double> %x1, <8 x i1> %mask) nounwind { ; pair-wise two-source shuffle whose result is zeroed where %mask is false
   2094 ; AVX512F-LABEL: test_vshuff64x2_512_maskz:
   2095 ; AVX512F:       # BB#0:
   2096 ; AVX512F-NEXT:    vpmovsxwq %xmm2, %zmm2
   2097 ; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
   2098 ; AVX512F-NEXT:    vptestmq %zmm2, %zmm2, %k1
   2099 ; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
   2100 ; AVX512F-NEXT:    retq
   2101 ; The 32-bit vpandq operand is a constant-pool label; it is matched with a regex because the label number depends on this function's position in the file.
   2102 ; AVX512F-32-LABEL: test_vshuff64x2_512_maskz:
   2103 ; AVX512F-32:       # BB#0:
   2104 ; AVX512F-32-NEXT:    vpmovsxwq %xmm2, %zmm2
   2105 ; AVX512F-32-NEXT:    vpandq {{\.LCPI.*}}, %zmm2, %zmm2
   2106 ; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
   2107 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
   2108 ; AVX512F-32-NEXT:    retl
   2109   %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9> ; %y = %x[0,1], %x[4,5], %x1[2,3], %x1[0,1]
   2110   %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer ; lanes with a false mask bit become zero
   2111   ret <8 x double> %res
   2112 }
   2113 
   2114 define <8 x i64> @test_vshufi64x2_512_mask(<8 x i64> %x, <8 x i64> %x1, <8 x i1> %mask) nounwind { ; pair-wise two-source integer shuffle merged with %x where %mask is false
   2115 ; AVX512F-LABEL: test_vshufi64x2_512_mask:
   2116 ; AVX512F:       # BB#0:
   2117 ; AVX512F-NEXT:    vpmovsxwq %xmm2, %zmm2
   2118 ; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm2, %zmm2
   2119 ; AVX512F-NEXT:    vptestmq %zmm2, %zmm2, %k1
   2120 ; AVX512F-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
   2121 ; AVX512F-NEXT:    retq
   2122 ; The 32-bit vpandq operand is a constant-pool label; it is matched with a regex because the label number depends on this function's position in the file.
   2123 ; AVX512F-32-LABEL: test_vshufi64x2_512_mask:
   2124 ; AVX512F-32:       # BB#0:
   2125 ; AVX512F-32-NEXT:    vpmovsxwq %xmm2, %zmm2
   2126 ; AVX512F-32-NEXT:    vpandq {{\.LCPI.*}}, %zmm2, %zmm2
   2127 ; AVX512F-32-NEXT:    vptestmq %zmm2, %zmm2, %k1
   2128 ; AVX512F-32-NEXT:    vshufi64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],zmm1[2,3,0,1]
   2129 ; AVX512F-32-NEXT:    retl
   2130   %y = shufflevector <8 x i64> %x, <8 x i64> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9> ; %y = %x[0,1], %x[4,5], %x1[2,3], %x1[0,1]
   2131   %res = select <8 x i1> %mask, <8 x i64> %y, <8 x i64> %x ; lanes with a false mask bit keep the corresponding element of %x
   2132   ret <8 x i64> %res
   2133 }
   2134 
   2135 define <8 x double> @test_vshuff64x2_512_mem(<8 x double> %x, <8 x double> *%ptr) nounwind { ; unmasked shuffle whose second source is loaded from %ptr; checks expect the load as the mem operand of vshuff64x2
   2136 ; AVX512F-LABEL: test_vshuff64x2_512_mem:
   2137 ; AVX512F:       # BB#0:
   2138 ; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
   2139 ; AVX512F-NEXT:    retq
   2140 ;
   2141 ; AVX512F-32-LABEL: test_vshuff64x2_512_mem:
   2142 ; AVX512F-32:       # BB#0:
   2143 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2144 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
   2145 ; AVX512F-32-NEXT:    retl
   2146   %x1   = load <8 x double>,<8 x double> *%ptr,align 1 ; load carries only 1-byte alignment
   2147   %res = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9> ; result = %x[0,1], %x[4,5], %x1[2,3], %x1[0,1]
   2148   ret <8 x double> %res
   2149 }
   2150 
   2151 define <8 x double> @test_vshuff64x2_512_mem_mask(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind { ; shuffle with a loaded second source, merged with %x where %mask is false
   2152 ; AVX512F-LABEL: test_vshuff64x2_512_mem_mask:
   2153 ; AVX512F:       # BB#0:
   2154 ; AVX512F-NEXT:    vpmovsxwq %xmm1, %zmm1
   2155 ; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
   2156 ; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
   2157 ; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
   2158 ; AVX512F-NEXT:    retq
   2159 ; The 32-bit vpandq operand is a constant-pool label; it is matched with a regex because the label number depends on this function's position in the file.
   2160 ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_mask:
   2161 ; AVX512F-32:       # BB#0:
   2162 ; AVX512F-32-NEXT:    vpmovsxwq %xmm1, %zmm1
   2163 ; AVX512F-32-NEXT:    vpandq {{\.LCPI.*}}, %zmm1, %zmm1
   2164 ; AVX512F-32-NEXT:    vptestmq %zmm1, %zmm1, %k1
   2165 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2166 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
   2167 ; AVX512F-32-NEXT:    retl
   2168   %x1 = load <8 x double>,<8 x double> *%ptr,align 1 ; load carries only 1-byte alignment
   2169   %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9> ; %y = %x[0,1], %x[4,5], %x1[2,3], %x1[0,1]
   2170   %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> %x ; lanes with a false mask bit keep the corresponding element of %x
   2171   ret <8 x double> %res
   2172 }
   2173 
   2174 define <8 x double> @test_vshuff64x2_512_mem_maskz(<8 x double> %x, <8 x double> *%ptr, <8 x i1> %mask) nounwind { ; shuffle with a loaded second source, zeroed where %mask is false
   2175 ; AVX512F-LABEL: test_vshuff64x2_512_mem_maskz:
   2176 ; AVX512F:       # BB#0:
   2177 ; AVX512F-NEXT:    vpmovsxwq %xmm1, %zmm1
   2178 ; AVX512F-NEXT:    vpandq {{.*}}(%rip){1to8}, %zmm1, %zmm1
   2179 ; AVX512F-NEXT:    vptestmq %zmm1, %zmm1, %k1
   2180 ; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
   2181 ; AVX512F-NEXT:    retq
   2182 ; The 32-bit vpandq operand is a constant-pool label; it is matched with a regex because the label number depends on this function's position in the file.
   2183 ; AVX512F-32-LABEL: test_vshuff64x2_512_mem_maskz:
   2184 ; AVX512F-32:       # BB#0:
   2185 ; AVX512F-32-NEXT:    vpmovsxwq %xmm1, %zmm1
   2186 ; AVX512F-32-NEXT:    vpandq {{\.LCPI.*}}, %zmm1, %zmm1
   2187 ; AVX512F-32-NEXT:    vptestmq %zmm1, %zmm1, %k1
   2188 ; AVX512F-32-NEXT:    movl {{[0-9]+}}(%esp), %eax
   2189 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,4,5],mem[2,3,0,1]
   2190 ; AVX512F-32-NEXT:    retl
   2191   %x1 = load <8 x double>,<8 x double> *%ptr,align 1 ; load carries only 1-byte alignment
   2192   %y = shufflevector <8 x double> %x, <8 x double> %x1, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 10, i32 11, i32 8, i32 9> ; %y = %x[0,1], %x[4,5], %x1[2,3], %x1[0,1]
   2193   %res = select <8 x i1> %mask, <8 x double> %y, <8 x double> zeroinitializer ; lanes with a false mask bit become zero
   2194   ret <8 x double> %res
   2195 }
   2196 
   2197 define <16 x float> @test_vshuff32x4_512(<16 x float> %x, <16 x float> %x1) nounwind { ; 16 x float shuffle moving groups of four floats; the checks expect it to be emitted as vshuff64x2
   2198 ; AVX512F-LABEL: test_vshuff32x4_512:
   2199 ; AVX512F:       # BB#0:
   2200 ; AVX512F-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
   2201 ; AVX512F-NEXT:    retq
   2202 ;
   2203 ; AVX512F-32-LABEL: test_vshuff32x4_512:
   2204 ; AVX512F-32:       # BB#0:
   2205 ; AVX512F-32-NEXT:    vshuff64x2 {{.*#+}} zmm0 = zmm0[0,1,2,3],zmm1[2,3,0,1]
   2206 ; AVX512F-32-NEXT:    retl
   2207   %res = shufflevector <16 x float> %x, <16 x float> %x1, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 16, i32 17, i32 18, i32 19> ; result = %x[0..7], %x1[4..7], %x1[0..3]
   2208   ret <16 x float> %res
   2209 }
   2210