; Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2
      4 
      5 ;
      6 ; Unary shuffle indices from registers
      7 ;
      8 
      9 define <4 x double> @var_shuffle_v4f64_v4f64_xxxx_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
     10 ; ALL-LABEL: var_shuffle_v4f64_v4f64_xxxx_i64:
     11 ; ALL:       # %bb.0:
     12 ; ALL-NEXT:    pushq %rbp
     13 ; ALL-NEXT:    movq %rsp, %rbp
     14 ; ALL-NEXT:    andq $-32, %rsp
     15 ; ALL-NEXT:    subq $64, %rsp
     16 ; ALL-NEXT:    andl $3, %esi
     17 ; ALL-NEXT:    andl $3, %ecx
     18 ; ALL-NEXT:    andl $3, %edx
     19 ; ALL-NEXT:    andl $3, %edi
     20 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
     21 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
     22 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
     23 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
     24 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
     25 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     26 ; ALL-NEXT:    movq %rbp, %rsp
     27 ; ALL-NEXT:    popq %rbp
     28 ; ALL-NEXT:    retq
     29   %x0 = extractelement <4 x double> %x, i64 %i0
     30   %x1 = extractelement <4 x double> %x, i64 %i1
     31   %x2 = extractelement <4 x double> %x, i64 %i2
     32   %x3 = extractelement <4 x double> %x, i64 %i3
     33   %r0 = insertelement <4 x double> undef, double %x0, i32 0
     34   %r1 = insertelement <4 x double>   %r0, double %x1, i32 1
     35   %r2 = insertelement <4 x double>   %r1, double %x2, i32 2
     36   %r3 = insertelement <4 x double>   %r2, double %x3, i32 3
     37   ret <4 x double> %r3
     38 }
     39 
     40 define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; Lane 0 is undef and lane 3 is constant 0.0, so only %i1/%i2 are clamped.
; The undef low lane lets codegen use a vmovddup splat, and vmovsd's
; implicit upper-zeroing (mem[0],zero) supplies the zero in lane 3.
     41 ; ALL-LABEL: var_shuffle_v4f64_v4f64_uxx0_i64:
     42 ; ALL:       # %bb.0:
     43 ; ALL-NEXT:    pushq %rbp
     44 ; ALL-NEXT:    movq %rsp, %rbp
     45 ; ALL-NEXT:    andq $-32, %rsp
     46 ; ALL-NEXT:    subq $64, %rsp
     47 ; ALL-NEXT:    andl $3, %edx
     48 ; ALL-NEXT:    andl $3, %esi
     49 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
     50 ; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
     51 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
     52 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     53 ; ALL-NEXT:    movq %rbp, %rsp
     54 ; ALL-NEXT:    popq %rbp
     55 ; ALL-NEXT:    retq
     56   %x0 = extractelement <4 x double> %x, i64 %i0
     57   %x1 = extractelement <4 x double> %x, i64 %i1
     58   %x2 = extractelement <4 x double> %x, i64 %i2
     59   %x3 = extractelement <4 x double> %x, i64 %i3
     60   %r0 = insertelement <4 x double> undef, double undef, i32 0
     61   %r1 = insertelement <4 x double>   %r0, double   %x1, i32 1
     62   %r2 = insertelement <4 x double>   %r1, double   %x2, i32 2
     63   %r3 = insertelement <4 x double>   %r2, double   0.0, i32 3
     64   ret <4 x double> %r3
     65 }
     66 
     67 define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; Source is only <2 x double>: indices are clamped with 'andl $1' and the
; 16-byte spill lands at a negative %rsp offset (red zone), so no frame
; setup (push/andq/subq) is emitted.
     68 ; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64:
     69 ; ALL:       # %bb.0:
     70 ; ALL-NEXT:    andl $1, %esi
     71 ; ALL-NEXT:    andl $1, %ecx
     72 ; ALL-NEXT:    andl $1, %edx
     73 ; ALL-NEXT:    andl $1, %edi
     74 ; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
     75 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
     76 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
     77 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
     78 ; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
     79 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
     80 ; ALL-NEXT:    retq
     81   %x0 = extractelement <2 x double> %x, i64 %i0
     82   %x1 = extractelement <2 x double> %x, i64 %i1
     83   %x2 = extractelement <2 x double> %x, i64 %i2
     84   %x3 = extractelement <2 x double> %x, i64 %i3
     85   %r0 = insertelement <4 x double> undef, double %x0, i32 0
     86   %r1 = insertelement <4 x double>   %r0, double %x1, i32 1
     87   %r2 = insertelement <4 x double>   %r1, double %x2, i32 2
     88   %r3 = insertelement <4 x double>   %r2, double %x3, i32 3
     89   ret <4 x double> %r3
     90 }
     91 
     92 define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; Integer variant of the v4f64 test above: same spill-and-clamp pattern,
; with the i64 lanes reassembled through vmovsd/vmovlhps and vinsertf128.
     93 ; ALL-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
     94 ; ALL:       # %bb.0:
     95 ; ALL-NEXT:    pushq %rbp
     96 ; ALL-NEXT:    movq %rsp, %rbp
     97 ; ALL-NEXT:    andq $-32, %rsp
     98 ; ALL-NEXT:    subq $64, %rsp
     99 ; ALL-NEXT:    andl $3, %edi
    100 ; ALL-NEXT:    andl $3, %esi
    101 ; ALL-NEXT:    andl $3, %edx
    102 ; ALL-NEXT:    andl $3, %ecx
    103 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
    104 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    105 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    106 ; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    107 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    108 ; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
    109 ; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
    110 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    111 ; ALL-NEXT:    movq %rbp, %rsp
    112 ; ALL-NEXT:    popq %rbp
    113 ; ALL-NEXT:    retq
    114   %x0 = extractelement <4 x i64> %x, i64 %i0
    115   %x1 = extractelement <4 x i64> %x, i64 %i1
    116   %x2 = extractelement <4 x i64> %x, i64 %i2
    117   %x3 = extractelement <4 x i64> %x, i64 %i3
    118   %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
    119   %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
    120   %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
    121   %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
    122   ret <4 x i64> %r3
    123 }
    124 
    125 define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; Upper two lanes are constant zero, so only %i0/%i1 are clamped and just
; the low xmm is materialized; a VEX xmm write zeroes the upper ymm half,
; which covers lanes 2 and 3 with no extra instructions.
    126 ; ALL-LABEL: var_shuffle_v4i64_v4i64_xx00_i64:
    127 ; ALL:       # %bb.0:
    128 ; ALL-NEXT:    pushq %rbp
    129 ; ALL-NEXT:    movq %rsp, %rbp
    130 ; ALL-NEXT:    andq $-32, %rsp
    131 ; ALL-NEXT:    subq $64, %rsp
    132 ; ALL-NEXT:    andl $3, %edi
    133 ; ALL-NEXT:    andl $3, %esi
    134 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
    135 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    136 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    137 ; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    138 ; ALL-NEXT:    movq %rbp, %rsp
    139 ; ALL-NEXT:    popq %rbp
    140 ; ALL-NEXT:    retq
    141   %x0 = extractelement <4 x i64> %x, i64 %i0
    142   %x1 = extractelement <4 x i64> %x, i64 %i1
    143   %x2 = extractelement <4 x i64> %x, i64 %i2
    144   %x3 = extractelement <4 x i64> %x, i64 %i3
    145   %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
    146   %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
    147   %r2 = insertelement <4 x i64>   %r1, i64   0, i32 2
    148   %r3 = insertelement <4 x i64>   %r2, i64   0, i32 3
    149   ret <4 x i64> %r3
    150 }
    151 
    152 define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; <2 x i64> source: 'andl $1' clamps each index and the xmm spill fits in
; the red zone (negative %rsp offset), so no stack frame is created.
    153 ; ALL-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
    154 ; ALL:       # %bb.0:
    155 ; ALL-NEXT:    andl $1, %edi
    156 ; ALL-NEXT:    andl $1, %esi
    157 ; ALL-NEXT:    andl $1, %edx
    158 ; ALL-NEXT:    andl $1, %ecx
    159 ; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
    160 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    161 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    162 ; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    163 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    164 ; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
    165 ; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
    166 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    167 ; ALL-NEXT:    retq
    168   %x0 = extractelement <2 x i64> %x, i64 %i0
    169   %x1 = extractelement <2 x i64> %x, i64 %i1
    170   %x2 = extractelement <2 x i64> %x, i64 %i2
    171   %x3 = extractelement <2 x i64> %x, i64 %i3
    172   %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
    173   %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
    174   %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
    175   %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
    176   ret <4 x i64> %r3
    177 }
    178 
    179 define <8 x float> @var_shuffle_v8f32_v8f32_xxxxxxxx_i32(<8 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
; Eight i32 indices: the first six arrive in registers (the 'kill' notes
; mark the 32-bit args being widened for 64-bit addressing), while %i6/%i7
; come from the stack at 16(%rbp)/24(%rbp). Each index is clamped with
; 'andl $7' and both halves are rebuilt via vinsertps chains.
    180 ; ALL-LABEL: var_shuffle_v8f32_v8f32_xxxxxxxx_i32:
    181 ; ALL:       # %bb.0:
    182 ; ALL-NEXT:    pushq %rbp
    183 ; ALL-NEXT:    movq %rsp, %rbp
    184 ; ALL-NEXT:    andq $-32, %rsp
    185 ; ALL-NEXT:    subq $64, %rsp
    186 ; ALL-NEXT:    # kill: def $r9d killed $r9d def $r9
    187 ; ALL-NEXT:    # kill: def $r8d killed $r8d def $r8
    188 ; ALL-NEXT:    # kill: def $ecx killed $ecx def $rcx
    189 ; ALL-NEXT:    # kill: def $edx killed $edx def $rdx
    190 ; ALL-NEXT:    # kill: def $esi killed $esi def $rsi
    191 ; ALL-NEXT:    # kill: def $edi killed $edi def $rdi
    192 ; ALL-NEXT:    movl 24(%rbp), %r10d
    193 ; ALL-NEXT:    andl $7, %r10d
    194 ; ALL-NEXT:    movl 16(%rbp), %eax
    195 ; ALL-NEXT:    andl $7, %eax
    196 ; ALL-NEXT:    andl $7, %edi
    197 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
    198 ; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    199 ; ALL-NEXT:    andl $7, %esi
    200 ; ALL-NEXT:    andl $7, %edx
    201 ; ALL-NEXT:    andl $7, %ecx
    202 ; ALL-NEXT:    andl $7, %r8d
    203 ; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    204 ; ALL-NEXT:    andl $7, %r9d
    205 ; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
    206 ; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
    207 ; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
    208 ; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
    209 ; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
    210 ; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
    211 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    212 ; ALL-NEXT:    movq %rbp, %rsp
    213 ; ALL-NEXT:    popq %rbp
    214 ; ALL-NEXT:    retq
    215   %x0 = extractelement <8 x float> %x, i32 %i0
    216   %x1 = extractelement <8 x float> %x, i32 %i1
    217   %x2 = extractelement <8 x float> %x, i32 %i2
    218   %x3 = extractelement <8 x float> %x, i32 %i3
    219   %x4 = extractelement <8 x float> %x, i32 %i4
    220   %x5 = extractelement <8 x float> %x, i32 %i5
    221   %x6 = extractelement <8 x float> %x, i32 %i6
    222   %x7 = extractelement <8 x float> %x, i32 %i7
    223   %r0 = insertelement <8 x float> undef, float %x0, i32 0
    224   %r1 = insertelement <8 x float>   %r0, float %x1, i32 1
    225   %r2 = insertelement <8 x float>   %r1, float %x2, i32 2
    226   %r3 = insertelement <8 x float>   %r2, float %x3, i32 3
    227   %r4 = insertelement <8 x float>   %r3, float %x4, i32 4
    228   %r5 = insertelement <8 x float>   %r4, float %x5, i32 5
    229   %r6 = insertelement <8 x float>   %r5, float %x6, i32 6
    230   %r7 = insertelement <8 x float>   %r6, float %x7, i32 7
    231   ret <8 x float> %r7
    232 }
    233 
    234 define <8 x float> @var_shuffle_v8f32_v4f32_xxxxxxxx_i32(<4 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
; Same as the v8f32 test above but from a <4 x float> source: 'andl $3'
; clamps, the spill uses the red zone, and the stack-passed %i6/%i7 are
; read via %rsp offsets (regex-matched) since no frame pointer is set up.
    235 ; ALL-LABEL: var_shuffle_v8f32_v4f32_xxxxxxxx_i32:
    236 ; ALL:       # %bb.0:
    237 ; ALL-NEXT:    # kill: def $r9d killed $r9d def $r9
    238 ; ALL-NEXT:    # kill: def $r8d killed $r8d def $r8
    239 ; ALL-NEXT:    # kill: def $ecx killed $ecx def $rcx
    240 ; ALL-NEXT:    # kill: def $edx killed $edx def $rdx
    241 ; ALL-NEXT:    # kill: def $esi killed $esi def $rsi
    242 ; ALL-NEXT:    # kill: def $edi killed $edi def $rdi
    243 ; ALL-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
    244 ; ALL-NEXT:    andl $3, %r10d
    245 ; ALL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    246 ; ALL-NEXT:    andl $3, %eax
    247 ; ALL-NEXT:    andl $3, %edi
    248 ; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
    249 ; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
    250 ; ALL-NEXT:    andl $3, %esi
    251 ; ALL-NEXT:    andl $3, %edx
    252 ; ALL-NEXT:    andl $3, %ecx
    253 ; ALL-NEXT:    andl $3, %r8d
    254 ; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
    255 ; ALL-NEXT:    andl $3, %r9d
    256 ; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
    257 ; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
    258 ; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
    259 ; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
    260 ; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
    261 ; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
    262 ; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    263 ; ALL-NEXT:    retq
    264   %x0 = extractelement <4 x float> %x, i32 %i0
    265   %x1 = extractelement <4 x float> %x, i32 %i1
    266   %x2 = extractelement <4 x float> %x, i32 %i2
    267   %x3 = extractelement <4 x float> %x, i32 %i3
    268   %x4 = extractelement <4 x float> %x, i32 %i4
    269   %x5 = extractelement <4 x float> %x, i32 %i5
    270   %x6 = extractelement <4 x float> %x, i32 %i6
    271   %x7 = extractelement <4 x float> %x, i32 %i7
    272   %r0 = insertelement <8 x float> undef, float %x0, i32 0
    273   %r1 = insertelement <8 x float>   %r0, float %x1, i32 1
    274   %r2 = insertelement <8 x float>   %r1, float %x2, i32 2
    275   %r3 = insertelement <8 x float>   %r2, float %x3, i32 3
    276   %r4 = insertelement <8 x float>   %r3, float %x4, i32 4
    277   %r5 = insertelement <8 x float>   %r4, float %x5, i32 5
    278   %r6 = insertelement <8 x float>   %r5, float %x6, i32 6
    279   %r7 = insertelement <8 x float>   %r6, float %x7, i32 7
    280   ret <8 x float> %r7
    281 }
    282 
    283 define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
; Sixteen i16 lanes: %i0..%i5 arrive in registers, %i6..%i15 on the stack
; (16(%rbp) upward). Each index is clamped with 'andl $15' and both halves
; are built with movzwl/vmovd followed by vpinsrw chains. AVX1 and AVX2
; differ only in the final 128-bit join: vinsertf128 vs vinserti128.
    284 ; AVX1-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
    285 ; AVX1:       # %bb.0:
    286 ; AVX1-NEXT:    pushq %rbp
    287 ; AVX1-NEXT:    movq %rsp, %rbp
    288 ; AVX1-NEXT:    andq $-32, %rsp
    289 ; AVX1-NEXT:    subq $64, %rsp
    290 ; AVX1-NEXT:    # kill: def $r9d killed $r9d def $r9
    291 ; AVX1-NEXT:    # kill: def $r8d killed $r8d def $r8
    292 ; AVX1-NEXT:    # kill: def $ecx killed $ecx def $rcx
    293 ; AVX1-NEXT:    # kill: def $edx killed $edx def $rdx
    294 ; AVX1-NEXT:    # kill: def $esi killed $esi def $rsi
    295 ; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
    296 ; AVX1-NEXT:    andl $15, %edi
    297 ; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
    298 ; AVX1-NEXT:    movzwl (%rsp,%rdi,2), %eax
    299 ; AVX1-NEXT:    vmovd %eax, %xmm0
    300 ; AVX1-NEXT:    andl $15, %esi
    301 ; AVX1-NEXT:    vpinsrw $1, (%rsp,%rsi,2), %xmm0, %xmm0
    302 ; AVX1-NEXT:    andl $15, %edx
    303 ; AVX1-NEXT:    vpinsrw $2, (%rsp,%rdx,2), %xmm0, %xmm0
    304 ; AVX1-NEXT:    andl $15, %ecx
    305 ; AVX1-NEXT:    vpinsrw $3, (%rsp,%rcx,2), %xmm0, %xmm0
    306 ; AVX1-NEXT:    andl $15, %r8d
    307 ; AVX1-NEXT:    vpinsrw $4, (%rsp,%r8,2), %xmm0, %xmm0
    308 ; AVX1-NEXT:    andl $15, %r9d
    309 ; AVX1-NEXT:    vpinsrw $5, (%rsp,%r9,2), %xmm0, %xmm0
    310 ; AVX1-NEXT:    movl 16(%rbp), %eax
    311 ; AVX1-NEXT:    andl $15, %eax
    312 ; AVX1-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
    313 ; AVX1-NEXT:    movl 24(%rbp), %eax
    314 ; AVX1-NEXT:    andl $15, %eax
    315 ; AVX1-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
    316 ; AVX1-NEXT:    movl 32(%rbp), %eax
    317 ; AVX1-NEXT:    andl $15, %eax
    318 ; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
    319 ; AVX1-NEXT:    vmovd %eax, %xmm1
    320 ; AVX1-NEXT:    movl 40(%rbp), %eax
    321 ; AVX1-NEXT:    andl $15, %eax
    322 ; AVX1-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
    323 ; AVX1-NEXT:    movl 48(%rbp), %eax
    324 ; AVX1-NEXT:    andl $15, %eax
    325 ; AVX1-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
    326 ; AVX1-NEXT:    movl 56(%rbp), %eax
    327 ; AVX1-NEXT:    andl $15, %eax
    328 ; AVX1-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
    329 ; AVX1-NEXT:    movl 64(%rbp), %eax
    330 ; AVX1-NEXT:    andl $15, %eax
    331 ; AVX1-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
    332 ; AVX1-NEXT:    movl 72(%rbp), %eax
    333 ; AVX1-NEXT:    andl $15, %eax
    334 ; AVX1-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
    335 ; AVX1-NEXT:    movl 80(%rbp), %eax
    336 ; AVX1-NEXT:    andl $15, %eax
    337 ; AVX1-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
    338 ; AVX1-NEXT:    movl 88(%rbp), %eax
    339 ; AVX1-NEXT:    andl $15, %eax
    340 ; AVX1-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
    341 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    342 ; AVX1-NEXT:    movq %rbp, %rsp
    343 ; AVX1-NEXT:    popq %rbp
    344 ; AVX1-NEXT:    retq
    345 ;
    346 ; AVX2-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
    347 ; AVX2:       # %bb.0:
    348 ; AVX2-NEXT:    pushq %rbp
    349 ; AVX2-NEXT:    movq %rsp, %rbp
    350 ; AVX2-NEXT:    andq $-32, %rsp
    351 ; AVX2-NEXT:    subq $64, %rsp
    352 ; AVX2-NEXT:    # kill: def $r9d killed $r9d def $r9
    353 ; AVX2-NEXT:    # kill: def $r8d killed $r8d def $r8
    354 ; AVX2-NEXT:    # kill: def $ecx killed $ecx def $rcx
    355 ; AVX2-NEXT:    # kill: def $edx killed $edx def $rdx
    356 ; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
    357 ; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
    358 ; AVX2-NEXT:    andl $15, %edi
    359 ; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
    360 ; AVX2-NEXT:    movzwl (%rsp,%rdi,2), %eax
    361 ; AVX2-NEXT:    vmovd %eax, %xmm0
    362 ; AVX2-NEXT:    andl $15, %esi
    363 ; AVX2-NEXT:    vpinsrw $1, (%rsp,%rsi,2), %xmm0, %xmm0
    364 ; AVX2-NEXT:    andl $15, %edx
    365 ; AVX2-NEXT:    vpinsrw $2, (%rsp,%rdx,2), %xmm0, %xmm0
    366 ; AVX2-NEXT:    andl $15, %ecx
    367 ; AVX2-NEXT:    vpinsrw $3, (%rsp,%rcx,2), %xmm0, %xmm0
    368 ; AVX2-NEXT:    andl $15, %r8d
    369 ; AVX2-NEXT:    vpinsrw $4, (%rsp,%r8,2), %xmm0, %xmm0
    370 ; AVX2-NEXT:    andl $15, %r9d
    371 ; AVX2-NEXT:    vpinsrw $5, (%rsp,%r9,2), %xmm0, %xmm0
    372 ; AVX2-NEXT:    movl 16(%rbp), %eax
    373 ; AVX2-NEXT:    andl $15, %eax
    374 ; AVX2-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
    375 ; AVX2-NEXT:    movl 24(%rbp), %eax
    376 ; AVX2-NEXT:    andl $15, %eax
    377 ; AVX2-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
    378 ; AVX2-NEXT:    movl 32(%rbp), %eax
    379 ; AVX2-NEXT:    andl $15, %eax
    380 ; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
    381 ; AVX2-NEXT:    vmovd %eax, %xmm1
    382 ; AVX2-NEXT:    movl 40(%rbp), %eax
    383 ; AVX2-NEXT:    andl $15, %eax
    384 ; AVX2-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
    385 ; AVX2-NEXT:    movl 48(%rbp), %eax
    386 ; AVX2-NEXT:    andl $15, %eax
    387 ; AVX2-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
    388 ; AVX2-NEXT:    movl 56(%rbp), %eax
    389 ; AVX2-NEXT:    andl $15, %eax
    390 ; AVX2-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
    391 ; AVX2-NEXT:    movl 64(%rbp), %eax
    392 ; AVX2-NEXT:    andl $15, %eax
    393 ; AVX2-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
    394 ; AVX2-NEXT:    movl 72(%rbp), %eax
    395 ; AVX2-NEXT:    andl $15, %eax
    396 ; AVX2-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
    397 ; AVX2-NEXT:    movl 80(%rbp), %eax
    398 ; AVX2-NEXT:    andl $15, %eax
    399 ; AVX2-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
    400 ; AVX2-NEXT:    movl 88(%rbp), %eax
    401 ; AVX2-NEXT:    andl $15, %eax
    402 ; AVX2-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
    403 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    404 ; AVX2-NEXT:    movq %rbp, %rsp
    405 ; AVX2-NEXT:    popq %rbp
    406 ; AVX2-NEXT:    retq
    407   %x0  = extractelement <16 x i16> %x, i32 %i0
    408   %x1  = extractelement <16 x i16> %x, i32 %i1
    409   %x2  = extractelement <16 x i16> %x, i32 %i2
    410   %x3  = extractelement <16 x i16> %x, i32 %i3
    411   %x4  = extractelement <16 x i16> %x, i32 %i4
    412   %x5  = extractelement <16 x i16> %x, i32 %i5
    413   %x6  = extractelement <16 x i16> %x, i32 %i6
    414   %x7  = extractelement <16 x i16> %x, i32 %i7
    415   %x8  = extractelement <16 x i16> %x, i32 %i8
    416   %x9  = extractelement <16 x i16> %x, i32 %i9
    417   %x10 = extractelement <16 x i16> %x, i32 %i10
    418   %x11 = extractelement <16 x i16> %x, i32 %i11
    419   %x12 = extractelement <16 x i16> %x, i32 %i12
    420   %x13 = extractelement <16 x i16> %x, i32 %i13
    421   %x14 = extractelement <16 x i16> %x, i32 %i14
    422   %x15 = extractelement <16 x i16> %x, i32 %i15
    423   %r0  = insertelement <16 x i16> undef, i16 %x0 , i32 0
    424   %r1  = insertelement <16 x i16>  %r0 , i16 %x1 , i32 1
    425   %r2  = insertelement <16 x i16>  %r1 , i16 %x2 , i32 2
    426   %r3  = insertelement <16 x i16>  %r2 , i16 %x3 , i32 3
    427   %r4  = insertelement <16 x i16>  %r3 , i16 %x4 , i32 4
    428   %r5  = insertelement <16 x i16>  %r4 , i16 %x5 , i32 5
    429   %r6  = insertelement <16 x i16>  %r5 , i16 %x6 , i32 6
    430   %r7  = insertelement <16 x i16>  %r6 , i16 %x7 , i32 7
    431   %r8  = insertelement <16 x i16>  %r7 , i16 %x8 , i32 8
    432   %r9  = insertelement <16 x i16>  %r8 , i16 %x9 , i32 9
    433   %r10 = insertelement <16 x i16>  %r9 , i16 %x10, i32 10
    434   %r11 = insertelement <16 x i16>  %r10, i16 %x11, i32 11
    435   %r12 = insertelement <16 x i16>  %r11, i16 %x12, i32 12
    436   %r13 = insertelement <16 x i16>  %r12, i16 %x13, i32 13
    437   %r14 = insertelement <16 x i16>  %r13, i16 %x14, i32 14
    438   %r15 = insertelement <16 x i16>  %r14, i16 %x15, i32 15
    439   ret <16 x i16> %r15
    440 }
    441 
    442 define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
; <8 x i16> source variant: 'andl $7' clamps each index, the xmm spill sits
; in the red zone at -24(%rsp), and the ten stack-passed indices are loaded
; from regex-matched %rsp offsets (no frame pointer). AVX1/AVX2 again differ
; only in vinsertf128 vs vinserti128.
    443 ; AVX1-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
    444 ; AVX1:       # %bb.0:
    445 ; AVX1-NEXT:    # kill: def $r9d killed $r9d def $r9
    446 ; AVX1-NEXT:    # kill: def $r8d killed $r8d def $r8
    447 ; AVX1-NEXT:    # kill: def $ecx killed $ecx def $rcx
    448 ; AVX1-NEXT:    # kill: def $edx killed $edx def $rdx
    449 ; AVX1-NEXT:    # kill: def $esi killed $esi def $rsi
    450 ; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
    451 ; AVX1-NEXT:    andl $7, %edi
    452 ; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
    453 ; AVX1-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
    454 ; AVX1-NEXT:    vmovd %eax, %xmm0
    455 ; AVX1-NEXT:    andl $7, %esi
    456 ; AVX1-NEXT:    vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
    457 ; AVX1-NEXT:    andl $7, %edx
    458 ; AVX1-NEXT:    vpinsrw $2, -24(%rsp,%rdx,2), %xmm0, %xmm0
    459 ; AVX1-NEXT:    andl $7, %ecx
    460 ; AVX1-NEXT:    vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
    461 ; AVX1-NEXT:    andl $7, %r8d
    462 ; AVX1-NEXT:    vpinsrw $4, -24(%rsp,%r8,2), %xmm0, %xmm0
    463 ; AVX1-NEXT:    andl $7, %r9d
    464 ; AVX1-NEXT:    vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
    465 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    466 ; AVX1-NEXT:    andl $7, %eax
    467 ; AVX1-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
    468 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    469 ; AVX1-NEXT:    andl $7, %eax
    470 ; AVX1-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
    471 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    472 ; AVX1-NEXT:    andl $7, %eax
    473 ; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
    474 ; AVX1-NEXT:    vmovd %eax, %xmm1
    475 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    476 ; AVX1-NEXT:    andl $7, %eax
    477 ; AVX1-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
    478 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    479 ; AVX1-NEXT:    andl $7, %eax
    480 ; AVX1-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
    481 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    482 ; AVX1-NEXT:    andl $7, %eax
    483 ; AVX1-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
    484 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    485 ; AVX1-NEXT:    andl $7, %eax
    486 ; AVX1-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
    487 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    488 ; AVX1-NEXT:    andl $7, %eax
    489 ; AVX1-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
    490 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    491 ; AVX1-NEXT:    andl $7, %eax
    492 ; AVX1-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
    493 ; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    494 ; AVX1-NEXT:    andl $7, %eax
    495 ; AVX1-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
    496 ; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
    497 ; AVX1-NEXT:    retq
    498 ;
    499 ; AVX2-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
    500 ; AVX2:       # %bb.0:
    501 ; AVX2-NEXT:    # kill: def $r9d killed $r9d def $r9
    502 ; AVX2-NEXT:    # kill: def $r8d killed $r8d def $r8
    503 ; AVX2-NEXT:    # kill: def $ecx killed $ecx def $rcx
    504 ; AVX2-NEXT:    # kill: def $edx killed $edx def $rdx
    505 ; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
    506 ; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
    507 ; AVX2-NEXT:    andl $7, %edi
    508 ; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
    509 ; AVX2-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
    510 ; AVX2-NEXT:    vmovd %eax, %xmm0
    511 ; AVX2-NEXT:    andl $7, %esi
    512 ; AVX2-NEXT:    vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
    513 ; AVX2-NEXT:    andl $7, %edx
    514 ; AVX2-NEXT:    vpinsrw $2, -24(%rsp,%rdx,2), %xmm0, %xmm0
    515 ; AVX2-NEXT:    andl $7, %ecx
    516 ; AVX2-NEXT:    vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
    517 ; AVX2-NEXT:    andl $7, %r8d
    518 ; AVX2-NEXT:    vpinsrw $4, -24(%rsp,%r8,2), %xmm0, %xmm0
    519 ; AVX2-NEXT:    andl $7, %r9d
    520 ; AVX2-NEXT:    vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
    521 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    522 ; AVX2-NEXT:    andl $7, %eax
    523 ; AVX2-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
    524 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    525 ; AVX2-NEXT:    andl $7, %eax
    526 ; AVX2-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
    527 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    528 ; AVX2-NEXT:    andl $7, %eax
    529 ; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
    530 ; AVX2-NEXT:    vmovd %eax, %xmm1
    531 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    532 ; AVX2-NEXT:    andl $7, %eax
    533 ; AVX2-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
    534 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    535 ; AVX2-NEXT:    andl $7, %eax
    536 ; AVX2-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
    537 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    538 ; AVX2-NEXT:    andl $7, %eax
    539 ; AVX2-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
    540 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    541 ; AVX2-NEXT:    andl $7, %eax
    542 ; AVX2-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
    543 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    544 ; AVX2-NEXT:    andl $7, %eax
    545 ; AVX2-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
    546 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    547 ; AVX2-NEXT:    andl $7, %eax
    548 ; AVX2-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
    549 ; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
    550 ; AVX2-NEXT:    andl $7, %eax
    551 ; AVX2-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
    552 ; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
    553 ; AVX2-NEXT:    retq
    554   %x0  = extractelement <8 x i16> %x, i32 %i0
    555   %x1  = extractelement <8 x i16> %x, i32 %i1
    556   %x2  = extractelement <8 x i16> %x, i32 %i2
    557   %x3  = extractelement <8 x i16> %x, i32 %i3
    558   %x4  = extractelement <8 x i16> %x, i32 %i4
    559   %x5  = extractelement <8 x i16> %x, i32 %i5
    560   %x6  = extractelement <8 x i16> %x, i32 %i6
    561   %x7  = extractelement <8 x i16> %x, i32 %i7
    562   %x8  = extractelement <8 x i16> %x, i32 %i8
    563   %x9  = extractelement <8 x i16> %x, i32 %i9
    564   %x10 = extractelement <8 x i16> %x, i32 %i10
    565   %x11 = extractelement <8 x i16> %x, i32 %i11
    566   %x12 = extractelement <8 x i16> %x, i32 %i12
    567   %x13 = extractelement <8 x i16> %x, i32 %i13
    568   %x14 = extractelement <8 x i16> %x, i32 %i14
    569   %x15 = extractelement <8 x i16> %x, i32 %i15
    570   %r0  = insertelement <16 x i16> undef, i16 %x0 , i32 0
    571   %r1  = insertelement <16 x i16>  %r0 , i16 %x1 , i32 1
    572   %r2  = insertelement <16 x i16>  %r1 , i16 %x2 , i32 2
    573   %r3  = insertelement <16 x i16>  %r2 , i16 %x3 , i32 3
    574   %r4  = insertelement <16 x i16>  %r3 , i16 %x4 , i32 4
    575   %r5  = insertelement <16 x i16>  %r4 , i16 %x5 , i32 5
    576   %r6  = insertelement <16 x i16>  %r5 , i16 %x6 , i32 6
    577   %r7  = insertelement <16 x i16>  %r6 , i16 %x7 , i32 7
    578   %r8  = insertelement <16 x i16>  %r7 , i16 %x8 , i32 8
    579   %r9  = insertelement <16 x i16>  %r8 , i16 %x9 , i32 9
    580   %r10 = insertelement <16 x i16>  %r9 , i16 %x10, i32 10
    581   %r11 = insertelement <16 x i16>  %r10, i16 %x11, i32 11
    582   %r12 = insertelement <16 x i16>  %r11, i16 %x12, i32 12
    583   %r13 = insertelement <16 x i16>  %r12, i16 %x13, i32 13
    584   %r14 = insertelement <16 x i16>  %r13, i16 %x14, i32 14
    585   %r15 = insertelement <16 x i16>  %r14, i16 %x15, i32 15
    586   ret <16 x i16> %r15
    587 }
    588 
    589 ;
    590 ; Unary shuffle indices from memory
    591 ;
    592 
    593 define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwind {
; Indices come from memory (i64* %i) rather than registers: four 64-bit
; loads from 0/8/16/24(%rdi), each clamped with a 32-bit 'andl $3' before
; the usual spill-and-rebuild sequence.
    594 ; ALL-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
    595 ; ALL:       # %bb.0:
    596 ; ALL-NEXT:    pushq %rbp
    597 ; ALL-NEXT:    movq %rsp, %rbp
    598 ; ALL-NEXT:    andq $-32, %rsp
    599 ; ALL-NEXT:    subq $64, %rsp
    600 ; ALL-NEXT:    movq (%rdi), %rax
    601 ; ALL-NEXT:    movq 8(%rdi), %rcx
    602 ; ALL-NEXT:    andl $3, %eax
    603 ; ALL-NEXT:    andl $3, %ecx
    604 ; ALL-NEXT:    movq 16(%rdi), %rdx
    605 ; ALL-NEXT:    andl $3, %edx
    606 ; ALL-NEXT:    movq 24(%rdi), %rsi
    607 ; ALL-NEXT:    andl $3, %esi
    608 ; ALL-NEXT:    vmovaps %ymm0, (%rsp)
    609 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    610 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    611 ; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    612 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    613 ; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
    614 ; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
    615 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    616 ; ALL-NEXT:    movq %rbp, %rsp
    617 ; ALL-NEXT:    popq %rbp
    618 ; ALL-NEXT:    retq
    619   %p0  = getelementptr inbounds i64, i64* %i, i32 0
    620   %p1  = getelementptr inbounds i64, i64* %i, i32 1
    621   %p2  = getelementptr inbounds i64, i64* %i, i32 2
    622   %p3  = getelementptr inbounds i64, i64* %i, i32 3
    623   %i0  = load i64, i64* %p0, align 4
    624   %i1  = load i64, i64* %p1, align 4
    625   %i2  = load i64, i64* %p2, align 4
    626   %i3  = load i64, i64* %p3, align 4
    627   %x0 = extractelement <4 x i64> %x, i64 %i0
    628   %x1 = extractelement <4 x i64> %x, i64 %i1
    629   %x2 = extractelement <4 x i64> %x, i64 %i2
    630   %x3 = extractelement <4 x i64> %x, i64 %i3
    631   %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
    632   %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
    633   %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
    634   %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
    635   ret <4 x i64> %r3
    636 }
    637 
    638 define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwind {
; Memory-indexed variant with a <2 x i64> source: 'andl $1' clamps and the
; xmm spill uses the red zone, so no frame is required.
    639 ; ALL-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
    640 ; ALL:       # %bb.0:
    641 ; ALL-NEXT:    movq (%rdi), %rax
    642 ; ALL-NEXT:    movq 8(%rdi), %rcx
    643 ; ALL-NEXT:    andl $1, %eax
    644 ; ALL-NEXT:    andl $1, %ecx
    645 ; ALL-NEXT:    movq 16(%rdi), %rdx
    646 ; ALL-NEXT:    andl $1, %edx
    647 ; ALL-NEXT:    movq 24(%rdi), %rsi
    648 ; ALL-NEXT:    andl $1, %esi
    649 ; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
    650 ; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
    651 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    652 ; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
    653 ; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
    654 ; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
    655 ; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
    656 ; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
    657 ; ALL-NEXT:    retq
    658   %p0  = getelementptr inbounds i64, i64* %i, i32 0
    659   %p1  = getelementptr inbounds i64, i64* %i, i32 1
    660   %p2  = getelementptr inbounds i64, i64* %i, i32 2
    661   %p3  = getelementptr inbounds i64, i64* %i, i32 3
    662   %i0  = load i64, i64* %p0, align 4
    663   %i1  = load i64, i64* %p1, align 4
    664   %i2  = load i64, i64* %p2, align 4
    665   %i3  = load i64, i64* %p3, align 4
    666   %x0 = extractelement <2 x i64> %x, i64 %i0
    667   %x1 = extractelement <2 x i64> %x, i64 %i1
    668   %x2 = extractelement <2 x i64> %x, i64 %i2
    669   %x3 = extractelement <2 x i64> %x, i64 %i3
    670   %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
    671   %r1 = insertelement <4 x i64>   %r0, i64 %x1, i32 1
    672   %r2 = insertelement <4 x i64>   %r1, i64 %x2, i32 2
    673   %r3 = insertelement <4 x i64>   %r2, i64 %x3, i32 3
    674   ret <4 x i64> %r3
    675 }
    676