; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX1
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=ALL --check-prefix=AVX --check-prefix=AVX2

;
; Unary shuffle indices from registers
;

define <4 x double> @var_shuffle_v4f64_v4f64_xxxx_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v4f64_xxxx_i64:
; ALL:       # %bb.0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    andl $3, %esi
; ALL-NEXT:    andl $3, %ecx
; ALL-NEXT:    andl $3, %edx
; ALL-NEXT:    andl $3, %edi
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %x0 = extractelement <4 x double> %x, i64 %i0
  %x1 = extractelement <4 x double> %x, i64 %i1
  %x2 = extractelement <4 x double> %x, i64 %i2
  %x3 = extractelement <4 x double> %x, i64 %i3
  %r0 = insertelement <4 x double> undef, double %x0, i32 0
  %r1 = insertelement <4 x double> %r0, double %x1, i32 1
  %r2 = insertelement <4 x double> %r1, double %x2, i32 2
  %r3 = insertelement <4 x double> %r2, double %x3, i32 3
  ret <4 x double> %r3
}

define <4 x double> @var_shuffle_v4f64_v4f64_uxx0_i64(<4 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v4f64_uxx0_i64:
; ALL:       # %bb.0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    andl $3, %edx
; ALL-NEXT:    andl $3, %esi
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovddup {{.*#+}} xmm0 = mem[0,0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %x0 = extractelement <4 x double> %x, i64 %i0
  %x1 = extractelement <4 x double> %x, i64 %i1
  %x2 = extractelement <4 x double> %x, i64 %i2
  %x3 = extractelement <4 x double> %x, i64 %i3
  %r0 = insertelement <4 x double> undef, double undef, i32 0
  %r1 = insertelement <4 x double> %r0, double %x1, i32 1
  %r2 = insertelement <4 x double> %r1, double %x2, i32 2
  %r3 = insertelement <4 x double> %r2, double 0.0, i32 3
  ret <4 x double> %r3
}

define <4 x double> @var_shuffle_v4f64_v2f64_xxxx_i64(<2 x double> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4f64_v2f64_xxxx_i64:
; ALL:       # %bb.0:
; ALL-NEXT:    andl $1, %esi
; ALL-NEXT:    andl $1, %ecx
; ALL-NEXT:    andl $1, %edx
; ALL-NEXT:    andl $1, %edi
; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovhpd {{.*#+}} xmm1 = xmm1[0],mem[0]
; ALL-NEXT:    vmovhpd {{.*#+}} xmm0 = xmm0[0],mem[0]
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %x0 = extractelement <2 x double> %x, i64 %i0
  %x1 = extractelement <2 x double> %x, i64 %i1
  %x2 = extractelement <2 x double> %x, i64 %i2
  %x3 = extractelement <2 x double> %x, i64 %i3
  %r0 = insertelement <4 x double> undef, double %x0, i32 0
  %r1 = insertelement <4 x double> %r0, double %x1, i32 1
  %r2 = insertelement <4 x double> %r1, double %x2, i32 2
  %r3 = insertelement <4 x double> %r2, double %x3, i32 3
  ret <4 x double> %r3
}

define <4 x i64> @var_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4i64_v4i64_xxxx_i64:
; ALL:       # %bb.0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    andl $3, %edi
; ALL-NEXT:    andl $3, %esi
; ALL-NEXT:    andl $3, %edx
; ALL-NEXT:    andl $3, %ecx
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %x0 = extractelement <4 x i64> %x, i64 %i0
  %x1 = extractelement <4 x i64> %x, i64 %i1
  %x2 = extractelement <4 x i64> %x, i64 %i2
  %x3 = extractelement <4 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64> %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64> %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}

define <4 x i64> @var_shuffle_v4i64_v4i64_xx00_i64(<4 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4i64_v4i64_xx00_i64:
; ALL:       # %bb.0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    andl $3, %edi
; ALL-NEXT:    andl $3, %esi
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %x0 = extractelement <4 x i64> %x, i64 %i0
  %x1 = extractelement <4 x i64> %x, i64 %i1
  %x2 = extractelement <4 x i64> %x, i64 %i2
  %x3 = extractelement <4 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64> %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 0, i32 2
  %r3 = insertelement <4 x i64> %r2, i64 0, i32 3
  ret <4 x i64> %r3
}

define <4 x i64> @var_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64 %i0, i64 %i1, i64 %i2, i64 %i3) nounwind {
; ALL-LABEL: var_shuffle_v4i64_v2i64_xxxx_i64:
; ALL:       # %bb.0:
; ALL-NEXT:    andl $1, %edi
; ALL-NEXT:    andl $1, %esi
; ALL-NEXT:    andl $1, %edx
; ALL-NEXT:    andl $1, %ecx
; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    retq
  %x0 = extractelement <2 x i64> %x, i64 %i0
  %x1 = extractelement <2 x i64> %x, i64 %i1
  %x2 = extractelement <2 x i64> %x, i64 %i2
  %x3 = extractelement <2 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64> %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64> %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}

define <8 x float> @var_shuffle_v8f32_v8f32_xxxxxxxx_i32(<8 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
; ALL-LABEL: var_shuffle_v8f32_v8f32_xxxxxxxx_i32:
; ALL:       # %bb.0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    # kill: def $r9d killed $r9d def $r9
; ALL-NEXT:    # kill: def $r8d killed $r8d def $r8
; ALL-NEXT:    # kill: def $ecx killed $ecx def $rcx
; ALL-NEXT:    # kill: def $edx killed $edx def $rdx
; ALL-NEXT:    # kill: def $esi killed $esi def $rsi
; ALL-NEXT:    # kill: def $edi killed $edi def $rdi
; ALL-NEXT:    movl 24(%rbp), %r10d
; ALL-NEXT:    andl $7, %r10d
; ALL-NEXT:    movl 16(%rbp), %eax
; ALL-NEXT:    andl $7, %eax
; ALL-NEXT:    andl $7, %edi
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    andl $7, %esi
; ALL-NEXT:    andl $7, %edx
; ALL-NEXT:    andl $7, %ecx
; ALL-NEXT:    andl $7, %r8d
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT:    andl $7, %r9d
; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %x0 = extractelement <8 x float> %x, i32 %i0
  %x1 = extractelement <8 x float> %x, i32 %i1
  %x2 = extractelement <8 x float> %x, i32 %i2
  %x3 = extractelement <8 x float> %x, i32 %i3
  %x4 = extractelement <8 x float> %x, i32 %i4
  %x5 = extractelement <8 x float> %x, i32 %i5
  %x6 = extractelement <8 x float> %x, i32 %i6
  %x7 = extractelement <8 x float> %x, i32 %i7
  %r0 = insertelement <8 x float> undef, float %x0, i32 0
  %r1 = insertelement <8 x float> %r0, float %x1, i32 1
  %r2 = insertelement <8 x float> %r1, float %x2, i32 2
  %r3 = insertelement <8 x float> %r2, float %x3, i32 3
  %r4 = insertelement <8 x float> %r3, float %x4, i32 4
  %r5 = insertelement <8 x float> %r4, float %x5, i32 5
  %r6 = insertelement <8 x float> %r5, float %x6, i32 6
  %r7 = insertelement <8 x float> %r6, float %x7, i32 7
  ret <8 x float> %r7
}

define <8 x float> @var_shuffle_v8f32_v4f32_xxxxxxxx_i32(<4 x float> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7) nounwind {
; ALL-LABEL: var_shuffle_v8f32_v4f32_xxxxxxxx_i32:
; ALL:       # %bb.0:
; ALL-NEXT:    # kill: def $r9d killed $r9d def $r9
; ALL-NEXT:    # kill: def $r8d killed $r8d def $r8
; ALL-NEXT:    # kill: def $ecx killed $ecx def $rcx
; ALL-NEXT:    # kill: def $edx killed $edx def $rdx
; ALL-NEXT:    # kill: def $esi killed $esi def $rsi
; ALL-NEXT:    # kill: def $edi killed $edi def $rdi
; ALL-NEXT:    movl {{[0-9]+}}(%rsp), %r10d
; ALL-NEXT:    andl $3, %r10d
; ALL-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; ALL-NEXT:    andl $3, %eax
; ALL-NEXT:    andl $3, %edi
; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
; ALL-NEXT:    andl $3, %esi
; ALL-NEXT:    andl $3, %edx
; ALL-NEXT:    andl $3, %ecx
; ALL-NEXT:    andl $3, %r8d
; ALL-NEXT:    vmovss {{.*#+}} xmm1 = mem[0],zero,zero,zero
; ALL-NEXT:    andl $3, %r9d
; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0],mem[0],xmm1[2,3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1],mem[0],xmm1[3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm1 = xmm1[0,1,2],mem[0]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[2,3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1],mem[0],xmm0[3]
; ALL-NEXT:    vinsertps {{.*#+}} xmm0 = xmm0[0,1,2],mem[0]
; ALL-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; ALL-NEXT:    retq
  %x0 = extractelement <4 x float> %x, i32 %i0
  %x1 = extractelement <4 x float> %x, i32 %i1
  %x2 = extractelement <4 x float> %x, i32 %i2
  %x3 = extractelement <4 x float> %x, i32 %i3
  %x4 = extractelement <4 x float> %x, i32 %i4
  %x5 = extractelement <4 x float> %x, i32 %i5
  %x6 = extractelement <4 x float> %x, i32 %i6
  %x7 = extractelement <4 x float> %x, i32 %i7
  %r0 = insertelement <8 x float> undef, float %x0, i32 0
  %r1 = insertelement <8 x float> %r0, float %x1, i32 1
  %r2 = insertelement <8 x float> %r1, float %x2, i32 2
  %r3 = insertelement <8 x float> %r2, float %x3, i32 3
  %r4 = insertelement <8 x float> %r3, float %x4, i32 4
  %r5 = insertelement <8 x float> %r4, float %x5, i32 5
  %r6 = insertelement <8 x float> %r5, float %x6, i32 6
  %r7 = insertelement <8 x float> %r6, float %x7, i32 7
  ret <8 x float> %r7
}

define <16 x i16> @var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16(<16 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
; AVX1-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    pushq %rbp
; AVX1-NEXT:    movq %rsp, %rbp
; AVX1-NEXT:    andq $-32, %rsp
; AVX1-NEXT:    subq $64, %rsp
; AVX1-NEXT:    # kill: def $r9d killed $r9d def $r9
; AVX1-NEXT:    # kill: def $r8d killed $r8d def $r8
; AVX1-NEXT:    # kill: def $ecx killed $ecx def $rcx
; AVX1-NEXT:    # kill: def $edx killed $edx def $rdx
; AVX1-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    andl $15, %edi
; AVX1-NEXT:    vmovaps %ymm0, (%rsp)
; AVX1-NEXT:    movzwl (%rsp,%rdi,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    andl $15, %esi
; AVX1-NEXT:    vpinsrw $1, (%rsp,%rsi,2), %xmm0, %xmm0
; AVX1-NEXT:    andl $15, %edx
; AVX1-NEXT:    vpinsrw $2, (%rsp,%rdx,2), %xmm0, %xmm0
; AVX1-NEXT:    andl $15, %ecx
; AVX1-NEXT:    vpinsrw $3, (%rsp,%rcx,2), %xmm0, %xmm0
; AVX1-NEXT:    andl $15, %r8d
; AVX1-NEXT:    vpinsrw $4, (%rsp,%r8,2), %xmm0, %xmm0
; AVX1-NEXT:    andl $15, %r9d
; AVX1-NEXT:    vpinsrw $5, (%rsp,%r9,2), %xmm0, %xmm0
; AVX1-NEXT:    movl 16(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT:    movl 24(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT:    movl 32(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    movl 40(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl 48(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl 56(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl 64(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl 72(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl 80(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl 88(%rbp), %eax
; AVX1-NEXT:    andl $15, %eax
; AVX1-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    movq %rbp, %rsp
; AVX1-NEXT:    popq %rbp
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v16i16_v16i16_xxxxxxxxxxxxxxxx_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    pushq %rbp
; AVX2-NEXT:    movq %rsp, %rbp
; AVX2-NEXT:    andq $-32, %rsp
; AVX2-NEXT:    subq $64, %rsp
; AVX2-NEXT:    # kill: def $r9d killed $r9d def $r9
; AVX2-NEXT:    # kill: def $r8d killed $r8d def $r8
; AVX2-NEXT:    # kill: def $ecx killed $ecx def $rcx
; AVX2-NEXT:    # kill: def $edx killed $edx def $rdx
; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    andl $15, %edi
; AVX2-NEXT:    vmovaps %ymm0, (%rsp)
; AVX2-NEXT:    movzwl (%rsp,%rdi,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    andl $15, %esi
; AVX2-NEXT:    vpinsrw $1, (%rsp,%rsi,2), %xmm0, %xmm0
; AVX2-NEXT:    andl $15, %edx
; AVX2-NEXT:    vpinsrw $2, (%rsp,%rdx,2), %xmm0, %xmm0
; AVX2-NEXT:    andl $15, %ecx
; AVX2-NEXT:    vpinsrw $3, (%rsp,%rcx,2), %xmm0, %xmm0
; AVX2-NEXT:    andl $15, %r8d
; AVX2-NEXT:    vpinsrw $4, (%rsp,%r8,2), %xmm0, %xmm0
; AVX2-NEXT:    andl $15, %r9d
; AVX2-NEXT:    vpinsrw $5, (%rsp,%r9,2), %xmm0, %xmm0
; AVX2-NEXT:    movl 16(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT:    movl 24(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT:    movl 32(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    movzwl (%rsp,%rax,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    movl 40(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vpinsrw $1, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl 48(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vpinsrw $2, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl 56(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vpinsrw $3, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl 64(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vpinsrw $4, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl 72(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vpinsrw $5, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl 80(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vpinsrw $6, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl 88(%rbp), %eax
; AVX2-NEXT:    andl $15, %eax
; AVX2-NEXT:    vpinsrw $7, (%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    movq %rbp, %rsp
; AVX2-NEXT:    popq %rbp
; AVX2-NEXT:    retq
  %x0 = extractelement <16 x i16> %x, i32 %i0
  %x1 = extractelement <16 x i16> %x, i32 %i1
  %x2 = extractelement <16 x i16> %x, i32 %i2
  %x3 = extractelement <16 x i16> %x, i32 %i3
  %x4 = extractelement <16 x i16> %x, i32 %i4
  %x5 = extractelement <16 x i16> %x, i32 %i5
  %x6 = extractelement <16 x i16> %x, i32 %i6
  %x7 = extractelement <16 x i16> %x, i32 %i7
  %x8 = extractelement <16 x i16> %x, i32 %i8
  %x9 = extractelement <16 x i16> %x, i32 %i9
  %x10 = extractelement <16 x i16> %x, i32 %i10
  %x11 = extractelement <16 x i16> %x, i32 %i11
  %x12 = extractelement <16 x i16> %x, i32 %i12
  %x13 = extractelement <16 x i16> %x, i32 %i13
  %x14 = extractelement <16 x i16> %x, i32 %i14
  %x15 = extractelement <16 x i16> %x, i32 %i15
  %r0 = insertelement <16 x i16> undef, i16 %x0 , i32 0
  %r1 = insertelement <16 x i16> %r0 , i16 %x1 , i32 1
  %r2 = insertelement <16 x i16> %r1 , i16 %x2 , i32 2
  %r3 = insertelement <16 x i16> %r2 , i16 %x3 , i32 3
  %r4 = insertelement <16 x i16> %r3 , i16 %x4 , i32 4
  %r5 = insertelement <16 x i16> %r4 , i16 %x5 , i32 5
  %r6 = insertelement <16 x i16> %r5 , i16 %x6 , i32 6
  %r7 = insertelement <16 x i16> %r6 , i16 %x7 , i32 7
  %r8 = insertelement <16 x i16> %r7 , i16 %x8 , i32 8
  %r9 = insertelement <16 x i16> %r8 , i16 %x9 , i32 9
  %r10 = insertelement <16 x i16> %r9 , i16 %x10, i32 10
  %r11 = insertelement <16 x i16> %r10, i16 %x11, i32 11
  %r12 = insertelement <16 x i16> %r11, i16 %x12, i32 12
  %r13 = insertelement <16 x i16> %r12, i16 %x13, i32 13
  %r14 = insertelement <16 x i16> %r13, i16 %x14, i32 14
  %r15 = insertelement <16 x i16> %r14, i16 %x15, i32 15
  ret <16 x i16> %r15
}

define <16 x i16> @var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16(<8 x i16> %x, i32 %i0, i32 %i1, i32 %i2, i32 %i3, i32 %i4, i32 %i5, i32 %i6, i32 %i7, i32 %i8, i32 %i9, i32 %i10, i32 %i11, i32 %i12, i32 %i13, i32 %i14, i32 %i15) nounwind {
; AVX1-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
; AVX1:       # %bb.0:
; AVX1-NEXT:    # kill: def $r9d killed $r9d def $r9
; AVX1-NEXT:    # kill: def $r8d killed $r8d def $r8
; AVX1-NEXT:    # kill: def $ecx killed $ecx def $rcx
; AVX1-NEXT:    # kill: def $edx killed $edx def $rdx
; AVX1-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX1-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX1-NEXT:    andl $7, %edi
; AVX1-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX1-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm0
; AVX1-NEXT:    andl $7, %esi
; AVX1-NEXT:    vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
; AVX1-NEXT:    andl $7, %edx
; AVX1-NEXT:    vpinsrw $2, -24(%rsp,%rdx,2), %xmm0, %xmm0
; AVX1-NEXT:    andl $7, %ecx
; AVX1-NEXT:    vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
; AVX1-NEXT:    andl $7, %r8d
; AVX1-NEXT:    vpinsrw $4, -24(%rsp,%r8,2), %xmm0, %xmm0
; AVX1-NEXT:    andl $7, %r9d
; AVX1-NEXT:    vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX1-NEXT:    vmovd %eax, %xmm1
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX1-NEXT:    andl $7, %eax
; AVX1-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX1-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX1-NEXT:    retq
;
; AVX2-LABEL: var_shuffle_v16i16_v8i16_xxxxxxxxxxxxxxxx_i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    # kill: def $r9d killed $r9d def $r9
; AVX2-NEXT:    # kill: def $r8d killed $r8d def $r8
; AVX2-NEXT:    # kill: def $ecx killed $ecx def $rcx
; AVX2-NEXT:    # kill: def $edx killed $edx def $rdx
; AVX2-NEXT:    # kill: def $esi killed $esi def $rsi
; AVX2-NEXT:    # kill: def $edi killed $edi def $rdi
; AVX2-NEXT:    andl $7, %edi
; AVX2-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; AVX2-NEXT:    movzwl -24(%rsp,%rdi,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm0
; AVX2-NEXT:    andl $7, %esi
; AVX2-NEXT:    vpinsrw $1, -24(%rsp,%rsi,2), %xmm0, %xmm0
; AVX2-NEXT:    andl $7, %edx
; AVX2-NEXT:    vpinsrw $2, -24(%rsp,%rdx,2), %xmm0, %xmm0
; AVX2-NEXT:    andl $7, %ecx
; AVX2-NEXT:    vpinsrw $3, -24(%rsp,%rcx,2), %xmm0, %xmm0
; AVX2-NEXT:    andl $7, %r8d
; AVX2-NEXT:    vpinsrw $4, -24(%rsp,%r8,2), %xmm0, %xmm0
; AVX2-NEXT:    andl $7, %r9d
; AVX2-NEXT:    vpinsrw $5, -24(%rsp,%r9,2), %xmm0, %xmm0
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm0, %xmm0
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    movzwl -24(%rsp,%rax,2), %eax
; AVX2-NEXT:    vmovd %eax, %xmm1
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    vpinsrw $1, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    vpinsrw $2, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    vpinsrw $3, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    vpinsrw $4, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    vpinsrw $5, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    vpinsrw $6, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    movl {{[0-9]+}}(%rsp), %eax
; AVX2-NEXT:    andl $7, %eax
; AVX2-NEXT:    vpinsrw $7, -24(%rsp,%rax,2), %xmm1, %xmm1
; AVX2-NEXT:    vinserti128 $1, %xmm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %x0 = extractelement <8 x i16> %x, i32 %i0
  %x1 = extractelement <8 x i16> %x, i32 %i1
  %x2 = extractelement <8 x i16> %x, i32 %i2
  %x3 = extractelement <8 x i16> %x, i32 %i3
  %x4 = extractelement <8 x i16> %x, i32 %i4
  %x5 = extractelement <8 x i16> %x, i32 %i5
  %x6 = extractelement <8 x i16> %x, i32 %i6
  %x7 = extractelement <8 x i16> %x, i32 %i7
  %x8 = extractelement <8 x i16> %x, i32 %i8
  %x9 = extractelement <8 x i16> %x, i32 %i9
  %x10 = extractelement <8 x i16> %x, i32 %i10
  %x11 = extractelement <8 x i16> %x, i32 %i11
  %x12 = extractelement <8 x i16> %x, i32 %i12
  %x13 = extractelement <8 x i16> %x, i32 %i13
  %x14 = extractelement <8 x i16> %x, i32 %i14
  %x15 = extractelement <8 x i16> %x, i32 %i15
  %r0 = insertelement <16 x i16> undef, i16 %x0 , i32 0
  %r1 = insertelement <16 x i16> %r0 , i16 %x1 , i32 1
  %r2 = insertelement <16 x i16> %r1 , i16 %x2 , i32 2
  %r3 = insertelement <16 x i16> %r2 , i16 %x3 , i32 3
  %r4 = insertelement <16 x i16> %r3 , i16 %x4 , i32 4
  %r5 = insertelement <16 x i16> %r4 , i16 %x5 , i32 5
  %r6 = insertelement <16 x i16> %r5 , i16 %x6 , i32 6
  %r7 = insertelement <16 x i16> %r6 , i16 %x7 , i32 7
  %r8 = insertelement <16 x i16> %r7 , i16 %x8 , i32 8
  %r9 = insertelement <16 x i16> %r8 , i16 %x9 , i32 9
  %r10 = insertelement <16 x i16> %r9 , i16 %x10, i32 10
  %r11 = insertelement <16 x i16> %r10, i16 %x11, i32 11
  %r12 = insertelement <16 x i16> %r11, i16 %x12, i32 12
  %r13 = insertelement <16 x i16> %r12, i16 %x13, i32 13
  %r14 = insertelement <16 x i16> %r13, i16 %x14, i32 14
  %r15 = insertelement <16 x i16> %r14, i16 %x15, i32 15
  ret <16 x i16> %r15
}

;
; Unary shuffle indices from memory
;

define <4 x i64> @mem_shuffle_v4i64_v4i64_xxxx_i64(<4 x i64> %x, i64* %i) nounwind {
; ALL-LABEL: mem_shuffle_v4i64_v4i64_xxxx_i64:
; ALL:       # %bb.0:
; ALL-NEXT:    pushq %rbp
; ALL-NEXT:    movq %rsp, %rbp
; ALL-NEXT:    andq $-32, %rsp
; ALL-NEXT:    subq $64, %rsp
; ALL-NEXT:    movq (%rdi), %rax
; ALL-NEXT:    movq 8(%rdi), %rcx
; ALL-NEXT:    andl $3, %eax
; ALL-NEXT:    andl $3, %ecx
; ALL-NEXT:    movq 16(%rdi), %rdx
; ALL-NEXT:    andl $3, %edx
; ALL-NEXT:    movq 24(%rdi), %rsi
; ALL-NEXT:    andl $3, %esi
; ALL-NEXT:    vmovaps %ymm0, (%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    movq %rbp, %rsp
; ALL-NEXT:    popq %rbp
; ALL-NEXT:    retq
  %p0 = getelementptr inbounds i64, i64* %i, i32 0
  %p1 = getelementptr inbounds i64, i64* %i, i32 1
  %p2 = getelementptr inbounds i64, i64* %i, i32 2
  %p3 = getelementptr inbounds i64, i64* %i, i32 3
  %i0 = load i64, i64* %p0, align 4
  %i1 = load i64, i64* %p1, align 4
  %i2 = load i64, i64* %p2, align 4
  %i3 = load i64, i64* %p3, align 4
  %x0 = extractelement <4 x i64> %x, i64 %i0
  %x1 = extractelement <4 x i64> %x, i64 %i1
  %x2 = extractelement <4 x i64> %x, i64 %i2
  %x3 = extractelement <4 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64> %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64> %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}

define <4 x i64> @mem_shuffle_v4i64_v2i64_xxxx_i64(<2 x i64> %x, i64* %i) nounwind {
; ALL-LABEL: mem_shuffle_v4i64_v2i64_xxxx_i64:
; ALL:       # %bb.0:
; ALL-NEXT:    movq (%rdi), %rax
; ALL-NEXT:    movq 8(%rdi), %rcx
; ALL-NEXT:    andl $1, %eax
; ALL-NEXT:    andl $1, %ecx
; ALL-NEXT:    movq 16(%rdi), %rdx
; ALL-NEXT:    andl $1, %edx
; ALL-NEXT:    movq 24(%rdi), %rsi
; ALL-NEXT:    andl $1, %esi
; ALL-NEXT:    vmovaps %xmm0, -{{[0-9]+}}(%rsp)
; ALL-NEXT:    vmovsd {{.*#+}} xmm0 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovlhps {{.*#+}} xmm0 = xmm1[0],xmm0[0]
; ALL-NEXT:    vmovsd {{.*#+}} xmm1 = mem[0],zero
; ALL-NEXT:    vmovsd {{.*#+}} xmm2 = mem[0],zero
; ALL-NEXT:    vmovlhps {{.*#+}} xmm1 = xmm2[0],xmm1[0]
; ALL-NEXT:    vinsertf128 $1, %xmm0, %ymm1, %ymm0
; ALL-NEXT:    retq
  %p0 = getelementptr inbounds i64, i64* %i, i32 0
  %p1 = getelementptr inbounds i64, i64* %i, i32 1
  %p2 = getelementptr inbounds i64, i64* %i, i32 2
  %p3 = getelementptr inbounds i64, i64* %i, i32 3
  %i0 = load i64, i64* %p0, align 4
  %i1 = load i64, i64* %p1, align 4
  %i2 = load i64, i64* %p2, align 4
  %i3 = load i64, i64* %p3, align 4
  %x0 = extractelement <2 x i64> %x, i64 %i0
  %x1 = extractelement <2 x i64> %x, i64 %i1
  %x2 = extractelement <2 x i64> %x, i64 %i2
  %x3 = extractelement <2 x i64> %x, i64 %i3
  %r0 = insertelement <4 x i64> undef, i64 %x0, i32 0
  %r1 = insertelement <4 x i64> %r0, i64 %x1, i32 1
  %r2 = insertelement <4 x i64> %r1, i64 %x2, i32 2
  %r3 = insertelement <4 x i64> %r2, i64 %x3, i32 3
  ret <4 x i64> %r3
}