; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 -O0 | FileCheck %s

; @pluto builds its result from constant-mask selects over four of its five
; <16 x i64> arguments (%arg4 is never read) followed by a two-input
; shufflevector. The llc invocation above compiles it at -O0 for x86-64 with
; AVX2, and the assertion lines below pin the exact instruction sequence that
; is expected. They are marked as autogenerated by
; utils/update_llc_test_checks.py, so regenerate them with that script rather
; than editing them by hand.
; REVIEW: the bug or behavior this test guards is not recorded in the file;
; add a reference here if it is known.
define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <16 x i64> %arg3, <16 x i64> %arg4) {
; CHECK-LABEL: pluto:
; CHECK: # %bb.0: # %bb
; CHECK-NEXT: pushq %rbp
; CHECK-NEXT: .cfi_def_cfa_offset 16
; CHECK-NEXT: .cfi_offset %rbp, -16
; CHECK-NEXT: movq %rsp, %rbp
; CHECK-NEXT: .cfi_def_cfa_register %rbp
; CHECK-NEXT: andq $-32, %rsp
; CHECK-NEXT: subq $320, %rsp # imm = 0x140
; CHECK-NEXT: vmovaps 240(%rbp), %ymm8
; CHECK-NEXT: vmovaps 208(%rbp), %ymm9
; CHECK-NEXT: vmovaps 176(%rbp), %ymm10
; CHECK-NEXT: vmovaps 144(%rbp), %ymm11
; CHECK-NEXT: vmovaps 112(%rbp), %ymm12
; CHECK-NEXT: vmovaps 80(%rbp), %ymm13
; CHECK-NEXT: vmovaps 48(%rbp), %ymm14
; CHECK-NEXT: vmovaps 16(%rbp), %ymm15
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT: vxorps %xmm6, %xmm6, %xmm6
; CHECK-NEXT: vpblendd {{.*#+}} ymm8 = ymm6[0,1],ymm8[2,3,4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1],ymm11[2,3,4,5,6,7]
; CHECK-NEXT: # kill: def $xmm9 killed $xmm9 killed $ymm9
; CHECK-NEXT: vmovdqa %xmm9, %xmm11
; CHECK-NEXT: # kill: def $ymm11 killed $xmm11
; CHECK-NEXT: vpalignr {{.*#+}} ymm6 = ymm2[8,9,10,11,12,13,14,15],ymm6[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm6[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,3,2,0]
; CHECK-NEXT: vmovaps %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: # implicit-def: $ymm0
; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm0, %ymm0
; CHECK-NEXT: vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5],ymm6[6,7]
; CHECK-NEXT: vmovaps %xmm2, %xmm9
; CHECK-NEXT: # implicit-def: $ymm2
; CHECK-NEXT: vinserti128 $1, %xmm9, %ymm2, %ymm2
; CHECK-NEXT: vpunpcklqdq {{.*#+}} ymm6 = ymm7[0],ymm8[0],ymm7[2],ymm8[2]
; CHECK-NEXT: vpermq {{.*#+}} ymm6 = ymm6[2,1,2,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
; CHECK-NEXT: vmovaps %xmm7, %xmm9
; CHECK-NEXT: vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
; CHECK-NEXT: # implicit-def: $ymm6
; CHECK-NEXT: vmovaps %xmm9, %xmm6
; CHECK-NEXT: vpalignr {{.*#+}} ymm11 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
; CHECK-NEXT: vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
; CHECK-NEXT: vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm11[4,5,6,7]
; CHECK-NEXT: vpblendd {{.*#+}} ymm7 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
; CHECK-NEXT: vpermq {{.*#+}} ymm7 = ymm7[2,1,1,3]
; CHECK-NEXT: vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
; CHECK-NEXT: vpblendd {{.*#+}} ymm5 = ymm7[0,1,2,3,4,5],ymm5[6,7]
; CHECK-NEXT: vmovaps %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm5, %ymm1
; CHECK-NEXT: vmovaps %ymm3, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm6, %ymm3
; CHECK-NEXT: vmovaps %ymm15, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm10, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm13, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm12, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm4, {{[0-9]+}}(%rsp) # 32-byte Spill
; CHECK-NEXT: vmovaps %ymm14, (%rsp) # 32-byte Spill
; CHECK-NEXT: movq %rbp, %rsp
; CHECK-NEXT: popq %rbp
; CHECK-NEXT: .cfi_def_cfa %rsp, 8
; CHECK-NEXT: retq
bb:
  ; %tmp: lanes 10 and 11 come from %arg, every other lane from %arg1.
  %tmp = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg, <16 x i64> %arg1
  ; %tmp5: lanes 0, 3, 4, 7 and 9 come from %arg2, every other lane is zero.
  %tmp5 = select <16 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg2, <16 x i64> zeroinitializer
  ; %tmp6: lanes 1-3, 8, 9 and 13-15 come from %arg3, the rest from %tmp5.
  %tmp6 = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>, <16 x i64> %arg3, <16 x i64> %tmp5
  ; Shuffle indices 0-15 pick lanes of %tmp; indices 16-31 pick lanes of %tmp6.
  ; The only %tmp6 lanes read here that fall through to %tmp5 are 6 and 12
  ; (indices 22 and 28), and %tmp5 is zero in both, so no element of %arg2
  ; reaches the returned vector.
  %tmp7 = shufflevector <16 x i64> %tmp, <16 x i64> %tmp6, <16 x i32> <i32 11, i32 18, i32 24, i32 9, i32 14, i32 29, i32 29, i32 6, i32 14, i32 28, i32 8, i32 9, i32 22, i32 12, i32 25, i32 6>
  ret <16 x i64> %tmp7
}