Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx2 -O0 | FileCheck %s
      3 
      4 define <16 x i64> @pluto(<16 x i64> %arg, <16 x i64> %arg1, <16 x i64> %arg2, <16 x i64> %arg3, <16 x i64> %arg4) {
        ; @pluto builds one <16 x i64> result from three constant-mask selects
        ; followed by a single two-input shufflevector. %arg4 is declared but is
        ; not referenced anywhere in the body.
        ;
        ; The CHECK lines below are autogenerated (see the NOTE at the top of the
        ; file) and describe the exact llc output for the RUN line's
        ; -mattr=avx2 -O0 configuration; regenerate them with
        ; utils/update_llc_test_checks.py instead of editing them by hand.
        ; NOTE(review): the bug or commit this test was reduced from is not
        ; visible here -- presumably an -O0 shuffle-lowering regression; confirm
        ; against the file's history before changing the IR.
      5 ; CHECK-LABEL: pluto:
      6 ; CHECK:       # %bb.0: # %bb
      7 ; CHECK-NEXT:    pushq %rbp
      8 ; CHECK-NEXT:    .cfi_def_cfa_offset 16
      9 ; CHECK-NEXT:    .cfi_offset %rbp, -16
     10 ; CHECK-NEXT:    movq %rsp, %rbp
     11 ; CHECK-NEXT:    .cfi_def_cfa_register %rbp
     12 ; CHECK-NEXT:    andq $-32, %rsp
     13 ; CHECK-NEXT:    subq $320, %rsp # imm = 0x140
     14 ; CHECK-NEXT:    vmovaps 240(%rbp), %ymm8
     15 ; CHECK-NEXT:    vmovaps 208(%rbp), %ymm9
     16 ; CHECK-NEXT:    vmovaps 176(%rbp), %ymm10
     17 ; CHECK-NEXT:    vmovaps 144(%rbp), %ymm11
     18 ; CHECK-NEXT:    vmovaps 112(%rbp), %ymm12
     19 ; CHECK-NEXT:    vmovaps 80(%rbp), %ymm13
     20 ; CHECK-NEXT:    vmovaps 48(%rbp), %ymm14
     21 ; CHECK-NEXT:    vmovaps 16(%rbp), %ymm15
     22 ; CHECK-NEXT:    vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
     23 ; CHECK-NEXT:    vxorps %xmm6, %xmm6, %xmm6
     24 ; CHECK-NEXT:    vpblendd {{.*#+}} ymm8 = ymm6[0,1],ymm8[2,3,4,5,6,7]
     25 ; CHECK-NEXT:    vpblendd {{.*#+}} ymm6 = ymm6[0,1],ymm11[2,3,4,5,6,7]
     26 ; CHECK-NEXT:    # kill: def $xmm9 killed $xmm9 killed $ymm9
     27 ; CHECK-NEXT:    vmovdqa %xmm9, %xmm11
     28 ; CHECK-NEXT:    # kill: def $ymm11 killed $xmm11
     29 ; CHECK-NEXT:    vpalignr {{.*#+}} ymm6 = ymm2[8,9,10,11,12,13,14,15],ymm6[0,1,2,3,4,5,6,7],ymm2[24,25,26,27,28,29,30,31],ymm6[16,17,18,19,20,21,22,23]
     30 ; CHECK-NEXT:    vpermq {{.*#+}} ymm6 = ymm6[2,3,2,0]
     31 ; CHECK-NEXT:    vmovaps %ymm0, {{[0-9]+}}(%rsp) # 32-byte Spill
     32 ; CHECK-NEXT:    # implicit-def: $ymm0
     33 ; CHECK-NEXT:    vinserti128 $1, %xmm9, %ymm0, %ymm0
     34 ; CHECK-NEXT:    vpblendd {{.*#+}} ymm0 = ymm6[0,1,2,3],ymm0[4,5],ymm6[6,7]
     35 ; CHECK-NEXT:    vmovaps %xmm2, %xmm9
     36 ; CHECK-NEXT:    # implicit-def: $ymm2
     37 ; CHECK-NEXT:    vinserti128 $1, %xmm9, %ymm2, %ymm2
     38 ; CHECK-NEXT:    vpunpcklqdq {{.*#+}} ymm6 = ymm7[0],ymm8[0],ymm7[2],ymm8[2]
     39 ; CHECK-NEXT:    vpermq {{.*#+}} ymm6 = ymm6[2,1,2,3]
     40 ; CHECK-NEXT:    vpblendd {{.*#+}} ymm2 = ymm6[0,1,2,3],ymm2[4,5,6,7]
     41 ; CHECK-NEXT:    vmovaps %xmm7, %xmm9
     42 ; CHECK-NEXT:    vpslldq {{.*#+}} xmm9 = zero,zero,zero,zero,zero,zero,zero,zero,xmm9[0,1,2,3,4,5,6,7]
     43 ; CHECK-NEXT:    # implicit-def: $ymm6
     44 ; CHECK-NEXT:    vmovaps %xmm9, %xmm6
     45 ; CHECK-NEXT:    vpalignr {{.*#+}} ymm11 = ymm11[8,9,10,11,12,13,14,15],ymm5[0,1,2,3,4,5,6,7],ymm11[24,25,26,27,28,29,30,31],ymm5[16,17,18,19,20,21,22,23]
     46 ; CHECK-NEXT:    vpermq {{.*#+}} ymm11 = ymm11[0,1,0,3]
     47 ; CHECK-NEXT:    vpblendd {{.*#+}} ymm6 = ymm6[0,1,2,3],ymm11[4,5,6,7]
     48 ; CHECK-NEXT:    vpblendd {{.*#+}} ymm7 = ymm7[0,1],ymm8[2,3],ymm7[4,5,6,7]
     49 ; CHECK-NEXT:    vpermq {{.*#+}} ymm7 = ymm7[2,1,1,3]
     50 ; CHECK-NEXT:    vpshufd {{.*#+}} ymm5 = ymm5[0,1,0,1,4,5,4,5]
     51 ; CHECK-NEXT:    vpblendd {{.*#+}} ymm5 = ymm7[0,1,2,3,4,5],ymm5[6,7]
     52 ; CHECK-NEXT:    vmovaps %ymm1, {{[0-9]+}}(%rsp) # 32-byte Spill
     53 ; CHECK-NEXT:    vmovaps %ymm5, %ymm1
     54 ; CHECK-NEXT:    vmovaps %ymm3, {{[0-9]+}}(%rsp) # 32-byte Spill
     55 ; CHECK-NEXT:    vmovaps %ymm6, %ymm3
     56 ; CHECK-NEXT:    vmovaps %ymm15, {{[0-9]+}}(%rsp) # 32-byte Spill
     57 ; CHECK-NEXT:    vmovaps %ymm10, {{[0-9]+}}(%rsp) # 32-byte Spill
     58 ; CHECK-NEXT:    vmovaps %ymm13, {{[0-9]+}}(%rsp) # 32-byte Spill
     59 ; CHECK-NEXT:    vmovaps %ymm12, {{[0-9]+}}(%rsp) # 32-byte Spill
     60 ; CHECK-NEXT:    vmovaps %ymm4, {{[0-9]+}}(%rsp) # 32-byte Spill
     61 ; CHECK-NEXT:    vmovaps %ymm14, (%rsp) # 32-byte Spill
     62 ; CHECK-NEXT:    movq %rbp, %rsp
     63 ; CHECK-NEXT:    popq %rbp
     64 ; CHECK-NEXT:    .cfi_def_cfa %rsp, 8
     65 ; CHECK-NEXT:    retq
     66 bb:
        ; %tmp: lanes 10 and 11 come from %arg; every other lane comes from %arg1.
     67   %tmp = select <16 x i1> <i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg, <16 x i64> %arg1
        ; %tmp5: lanes 0, 3, 4, 7 and 9 come from %arg2; every other lane is zero.
     68   %tmp5 = select <16 x i1> <i1 true, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 true, i1 false, i1 true, i1 false, i1 false, i1 false, i1 false, i1 false, i1 false>, <16 x i64> %arg2, <16 x i64> zeroinitializer
        ; %tmp6: lanes 1-3, 8-9 and 13-15 come from %arg3; the remaining lanes
        ; (0, 4-7, 10-12) keep the %tmp5 value.
     69   %tmp6 = select <16 x i1> <i1 false, i1 true, i1 true, i1 true, i1 false, i1 false, i1 false, i1 false, i1 true, i1 true, i1 false, i1 false, i1 false, i1 true, i1 true, i1 true>, <16 x i64> %arg3, <16 x i64> %tmp5
        ; %tmp7: shuffle indices 0-15 pick lanes of %tmp and 16-31 pick lanes of
        ; %tmp6 (index - 16). Here 18, 24, 29, 28, 22 and 25 read %tmp6 lanes
        ; 2, 8, 13, 12, 6 and 9; all other indices read %tmp.
     70   %tmp7 = shufflevector <16 x i64> %tmp, <16 x i64> %tmp6, <16 x i32> <i32 11, i32 18, i32 24, i32 9, i32 14, i32 29, i32 29, i32 6, i32 14, i32 28, i32 8, i32 9, i32 22, i32 12, i32 25, i32 6>
     71   ret <16 x i64> %tmp7
     72 }
     73