; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=X32
; RUN: llc < %s -mtriple=x86_64-unknown-linux-gnu -mattr=+avx2 | FileCheck %s --check-prefix=X64

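; Check that stores tagged with !nontemporal metadata on 256-bit AVX2 vector
; types are selected as non-temporal stores: vmovntps for <8 x float>,
; vmovntpd for <4 x double>, and vmovntdq for the integer vector types.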
define void @f(<8 x float> %A, i8* %B, <4 x double> %C, <4 x i64> %E, <8 x i32> %F, <16 x i16> %G, <32 x i8> %H) nounwind {
; X32-LABEL: f:
; X32:       # BB#0:
; X32-NEXT:    pushl %ebp
; X32-NEXT:    movl %esp, %ebp
; X32-NEXT:    andl $-32, %esp
; X32-NEXT:    subl $32, %esp
; X32-NEXT:    vmovdqa 104(%ebp), %ymm3
; X32-NEXT:    vmovdqa 72(%ebp), %ymm4
; X32-NEXT:    vmovdqa 40(%ebp), %ymm5
; X32-NEXT:    movl 8(%ebp), %eax
; X32-NEXT:    vaddps .LCPI0_0, %ymm0, %ymm0
; X32-NEXT:    vmovntps %ymm0, (%eax)
; X32-NEXT:    vpaddq .LCPI0_1, %ymm2, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    vaddpd .LCPI0_2, %ymm1, %ymm0
; X32-NEXT:    vmovntpd %ymm0, (%eax)
; X32-NEXT:    vpaddd .LCPI0_3, %ymm5, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    vpaddw .LCPI0_4, %ymm4, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    vpaddb .LCPI0_5, %ymm3, %ymm0
; X32-NEXT:    vmovntdq %ymm0, (%eax)
; X32-NEXT:    movl %ebp, %esp
; X32-NEXT:    popl %ebp
; X32-NEXT:    vzeroupper
; X32-NEXT:    retl
;
; X64-LABEL: f:
; X64:       # BB#0:
; X64-NEXT:    vaddps {{.*}}(%rip), %ymm0, %ymm0
; X64-NEXT:    vmovntps %ymm0, (%rdi)
; X64-NEXT:    vpaddq {{.*}}(%rip), %ymm2, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vaddpd {{.*}}(%rip), %ymm1, %ymm0
; X64-NEXT:    vmovntpd %ymm0, (%rdi)
; X64-NEXT:    vpaddd {{.*}}(%rip), %ymm3, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vpaddw {{.*}}(%rip), %ymm4, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vpaddb {{.*}}(%rip), %ymm5, %ymm0
; X64-NEXT:    vmovntdq %ymm0, (%rdi)
; X64-NEXT:    vzeroupper
; X64-NEXT:    retq
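  ; Non-temporal <8 x float> store: expect vmovntps.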
  %cast = bitcast i8* %B to <8 x float>*
  %A2 = fadd <8 x float> %A, <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0>
  store <8 x float> %A2, <8 x float>* %cast, align 32, !nontemporal !0
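  ; Non-temporal <4 x i64> store: expect vmovntdq.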
  %cast1 = bitcast i8* %B to <4 x i64>*
  %E2 = add <4 x i64> %E, <i64 1, i64 2, i64 3, i64 4>
  store <4 x i64> %E2, <4 x i64>* %cast1, align 32, !nontemporal !0
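  ; Non-temporal <4 x double> store: expect vmovntpd.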
  %cast2 = bitcast i8* %B to <4 x double>*
  %C2 = fadd <4 x double> %C, <double 1.0, double 2.0, double 3.0, double 4.0>
  store <4 x double> %C2, <4 x double>* %cast2, align 32, !nontemporal !0
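  ; Non-temporal <8 x i32> store: expect vmovntdq.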
  %cast3 = bitcast i8* %B to <8 x i32>*
  %F2 = add <8 x i32> %F, <i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8>
  store <8 x i32> %F2, <8 x i32>* %cast3, align 32, !nontemporal !0
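  ; Non-temporal <16 x i16> store: expect vmovntdq.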
  %cast4 = bitcast i8* %B to <16 x i16>*
  %G2 = add <16 x i16> %G, <i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8>
  store <16 x i16> %G2, <16 x i16>* %cast4, align 32, !nontemporal !0
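  ; Non-temporal <32 x i8> store: expect vmovntdq.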
  %cast5 = bitcast i8* %B to <32 x i8>*
  %H2 = add <32 x i8> %H, <i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8>
  store <32 x i8> %H2, <32 x i8>* %cast5, align 32, !nontemporal !0
  ret void
}

!0 = !{i32 1}