Home | History | Annotate | Download | only in X86
      1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
      2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X32
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X64
      4 
      5 ; This test checks how FNEG (written here as an fsub from -0.0) combines with FMA intrinsics.
      6 
      ; test1: %c is negated (fsub from -0.0) and the negated value is used as the
      ; addend of the 256-bit vfmadd intrinsic. The checks for both RUN lines
      ; expect one vfmsub213ps, (ymm1 * ymm0) - ymm2, with no separate negation
      ; instruction.
      ; NOTE(review): the call carries attribute group #2, whose definition is not
      ; visible in this part of the file.
      7 define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c)  {
      8 ; X32-LABEL: test1:
      9 ; X32:       # %bb.0: # %entry
     10 ; X32-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
     11 ; X32-NEXT:    retl
     12 ;
     13 ; X64-LABEL: test1:
     14 ; X64:       # %bb.0: # %entry
     15 ; X64-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
     16 ; X64-NEXT:    retq
     17 entry:
     18   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
     19   %0 = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
     20   ret <8 x float> %0
     21 }
     22 
     ; Declaration of the x86 FMA intrinsic called by test1 (three <8 x float>
     ; operands, <8 x float> result).
     23 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
     24 
     ; test2: the result of the 128-bit vfmadd.ps intrinsic is negated (fsub from
     ; -0.0). The checks for both RUN lines expect one vfnmsub213ps,
     ; -(xmm1 * xmm0) - xmm2, with no separate negation instruction.
     25 define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
     26 ; X32-LABEL: test2:
     27 ; X32:       # %bb.0: # %entry
     28 ; X32-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
     29 ; X32-NEXT:    retl
     30 ;
     31 ; X64-LABEL: test2:
     32 ; X64:       # %bb.0: # %entry
     33 ; X64-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
     34 ; X64-NEXT:    retq
     35 entry:
     36   %0 = tail call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
     37   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
     38   ret <4 x float> %sub.i
     39 }
     40 
     ; Declaration of the x86 FMA intrinsic called by test2 (three <4 x float>
     ; operands, <4 x float> result).
     41 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c)
     42 
     ; test3: the whole <4 x float> result of the scalar vfnmadd.ss intrinsic is
     ; negated (fsub from -0.0). Unlike the packed tests, the checks expect the
     ; negation to remain as separate instructions: vfnmadd213ss, then a
     ; broadcast of a -0.0 constant and a vxorps with it.
     ; NOTE(review): presumably the fold is skipped because the .ss form only
     ; computes the low element -- confirm against the X86 lowering code.
     43 define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c)  {
     44 ; X32-LABEL: test3:
     45 ; X32:       # %bb.0: # %entry
     46 ; X32-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
     47 ; X32-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
     48 ; X32-NEXT:    vxorps %xmm1, %xmm0, %xmm0
     49 ; X32-NEXT:    retl
     50 ;
     51 ; X64-LABEL: test3:
     52 ; X64:       # %bb.0: # %entry
     53 ; X64-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
     54 ; X64-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0,-0,-0,-0]
     55 ; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm0
     56 ; X64-NEXT:    retq
     57 entry:
     58   %0 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
     59   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
     60   ret <4 x float> %sub.i
     61 }
     62 
     ; Declaration of the x86 FMA intrinsic called by test3 (three <4 x float>
     ; operands, <4 x float> result).
     63 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
     64 
     ; test4: the result of the 256-bit vfmsub intrinsic is negated (fsub from
     ; -0.0). The checks for both RUN lines expect one vfnmadd213ps,
     ; -(ymm1 * ymm0) + ymm2, with no separate negation instruction.
     65 define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
     66 ; X32-LABEL: test4:
     67 ; X32:       # %bb.0: # %entry
     68 ; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
     69 ; X32-NEXT:    retl
     70 ;
     71 ; X64-LABEL: test4:
     72 ; X64:       # %bb.0: # %entry
     73 ; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
     74 ; X64-NEXT:    retq
     75 entry:
     76   %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
     77   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
     78   ret <8 x float> %sub.i
     79 }
     80 
     ; test5: %c is negated (fsub from -0.0) and the negated value is used as the
     ; third operand of the 256-bit vfmsub intrinsic. The checks for both RUN
     ; lines expect one vfmadd213ps, (ymm1 * ymm0) + ymm2, with no separate
     ; negation instruction.
     81 define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
     82 ; X32-LABEL: test5:
     83 ; X32:       # %bb.0: # %entry
     84 ; X32-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
     85 ; X32-NEXT:    retl
     86 ;
     87 ; X64-LABEL: test5:
     88 ; X64:       # %bb.0: # %entry
     89 ; X64-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
     90 ; X64-NEXT:    retq
     91 entry:
     92   %sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
     93   %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
     94   ret <8 x float> %0
     95 }
     96 
     ; Declaration of the x86 FMA intrinsic called by test4 and test5 (three
     ; <8 x float> operands, <8 x float> result).
     97 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
     98 
     99 
    ; test6: double-precision counterpart of test2. The result of the 128-bit
    ; vfmadd.pd intrinsic is negated (fsub from -0.0), and the checks for both
    ; RUN lines expect one vfnmsub213pd, -(xmm1 * xmm0) - xmm2.
    100 define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
    101 ; X32-LABEL: test6:
    102 ; X32:       # %bb.0: # %entry
    103 ; X32-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
    104 ; X32-NEXT:    retl
    105 ;
    106 ; X64-LABEL: test6:
    107 ; X64:       # %bb.0: # %entry
    108 ; X64-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
    109 ; X64-NEXT:    retq
    110 entry:
    111   %0 = tail call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
    112   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %0
    113   ret <2 x double> %sub.i
    114 }
    115 
    ; Declaration of the x86 FMA intrinsic called by test6 (three <2 x double>
    ; operands, <2 x double> result).
    116 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
    117 
    ; test7: scalar %a is inserted into lane 0 of an otherwise-undef vector, that
    ; vector is subtracted from a constant whose lane 0 is -0.0 and whose other
    ; lanes are undef, lane 0 of the difference is splatted to all eight lanes,
    ; and the splat is the first multiplicand of the generic llvm.fma.v8f32.
    ; The checks expect the subtraction to stay explicit: a vsubps, then a
    ; vbroadcastss, then a plain vfmadd213ps (no negated FMA form is selected).
    118 define <8 x float> @test7(float %a, <8 x float> %b, <8 x float> %c)  {
    119 ; X32-LABEL: test7:
    120 ; X32:       # %bb.0: # %entry
    121 ; X32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    122 ; X32-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
    123 ; X32-NEXT:    vsubps %ymm2, %ymm3, %ymm2
    124 ; X32-NEXT:    vbroadcastss %xmm2, %ymm2
    125 ; X32-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
    126 ; X32-NEXT:    retl
    127 ;
    128 ; X64-LABEL: test7:
    129 ; X64:       # %bb.0: # %entry
    130 ; X64-NEXT:    # kill: def $xmm0 killed $xmm0 def $ymm0
    131 ; X64-NEXT:    vmovss {{.*#+}} xmm3 = mem[0],zero,zero,zero
    132 ; X64-NEXT:    vsubps %ymm0, %ymm3, %ymm0
    133 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    134 ; X64-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
    135 ; X64-NEXT:    retq
    136 entry:
    ; Only lane 0 of the constant is defined (-0.0); the zeroinitializer shuffle
    ; mask below reads lane 0 for every output lane.
    137   %0 = insertelement <8 x float> undef, float %a, i32 0
    138   %1 = fsub <8 x float> <float -0.000000e+00, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, %0
    139   %2 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> zeroinitializer
    140   %3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %2, <8 x float> %b, <8 x float> %c)
    141   ret <8 x float> %3
    142 
    143 }
    144 
    ; test8: like test7, but the negation is a scalar fsub from -0.0 performed
    ; before %a is inserted into lane 0 and splatted to all eight lanes. The
    ; splat is the first multiplicand of the generic llvm.fma.v8f32.
    ; The checks expect the negation to stay explicit: a vxorps with a broadcast
    ; -0.0 constant, then a vbroadcastss, then a plain vfmadd213ps.
    145 define <8 x float> @test8(float %a, <8 x float> %b, <8 x float> %c)  {
    146 ; X32-LABEL: test8:
    147 ; X32:       # %bb.0: # %entry
    148 ; X32-NEXT:    vmovss {{.*#+}} xmm2 = mem[0],zero,zero,zero
    149 ; X32-NEXT:    vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
    150 ; X32-NEXT:    vxorps %xmm3, %xmm2, %xmm2
    151 ; X32-NEXT:    vbroadcastss %xmm2, %ymm2
    152 ; X32-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm2 * ymm0) + ymm1
    153 ; X32-NEXT:    retl
    154 ;
    155 ; X64-LABEL: test8:
    156 ; X64:       # %bb.0: # %entry
    157 ; X64-NEXT:    vbroadcastss {{.*#+}} xmm3 = [-0,-0,-0,-0]
    158 ; X64-NEXT:    vxorps %xmm3, %xmm0, %xmm0
    159 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
    160 ; X64-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
    161 ; X64-NEXT:    retq
    162 entry:
    ; The zeroinitializer shuffle mask below reads lane 0 for every output lane.
    163   %0 = fsub float -0.0, %a
    164   %1 = insertelement <8 x float> undef, float %0, i32 0
    165   %2 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> zeroinitializer
    166   %3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %2, <8 x float> %b, <8 x float> %c)
    167   ret <8 x float> %3
    168 }
    169 
    ; Declaration of the generic (target-independent) FMA intrinsic called by
    ; test7 and test8 (three <8 x float> operands, <8 x float> result).
    170 declare <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)
    171