Home | History | Annotate | Download | only in X86
      1 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
      2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
      3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE
      4 
      5 ; If the target's divss/divps instructions are substantially
      6 ; slower than rcpss/rcpps with a Newton-Raphson refinement,
      7 ; we should generate the estimate sequence.
      8 
      9 ; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
     10 ; for details about the accuracy, speed, and implementation
     11 ; differences of x86 reciprocal estimates.
     12 
     13 define float @reciprocal_estimate(float %x) #0 {
        ; Scalar case: divides the constant 1.0 by %x with an fdiv that carries
        ; the 'fast' flag, and returns the quotient.
     14   %div = fdiv fast float 1.0, %x
     15   ret float %div
     16 
        ; Estimates disabled (sse2 invocation with -recip=!divf,!vec-divf):
        ; a real divss is expected, bracketed by register moves.
     17 ; NORECIP-LABEL: reciprocal_estimate:
     18 ; NORECIP: movss
     19 ; NORECIP-NEXT: divss
     20 ; NORECIP-NEXT: movaps
     21 ; NORECIP-NEXT: retq
     22 
        ; Estimates enabled (avx invocation with -recip=divf,vec-divf):
        ; vrcpss followed by one mul/sub/mul/add group, presumably a single
        ; Newton-Raphson refinement step as described in the file header.
     23 ; RECIP-LABEL: reciprocal_estimate:
     24 ; RECIP: vrcpss
     25 ; RECIP: vmulss
     26 ; RECIP: vsubss
     27 ; RECIP: vmulss
     28 ; RECIP: vaddss
     29 ; RECIP-NEXT: retq
     30 
        ; Estimates with -recip=divf:2,vec-divf:2 (avx invocation):
        ; the mul/sub/mul/add group is expected twice after vrcpss.
     31 ; REFINE-LABEL: reciprocal_estimate:
     32 ; REFINE: vrcpss
     33 ; REFINE: vmulss
     34 ; REFINE: vsubss
     35 ; REFINE: vmulss
     36 ; REFINE: vaddss
     37 ; REFINE: vmulss
     38 ; REFINE: vsubss
     39 ; REFINE: vmulss
     40 ; REFINE: vaddss
     41 ; REFINE-NEXT: retq
     42 }
     43 
     44 define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
        ; 4-wide vector case: divides a vector of four 1.0 constants by %x
        ; with an fdiv that carries the 'fast' flag, and returns the result.
     45   %div = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
     46   ret <4 x float> %div
     47 
        ; Estimates disabled (sse2 invocation with -recip=!divf,!vec-divf):
        ; a single packed divps is expected, bracketed by movaps.
     48 ; NORECIP-LABEL: reciprocal_estimate_v4f32:
     49 ; NORECIP: movaps
     50 ; NORECIP-NEXT: divps
     51 ; NORECIP-NEXT: movaps
     52 ; NORECIP-NEXT: retq
     53 
        ; Estimates enabled (avx invocation with -recip=divf,vec-divf):
        ; vrcpps followed by one packed mul/sub/mul/add group.
     54 ; RECIP-LABEL: reciprocal_estimate_v4f32:
     55 ; RECIP: vrcpps
     56 ; RECIP: vmulps
     57 ; RECIP: vsubps
     58 ; RECIP: vmulps
     59 ; RECIP: vaddps
     60 ; RECIP-NEXT: retq
     61 
        ; Estimates with -recip=divf:2,vec-divf:2 (avx invocation):
        ; the packed mul/sub/mul/add group is expected twice after vrcpps.
     62 ; REFINE-LABEL: reciprocal_estimate_v4f32:
     63 ; REFINE: vrcpps
     64 ; REFINE: vmulps
     65 ; REFINE: vsubps
     66 ; REFINE: vmulps
     67 ; REFINE: vaddps
     68 ; REFINE: vmulps
     69 ; REFINE: vsubps
     70 ; REFINE: vmulps
     71 ; REFINE: vaddps
     72 ; REFINE-NEXT: retq
     73 }
     74 
     75 define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
        ; 8-wide vector case: divides a vector of eight 1.0 constants by %x
        ; with an fdiv that carries the 'fast' flag, and returns the result.
     76   %div = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
     77   ret <8 x float> %div
     78 
        ; Estimates disabled (sse2 invocation with -recip=!divf,!vec-divf):
        ; two movaps, two divps and two more movaps are expected, presumably
        ; because the 8-float vector is processed as two 4-float halves
        ; under sse2.
     79 ; NORECIP-LABEL: reciprocal_estimate_v8f32:
     80 ; NORECIP: movaps
     81 ; NORECIP: movaps
     82 ; NORECIP-NEXT: divps
     83 ; NORECIP-NEXT: divps
     84 ; NORECIP-NEXT: movaps
     85 ; NORECIP-NEXT: movaps
     86 ; NORECIP-NEXT: retq
     87 
        ; Estimates enabled (avx invocation with -recip=divf,vec-divf):
        ; only one vrcpps and one packed mul/sub/mul/add group are expected
        ; for the whole 8-float vector.
     88 ; RECIP-LABEL: reciprocal_estimate_v8f32:
     89 ; RECIP: vrcpps
     90 ; RECIP: vmulps
     91 ; RECIP: vsubps
     92 ; RECIP: vmulps
     93 ; RECIP: vaddps
     94 ; RECIP-NEXT: retq
     95 
        ; Estimates with -recip=divf:2,vec-divf:2 (avx invocation):
        ; the packed mul/sub/mul/add group is expected twice after vrcpps.
     96 ; REFINE-LABEL: reciprocal_estimate_v8f32:
     97 ; REFINE: vrcpps
     98 ; REFINE: vmulps
     99 ; REFINE: vsubps
    100 ; REFINE: vmulps
    101 ; REFINE: vaddps
    102 ; REFINE: vmulps
    103 ; REFINE: vsubps
    104 ; REFINE: vmulps
    105 ; REFINE: vaddps
    106 ; REFINE-NEXT: retq
    107 }
    108 
    109 attributes #0 = { "unsafe-fp-math"="true" }
        ; Attribute group #0 is referenced by all three function definitions in
        ; this file; it sets the string attribute "unsafe-fp-math" to "true".
    110