; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=sse2 -recip=!divf,!vec-divf | FileCheck %s --check-prefix=NORECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf,vec-divf | FileCheck %s --check-prefix=RECIP
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=avx -recip=divf:2,vec-divf:2 | FileCheck %s --check-prefix=REFINE

; When divss/divps on the target are substantially slower than rcpss/rcpps
; followed by a Newton-Raphson refinement, the estimate sequence should be
; generated in place of the real division.
;
; The three RUN lines above cover:
;   * the NORECIP prefix - SSE2 with -recip=!divf,!vec-divf; the checks expect
;     a plain divss/divps.
;   * the RECIP prefix - AVX with -recip=divf,vec-divf; the checks expect
;     vrcp followed by one mul/sub/mul/add group.
;   * the REFINE prefix - AVX with -recip=divf:2,vec-divf:2; the checks expect
;     vrcp followed by two mul/sub/mul/add groups.
;
; See PR21385 ( http://llvm.org/bugs/show_bug.cgi?id=21385 )
; for details about the accuracy, speed, and implementation
; differences of x86 reciprocal estimates.

; Scalar case: fast-math 1.0 / x on a single float.

; NORECIP-LABEL: reciprocal_estimate:
; NORECIP: movss
; NORECIP-NEXT: divss
; NORECIP-NEXT: movaps
; NORECIP-NEXT: retq

; RECIP-LABEL: reciprocal_estimate:
; RECIP: vrcpss
; RECIP: vmulss
; RECIP: vsubss
; RECIP: vmulss
; RECIP: vaddss
; RECIP-NEXT: retq

; REFINE-LABEL: reciprocal_estimate:
; REFINE: vrcpss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE: vmulss
; REFINE: vsubss
; REFINE: vmulss
; REFINE: vaddss
; REFINE-NEXT: retq

define float @reciprocal_estimate(float %x) #0 {
  %recip = fdiv fast float 1.0, %x
  ret float %recip
}

; 128-bit vector case: fast-math splat(1.0) / x on <4 x float>.

; NORECIP-LABEL: reciprocal_estimate_v4f32:
; NORECIP: movaps
; NORECIP-NEXT: divps
; NORECIP-NEXT: movaps
; NORECIP-NEXT: retq

; RECIP-LABEL: reciprocal_estimate_v4f32:
; RECIP: vrcpps
; RECIP: vmulps
; RECIP: vsubps
; RECIP: vmulps
; RECIP: vaddps
; RECIP-NEXT: retq

; REFINE-LABEL: reciprocal_estimate_v4f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq

define <4 x float> @reciprocal_estimate_v4f32(<4 x float> %x) #0 {
  %recip = fdiv fast <4 x float> <float 1.0, float 1.0, float 1.0, float 1.0>, %x
  ret <4 x float> %recip
}

; 256-bit vector case: fast-math splat(1.0) / x on <8 x float>.
; The SSE2 run is checked for two divps instructions; the AVX runs are
; checked for a single vrcpps-based sequence.

; NORECIP-LABEL: reciprocal_estimate_v8f32:
; NORECIP: movaps
; NORECIP: movaps
; NORECIP-NEXT: divps
; NORECIP-NEXT: divps
; NORECIP-NEXT: movaps
; NORECIP-NEXT: movaps
; NORECIP-NEXT: retq

; RECIP-LABEL: reciprocal_estimate_v8f32:
; RECIP: vrcpps
; RECIP: vmulps
; RECIP: vsubps
; RECIP: vmulps
; RECIP: vaddps
; RECIP-NEXT: retq

; REFINE-LABEL: reciprocal_estimate_v8f32:
; REFINE: vrcpps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE: vmulps
; REFINE: vsubps
; REFINE: vmulps
; REFINE: vaddps
; REFINE-NEXT: retq

define <8 x float> @reciprocal_estimate_v8f32(<8 x float> %x) #0 {
  %recip = fdiv fast <8 x float> <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>, %x
  ret <8 x float> %recip
}

; Attribute group shared by all three test functions.
attributes #0 = { "unsafe-fp-math"="true" }