; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s

; Instruction-selection checks for reciprocal on amdgcn. Three input shapes
; are covered: direct calls to the llvm.amdgcn.rcp intrinsics, `fdiv 1.0, x`
; patterns, and rcp(sqrt(x)) patterns. Each kernel is compiled under one of
; the attribute groups #1-#4 defined at the end of this file, which vary
; "unsafe-fp-math" and the fp32-denormals target feature.
;
; Explanatory comments in this file deliberately never spell a check prefix
; followed by a colon, so they cannot be mistaken for directives.

declare float @llvm.amdgcn.rcp.f32(float) #0
declare double @llvm.amdgcn.rcp.f64(double) #0

declare double @llvm.sqrt.f64(double) #0
declare float @llvm.sqrt.f32(float) #0

; rcp of an undef operand: no v_rcp_f32 may appear in the output.
; FUNC-LABEL: {{^}}rcp_undef_f32:
; SI-NOT: v_rcp_f32
define amdgpu_kernel void @rcp_undef_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp of the constant 2.0: expected to become a move of the immediate 0.5
; with no v_rcp_f32 emitted.
; FUNC-LABEL: {{^}}rcp_2_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0.5
define amdgpu_kernel void @rcp_2_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 2.0)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp of the constant 10.0: expected to become a move of 0x3dcccccd (the
; single-precision encoding nearest to 0.1) with no v_rcp_f32 emitted.
; FUNC-LABEL: {{^}}rcp_10_f32:
; SI-NOT: v_rcp_f32
; SI: v_mov_b32_e32 v{{[0-9]+}}, 0x3dcccccd
define amdgpu_kernel void @rcp_10_f32(float addrspace(1)* %out) #1 {
  %rcp = call float @llvm.amdgcn.rcp.f32(float 10.0)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; `fdiv 1.0, x` under group #1 (unsafe-fp-math off, fp32 denormals off):
; expects a v_rcp_f32 reading a scalar register, and the result register must
; not be mentioned again before the store that consumes it.
; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; `fdiv 1.0, x` under group #4 (unsafe-fp-math on, fp32 denormals on):
; expects the same v_rcp_f32 + direct store sequence as the kernel above.
; NOTE(review): the kernel name says "safe" but group #4 enables
; unsafe-fp-math; the name looks swapped with the next kernel. Confirm before
; renaming, since the label check matches on the name.
; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; `fdiv 1.0, x` under group #3 (unsafe-fp-math off, fp32 denormals on):
; expects v_div_scale_f32 to appear in the output.
; NOTE(review): the kernel name says "unsafe" but group #3 disables
; unsafe-fp-math; see the note on the previous kernel.
; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
; SI: v_div_scale_f32
define amdgpu_kernel void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
  %rcp = fdiv float 1.0, %src
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp(sqrt(x)) under group #1 (unsafe-fp-math off): expects a v_sqrt_f32
; followed by a v_rcp_f32.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
; SI: v_sqrt_f32_e32
; SI: v_rcp_f32_e32
define amdgpu_kernel void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; rcp(sqrt(x)) under group #2 (unsafe-fp-math on): expects a v_rsq_f32.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
; SI: v_rsq_f32_e32
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
  %sqrt = call float @llvm.sqrt.f32(float %src)
  %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
  store float %rcp, float addrspace(1)* %out, align 4
  ret void
}

; f64 rcp intrinsic under group #1: expects a v_rcp_f64 reading a scalar
; register pair, with the result pair not mentioned again before the 64-bit
; store.
; FUNC-LABEL: {{^}}rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 rcp intrinsic under group #2 (unsafe-fp-math on): same expectation as
; rcp_f64.
; FUNC-LABEL: {{^}}unsafe_rcp_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 `fdiv 1.0, x` under group #1 (unsafe-fp-math off): expects
; v_div_scale_f64 to appear in the output.
; FUNC-LABEL: {{^}}rcp_pat_f64:
; SI: v_div_scale_f64
define amdgpu_kernel void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 `fdiv 1.0, x` under group #2 (unsafe-fp-math on): expects a single
; v_rcp_f64 whose result pair is stored without being mentioned in between.
; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %rcp = fdiv double 1.0, %src
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 rcp(sqrt(x)) under group #1 (unsafe-fp-math off): no v_rsq_f64 may
; precede the expected v_sqrt_f64, which is then followed by a v_rcp_f64.
; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
; SI-NOT: v_rsq_f64_e32
; SI: v_sqrt_f64
; SI: v_rcp_f64
define amdgpu_kernel void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; f64 rcp(sqrt(x)) under group #2 (unsafe-fp-math on): expects a single
; v_rsq_f64 whose result pair is stored without being mentioned in between.
; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
; SI-NOT: [[RESULT]]
; SI: buffer_store_dwordx2 [[RESULT]]
define amdgpu_kernel void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
  %sqrt = call double @llvm.sqrt.f64(double %src)
  %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
  store double %rcp, double addrspace(1)* %out, align 8
  ret void
}

; Attribute groups:
;   #0 - intrinsic declarations (nounwind readnone)
;   #1 - unsafe-fp-math off, fp32 denormals disabled
;   #2 - unsafe-fp-math on,  fp32 denormals disabled
;   #3 - unsafe-fp-math off, fp32 denormals enabled
;   #4 - unsafe-fp-math on,  fp32 denormals enabled
attributes #0 = { nounwind readnone }
attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }