; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s

; Reciprocal pattern tests for f32.
;
; Each kernel below performs a single floating-point division and stores the
; result. The amdgcn run lines expect the division to be selected as one
; v_rcp_f32 (or, for division by a constant with the arcp flag, one v_mul_f32
; by a literal). The r600 run lines (cypress, cayman) expect RECIP_IEEE where
; an expectation is given.

; Plain 1.0 / x with no fast-math flags and no !fpmath metadata.
; FUNC-LABEL: {{^}}rcp_pat_f32:
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RES:v[0-9]+]], [[IN]]
; GCN: buffer_store_dword [[RES]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv float 1.0, %src
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / x where the fdiv carries !fpmath metadata (!0, i.e. 2.5).
; FUNC-LABEL: {{^}}rcp_ulp25_pat_f32:
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RES:v[0-9]+]], [[IN]]
; GCN: buffer_store_dword [[RES]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv float 1.0, %src, !fpmath !0
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; Same as the previous kernel, with the "fast" flag added to the fdiv.
; FUNC-LABEL: {{^}}rcp_fast_ulp25_pat_f32:
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RES:v[0-9]+]], [[IN]]
; GCN: buffer_store_dword [[RES]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv fast float 1.0, %src, !fpmath !0
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; Same again, but only the "arcp" flag is set on the fdiv.
; FUNC-LABEL: {{^}}rcp_arcp_ulp25_pat_f32:
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RES:v[0-9]+]], [[IN]]
; GCN: buffer_store_dword [[RES]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv arcp float 1.0, %src, !fpmath !0
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; No per-instruction flags here; the function uses attribute group #2, which
; sets "unsafe-fp-math"="true" instead of "false".
; FUNC-LABEL: {{^}}rcp_global_fast_ulp25_pat_f32:
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN: v_rcp_f32_e32 [[RES:v[0-9]+]], [[IN]]
; GCN: buffer_store_dword [[RES]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
  %recip = fdiv float 1.0, %src, !fpmath !0
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / fabs(x): the fabs is expected as an |...| source modifier on the
; e64 encoding of v_rcp_f32.
; FUNC-LABEL: {{^}}rcp_fabs_pat_f32:
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RES:v[0-9]+]], |[[IN]]|
; GCN: buffer_store_dword [[RES]]

; EG: RECIP_IEEE
define amdgpu_kernel void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %abs = call float @llvm.fabs.f32(float %src)
  %recip = fdiv float 1.0, %abs
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; -1.0 / x: the negated numerator is expected as a neg source modifier on
; the operand of v_rcp_f32.
; FUNC-LABEL: {{^}}neg_rcp_pat_f32:
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RES:v[0-9]+]], -[[IN]]
; GCN: buffer_store_dword [[RES]]

; EG: RECIP_IEEE
define amdgpu_kernel void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %recip = fdiv float -1.0, %src
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; 1.0 / -fabs(x): both modifiers are expected on the single rcp operand.
; The negation is written as (fsub -0.0, v). Only the amdgcn output is checked.
; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_f32:
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RES:v[0-9]+]], -|[[IN]]|
; GCN: buffer_store_dword [[RES]]
define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
  %abs = call float @llvm.fabs.f32(float %src)
  %neg.abs = fsub float -0.0, %abs
  %recip = fdiv float 1.0, %neg.abs
  store float %recip, float addrspace(1)* %out, align 4
  ret void
}

; As above, but -fabs(x) has a second user (an fmul). The modifiers are
; expected on both the rcp and the mul. The stores are volatile and are
; expected in program order. Only the amdgcn output is checked.
; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_multi_use_f32:
; GCN: s_load_dword [[IN:s[0-9]+]]
; GCN: v_rcp_f32_e64 [[RES:v[0-9]+]], -|[[IN]]|
; GCN: v_mul_f32_e64 [[PROD:v[0-9]+]], [[IN]], -|[[IN]]|
; GCN: buffer_store_dword [[RES]]
; GCN: buffer_store_dword [[PROD]]
define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {
  %abs = call float @llvm.fabs.f32(float %src)
  %neg.abs = fsub float -0.0, %abs
  %recip = fdiv float 1.0, %neg.abs
  store volatile float %recip, float addrspace(1)* %out, align 4

  %prod = fmul float %src, %neg.abs
  store volatile float %prod, float addrspace(1)* %out, align 4
  ret void
}

; x / 2.0 with arcp: expected as a multiply by the inline constant 0.5.
; FUNC-LABEL: {{^}}div_arcp_2_x_pat_f32:
; GCN: v_mul_f32_e32 [[PROD:v[0-9]+]], 0.5, v{{[0-9]+}}
; GCN: buffer_store_dword [[PROD]]
define amdgpu_kernel void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {
  %val = load float, float addrspace(1)* undef
  %quot = fdiv arcp float %val, 2.0
  store float %quot, float addrspace(1)* %out, align 4
  ret void
}

; x / 10.0 with arcp: expected as a multiply by the literal 0x3dcccccd
; (the f32 bit pattern nearest to 0.1).
; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f32:
; GCN: v_mul_f32_e32 [[PROD:v[0-9]+]], 0x3dcccccd, v{{[0-9]+}}
; GCN: buffer_store_dword [[PROD]]
define amdgpu_kernel void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {
  %val = load float, float addrspace(1)* undef
  %quot = fdiv arcp float %val, 10.0
  store float %quot, float addrspace(1)* %out, align 4
  ret void
}

; x / -10.0 with arcp: same as above with the sign bit set in the literal.
; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f32:
; GCN: v_mul_f32_e32 [[PROD:v[0-9]+]], 0xbdcccccd, v{{[0-9]+}}
; GCN: buffer_store_dword [[PROD]]
define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0 {
  %val = load float, float addrspace(1)* undef
  %quot = fdiv arcp float %val, -10.0
  store float %quot, float addrspace(1)* %out, align 4
  ret void
}

; Intrinsic declarations. @llvm.sqrt.f32 is not called by any kernel above.
declare float @llvm.fabs.f32(float) #1
declare float @llvm.sqrt.f32(float) #1

; #0 is used by every kernel except rcp_global_fast_ulp25_pat_f32, which uses #2.
attributes #0 = { nounwind "unsafe-fp-math"="false" }
attributes #1 = { nounwind readnone }
attributes #2 = { nounwind "unsafe-fp-math"="true" }

; Operand of the !fpmath attachments used above.
!0 = !{float 2.500000e+00}