; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+fp32-denormals -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -amdgpu-fast-fdiv < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefix=I754 -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs -enable-unsafe-fp-math < %s | FileCheck -check-prefix=UNSAFE-FP -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=R600 -check-prefix=FUNC %s

; These tests check that fdiv is expanded correctly and also test that the
; scheduler is scheduling the RECIP_IEEE and MUL_IEEE instructions in separate
; instruction groups.

; These tests also check fdiv lowering with unsafe_fp_math, coarse fp div, and
; IEEE754 fp div.
; Scalar f32 division with no fast-math flags on the fdiv.
; Expectations are listed for the R600, UNSAFE-FP, SI and I754 prefixes.
; FUNC-LABEL: {{^}}fdiv_f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32

; I754-DAG: v_div_scale_f32
; I754-DAG: v_rcp_f32
; I754-DAG: v_fma_f32
; I754-DAG: v_mul_f32
; I754-DAG: v_fma_f32
; I754-DAG: v_div_fixup_f32
define void @fdiv_f32(float addrspace(1)* %out, float %a, float %b) {
entry:
  %quot = fdiv float %a, %b
  store float %quot, float addrspace(1)* %out
  ret void
}

; Scalar f32 division carrying the `fast` flag.
; No I754 expectations are listed for this function.
; FUNC-LABEL: {{^}}fdiv_f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_f32_fast_math(float addrspace(1)* %out, float %a, float %b) {
entry:
  %quot = fdiv fast float %a, %b
  store float %quot, float addrspace(1)* %out
  ret void
}

; Scalar f32 division carrying the `arcp` flag.
; No I754 expectations are listed for this function.
; FUNC-LABEL: {{^}}fdiv_f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[2].W
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_f32_arcp_math(float addrspace(1)* %out, float %a, float %b) {
entry:
  %quot = fdiv arcp float %a, %b
  store float %quot, float addrspace(1)* %out
  ret void
}

; <2 x float> division with no fast-math flags on the fdiv.
; The I754 expectations list four v_div_scale_f32 and two v_div_fixup_f32.
; FUNC-LABEL: {{^}}fdiv_v2f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32

; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_fixup_f32
; I754: v_div_fixup_f32
define void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %quot = fdiv <2 x float> %a, %b
  store <2 x float> %quot, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> division carrying the `fast` flag.
; No I754 expectations are listed for this function.
; FUNC-LABEL: {{^}}fdiv_v2f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v2f32_fast_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %quot = fdiv fast <2 x float> %a, %b
  store <2 x float> %quot, <2 x float> addrspace(1)* %out
  ret void
}

; <2 x float> division carrying the `arcp` flag.
; No I754 expectations are listed for this function.
; FUNC-LABEL: {{^}}fdiv_v2f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Z
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW]}}, KC0[3].Y
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[3].X, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].W, PS

; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_rcp_f32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v2f32_arcp_math(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) {
entry:
  %quot = fdiv arcp <2 x float> %a, %b
  store <2 x float> %quot, <2 x float> addrspace(1)* %out
  ret void
}

; <4 x float> division with no fast-math flags on the fdiv. Both operands are
; loaded from memory: the dividend from %in and the divisor from the next
; <4 x float> element after it.
; The I754 expectations list eight v_div_scale_f32 and four v_div_fixup_f32.
; FUNC-LABEL: {{^}}fdiv_v4f32:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32

; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_scale_f32
; I754: v_div_fixup_f32
; I754: v_div_fixup_f32
; I754: v_div_fixup_f32
; I754: v_div_fixup_f32
define void @fdiv_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %den_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %num = load <4 x float>, <4 x float> addrspace(1)* %in
  %den = load <4 x float>, <4 x float> addrspace(1)* %den_ptr
  %quot = fdiv <4 x float> %num, %den
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x float> division carrying the `fast` flag; operands are loaded from %in
; and from the next <4 x float> element after it.
; No I754 expectations are listed for this function.
; FUNC-LABEL: {{^}}fdiv_v4f32_fast_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v4f32_fast_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %den_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %num = load <4 x float>, <4 x float> addrspace(1)* %in
  %den = load <4 x float>, <4 x float> addrspace(1)* %den_ptr
  %quot = fdiv fast <4 x float> %num, %den
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}

; <4 x float> division carrying the `arcp` flag; operands are loaded from %in
; and from the next <4 x float> element after it.
; No I754 expectations are listed for this function.
; FUNC-LABEL: {{^}}fdiv_v4f32_arcp_math:
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: RECIP_IEEE * T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS
; R600-DAG: MUL_IEEE {{\** *}}T{{[0-9]+\.[XYZW], T[0-9]+\.[XYZW]}}, PS

; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_rcp_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32
; UNSAFE-FP: v_mul_f32_e32

; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
; SI-DAG: v_rcp_f32
; SI-DAG: v_mul_f32
define void @fdiv_v4f32_arcp_math(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  %den_ptr = getelementptr <4 x float>, <4 x float> addrspace(1)* %in, i32 1
  %num = load <4 x float>, <4 x float> addrspace(1)* %in
  %den = load <4 x float>, <4 x float> addrspace(1)* %den_ptr
  %quot = fdiv arcp <4 x float> %num, %den
  store <4 x float> %quot, <4 x float> addrspace(1)* %out
  ret void
}