; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-DENORM %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-fp32-denormals < %s | FileCheck --check-prefixes=GCN,GCN-FLUSH %s

; Checks how f32 fdiv is selected on gfx900, once with fp32 denormals enabled
; and once with them flushed:
;   * !fpmath 2.5 ulp, +/-1.0 numerator -> v_rcp_f32, wrapped in a pre/post
;     scaling sequence when denormals are enabled;
;   * fast, +/-1.0 numerator            -> a bare v_rcp_f32 in both runs;
;   * no relaxation, +/-1.0 numerator   -> v_div_scale/v_div_fmas/v_div_fixup
;     when denormals are enabled, v_rcp_f32 when they are flushed.
; A negated numerator or denominator is expected to fold into a source
; modifier (e64 encoding); a double negation is expected to cancel out.
;
; Every function re-captures the FileCheck variables it uses, so no check
; depends on a value left over from an earlier function.

; ---------------------------------------------------------------------------
; Scalar, 2.5 ulp.
; With denormals enabled the checks expect |x| to be compared against
; 0x6f800000 (2^96) and a scale of either 1.0 or 0x2f800000 (2^-32) to be
; applied before and after v_rcp_f32.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_1_by_x_25ulp:
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-DENORM:     v_mul_f32_e32 [[PRESCALED:v[0-9]+]], [[VAL]], [[SCALE]]
; GCN-DENORM:     v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-DENORM:     v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN-FLUSH:      v_rcp_f32_e32 [[OUT:v[0-9]+]], [[VAL]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_1_by_x_25ulp(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float 1.000000e+00, %load, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_x_25ulp:
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-DENORM:     v_mul_f32_e64 [[PRESCALED:v[0-9]+]], [[VAL]], -[[SCALE]]
; GCN-DENORM:     v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-DENORM:     v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN-FLUSH:      v_rcp_f32_e64 [[OUT:v[0-9]+]], -[[VAL]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_minus_1_by_x_25ulp(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float -1.000000e+00, %load, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_1_by_minus_x_25ulp:
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-DENORM:     v_mul_f32_e64 [[PRESCALED:v[0-9]+]], -[[VAL]], [[SCALE]]
; GCN-DENORM:     v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-DENORM:     v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN-FLUSH:      v_rcp_f32_e64 [[OUT:v[0-9]+]], -[[VAL]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float 1.000000e+00, %neg, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_minus_x_25ulp:
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-DENORM:     v_mul_f32_e32 [[PRESCALED:v[0-9]+]], [[VAL]], [[SCALE]]
; GCN-DENORM:     v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-DENORM:     v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN-FLUSH:      v_rcp_f32_e32 [[OUT:v[0-9]+]], [[VAL]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_minus_1_by_minus_x_25ulp(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float -1.000000e+00, %neg, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; ---------------------------------------------------------------------------
; <4 x float>, 2.5 ulp, splat +/-1.0 numerator.
; The same sequence as the scalar cases is expected once per lane.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_v4_1_by_x_25ulp:
; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT0:[0-9]+]], s[[VAL0]]
; GCN-FLUSH:      v_rcp_f32_e32
; GCN-FLUSH:      v_rcp_f32_e32
; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
; GCN-FLUSH:      global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
define amdgpu_kernel void @div_v4_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; GCN-LABEL: {{^}}div_v4_minus_1_by_x_25ulp:
; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT0:[0-9]+]], -s[[VAL0]]
; GCN-FLUSH:      v_rcp_f32_e64
; GCN-FLUSH:      v_rcp_f32_e64
; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]
; GCN-FLUSH:      global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
define amdgpu_kernel void @div_v4_minus_1_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; GCN-LABEL: {{^}}div_v4_1_by_minus_x_25ulp:
; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT0:[0-9]+]], -s[[VAL0]]
; GCN-FLUSH:      v_rcp_f32_e64
; GCN-FLUSH:      v_rcp_f32_e64
; GCN-FLUSH:      v_rcp_f32_e64 v[[OUT3:[0-9]+]], -s[[VAL3]]
; GCN-FLUSH:      global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
define amdgpu_kernel void @div_v4_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
  %div = fdiv <4 x float> <float 1.000000e+00, float 1.000000e+00, float 1.000000e+00, float 1.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; GCN-LABEL: {{^}}div_v4_minus_1_by_minus_x_25ulp:
; GCN-DAG:        s_load_dwordx4 s{{\[}}[[VAL0:[0-9]+]]:[[VAL3:[0-9]+]]], s[{{[0-9:]+}}], 0x0{{$}}
; GCN-DENORM-DAG: v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DENORM-DAG: v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DENORM-DAG: v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32
; GCN-DENORM-DAG: v_mul_f32_e32

; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT0:[0-9]+]], s[[VAL0]]
; GCN-FLUSH:      v_rcp_f32_e32
; GCN-FLUSH:      v_rcp_f32_e32
; GCN-FLUSH:      v_rcp_f32_e32 v[[OUT3:[0-9]+]], s[[VAL3]]
; GCN-FLUSH:      global_store_dwordx4 v[{{[0-9:]+}}], v{{\[}}[[OUT0]]:[[OUT3]]], off
define amdgpu_kernel void @div_v4_minus_1_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
  %div = fdiv <4 x float> <float -1.000000e+00, float -1.000000e+00, float -1.000000e+00, float -1.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; ---------------------------------------------------------------------------
; <4 x float>, 2.5 ulp, mixed numerator <2.0, 1.0, -1.0, -2.0>.
; Exactly two compare/select pairs are expected in both runs (the -NOT lines
; forbid any further ones). With denormals enabled, v_div_* instructions
; carrying the +/-2.0 operands are expected as well; with denormals flushed,
; no v_div instruction may appear.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_v4_c_by_x_25ulp:
; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, 2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32

; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc

; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, s{{[0-9]+}}, -v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP2]]

; GCN-DENORM-DAG: v_div_fmas_f32
; GCN-DENORM-DAG: v_div_fmas_f32
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, 2.0{{$}}
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}

; GCN-FLUSH-DAG:  v_rcp_f32_e32
; GCN-FLUSH-DAG:  v_rcp_f32_e64

; GCN-NOT:        v_cmp_gt_f32_e64
; GCN-NOT:        v_cndmask_b32_e32
; GCN-FLUSH-NOT:  v_div

; GCN:            global_store_dwordx4
define amdgpu_kernel void @div_v4_c_by_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %load, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; GCN-LABEL: {{^}}div_v4_c_by_minus_x_25ulp:
; GCN-DAG:        v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-DAG:        v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_scale_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_rcp_f32_e32

; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc
; GCN-DAG:        v_cmp_gt_f32_e64 vcc, |s{{[0-9]+}}|, [[L]]
; GCN-DAG:        v_cndmask_b32_e32 v{{[0-9]+}}, 1.0, [[S]], vcc

; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e64 v{{[0-9]+}}, -s{{[0-9]+}}, v{{[0-9]+}}
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP1:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP1]]
; GCN-DENORM-DAG: v_rcp_f32_e32 [[RCP2:v[0-9]+]], v{{[0-9]+}}
; GCN-DENORM-DAG: v_mul_f32_e32 v{{[0-9]+}}, v{{[0-9]+}}, [[RCP2]]

; GCN-DENORM-DAG: v_div_fmas_f32
; GCN-DENORM-DAG: v_div_fmas_f32
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}
; GCN-DENORM-DAG: v_div_fixup_f32 {{.*}}, -2.0{{$}}

; GCN-FLUSH-DAG:  v_rcp_f32_e32
; GCN-FLUSH-DAG:  v_rcp_f32_e64

; GCN-NOT:        v_cmp_gt_f32_e64
; GCN-NOT:        v_cndmask_b32_e32
; GCN-FLUSH-NOT:  v_div

; GCN:            global_store_dwordx4
define amdgpu_kernel void @div_v4_c_by_minus_x_25ulp(<4 x float> addrspace(1)* %arg) {
  %load = load <4 x float>, <4 x float> addrspace(1)* %arg, align 16
  %neg = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %load
  %div = fdiv <4 x float> <float 2.000000e+00, float 1.000000e+00, float -1.000000e+00, float -2.000000e+00>, %neg, !fpmath !0
  store <4 x float> %div, <4 x float> addrspace(1)* %arg, align 16
  ret void
}

; ---------------------------------------------------------------------------
; Scalar, 2.5 ulp, non-constant numerator.
; With denormals enabled the v_div_* expansion is expected; with denormals
; flushed the compare/select scaling around v_rcp_f32 is expected instead.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_v_by_x_25ulp:
; GCN-DAG:        s_load_dword [[VAL:s[0-9]+]], s[{{[0-9:]+}}], 0x0{{$}}

; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM:     v_div_fmas_f32
; GCN-DENORM:     v_div_fixup_f32 [[OUT:v[0-9]+]],

; GCN-FLUSH-DAG:  v_mov_b32_e32 [[L:v[0-9]+]], 0x6f800000
; GCN-FLUSH-DAG:  v_mov_b32_e32 [[S:v[0-9]+]], 0x2f800000
; GCN-FLUSH-DAG:  v_cmp_gt_f32_e64 vcc, |[[VAL]]|, [[L]]
; GCN-FLUSH-DAG:  v_cndmask_b32_e32 [[SCALE:v[0-9]+]], 1.0, [[S]], vcc
; GCN-FLUSH:      v_mul_f32_e32 [[PRESCALED:v[0-9]+]], [[VAL]], [[SCALE]]
; GCN-FLUSH:      v_rcp_f32_e32 [[RCP:v[0-9]+]], [[PRESCALED]]
; GCN-FLUSH:      v_mul_f32_e32 [[OUT:v[0-9]+]], [[SCALE]], [[RCP]]

; GCN:            global_store_dword v[{{[0-9:]+}}], [[OUT]], off
define amdgpu_kernel void @div_v_by_x_25ulp(float addrspace(1)* %arg, float %num) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float %num, %load, !fpmath !0
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; ---------------------------------------------------------------------------
; Scalar, fast-math flags.
; A single v_rcp_f32 with no scaling is expected in both runs.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_1_by_x_fast:
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_1_by_x_fast(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv fast float 1.000000e+00, %load
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_x_fast:
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_minus_1_by_x_fast(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv fast float -1.000000e+00, %load
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_1_by_minus_x_fast:
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_1_by_minus_x_fast(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv fast float 1.000000e+00, %neg
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_minus_x_fast:
; GCN: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
; GCN: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_minus_1_by_minus_x_fast(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv fast float -1.000000e+00, %neg
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; ---------------------------------------------------------------------------
; Scalar, no !fpmath and no fast-math flags.
; With denormals enabled the v_div_* expansion is expected; with denormals
; flushed a single v_rcp_f32 is expected.
; ---------------------------------------------------------------------------

; GCN-LABEL: {{^}}div_1_by_x_correctly_rounded:
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM:     v_div_fmas_f32
; GCN-DENORM:     v_div_fixup_f32

; GCN-FLUSH: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN-FLUSH: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
; GCN-FLUSH: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_1_by_x_correctly_rounded(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float 1.000000e+00, %load
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_x_correctly_rounded:
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM:     v_div_fmas_f32
; GCN-DENORM:     v_div_fixup_f32

; GCN-FLUSH: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN-FLUSH: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
; GCN-FLUSH: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_minus_1_by_x_correctly_rounded(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %div = fdiv float -1.000000e+00, %load
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_1_by_minus_x_correctly_rounded:
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM:     v_div_fmas_f32
; GCN-DENORM:     v_div_fixup_f32

; GCN-FLUSH: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN-FLUSH: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[VAL]]
; GCN-FLUSH: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float 1.000000e+00, %neg
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; GCN-LABEL: {{^}}div_minus_1_by_minus_x_correctly_rounded:
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM-DAG: v_rcp_f32_e32
; GCN-DENORM-DAG: v_div_scale_f32
; GCN-DENORM:     v_div_fmas_f32
; GCN-DENORM:     v_div_fixup_f32

; GCN-FLUSH: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x0
; GCN-FLUSH: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[VAL]]
; GCN-FLUSH: global_store_dword v[{{[0-9:]+}}], [[RCP]], off
define amdgpu_kernel void @div_minus_1_by_minus_x_correctly_rounded(float addrspace(1)* %arg) {
  %load = load float, float addrspace(1)* %arg, align 4
  %neg = fsub float -0.000000e+00, %load
  %div = fdiv float -1.000000e+00, %neg
  store float %div, float addrspace(1)* %arg, align 4
  ret void
}

; Accuracy bound attached to the 2.5 ulp fdiv instructions above.
!0 = !{float 2.500000e+00}