; RUN: llc -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-SAFE -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -enable-no-signed-zeros-fp-math -march=amdgcn -mcpu=tahiti -start-after=sink -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN -check-prefix=GCN-NSZ -check-prefix=SI -check-prefix=FUNC %s

; NOTE(review): This file tests folding of fneg (written as fsub -0.0, x)
; through fadd, fmul, fminnum, fmaxnum, and fma during AMDGPU instruction
; selection. The first RUN line (GCN-SAFE prefix) is the default FP mode;
; the second RUN line (GCN-NSZ prefix) passes
; -enable-no-signed-zeros-fp-math, which permits folds such as
; fneg(fadd a, b) -> fsub(-a, b) that are illegal with signed zeros.
; Each kernel loads its operands with volatile loads indexed by
; workitem.id.x so the inputs arrive in VGPRs and are not constant-folded.
; Several kernels compute %out.gep but store (volatile) through %out; the
; volatility keeps every store, which is what the store-order checks rely on.

; --------------------------------------------------------------------------------
; fadd tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; The negated add has a second (store) user, so the add itself must survive.
; GCN-LABEL: {{^}}v_fneg_add_store_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_store_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %add, float addrspace(1)* %out
  ret void
}

; The add's second user is an fmul, so with nsz the fneg can be pushed into
; both users instead of keeping the original add.
; GCN-LABEL: {{^}}v_fneg_add_multi_use_add_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[NEG_ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_ADD]]
; GCN: buffer_store_dword [[NEG_ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_add_multi_use_add_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %add = fadd float %a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %add, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; fneg(fadd(fneg(a), b)): with nsz the two negations cancel into a sub.
; GCN-LABEL: {{^}}v_fneg_add_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000,

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; Same as above with the negation on the second fadd operand.
; GCN-LABEL: {{^}}v_fneg_add_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; fneg(fadd(fneg(a), fneg(b))): with nsz this collapses to a plain add.
; GCN-LABEL: {{^}}v_fneg_add_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_sub_f32_e64 [[ADD:v[0-9]+]], -[[A]], [[B]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ: v_add_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_add_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %add = fadd float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; The inner fneg(a) also has a store user, so a materialized -a is needed
; in addition to the folded arithmetic.
; GCN-LABEL: {{^}}v_fneg_add_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1{{$}}
; GCN-SAFE: v_xor_b32_e32 [[NEG_A:v[0-9]+]], [[A]], [[SIGNBIT]]
; GCN-SAFE: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 [[NEG_ADD:v[0-9]+]], [[ADD]], [[SIGNBIT]]

; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_add_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; The inner fneg(a) also feeds an fmul with a scalar (SGPR) operand; the
; negation folds into the mul's source modifier.
; GCN-LABEL: {{^}}v_fneg_add_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]

; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-SAFE-DAG: v_sub_f32_e32 [[ADD:v[0-9]+]], [[B]], [[A]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[ADD]]

; GCN-NSZ-DAG: v_sub_f32_e32 [[NEG_ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NSZ-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NSZ-NEXT: buffer_store_dword [[NEG_ADD]]
; GCN-NSZ-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_add_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %add = fadd float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %add
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmul tests
; --------------------------------------------------------------------------------
; Unlike fadd, fneg(fmul) folds into a source modifier even without nsz,
; so these cases have a single set of checks for both RUN lines.

; GCN-LABEL: {{^}}v_fneg_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_store_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_mul_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MUL0]]
; GCN-NEXT: buffer_store_dword [[MUL0]]
; GCN-NEXT: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @v_fneg_mul_multi_use_mul_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %mul, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; fneg(fmul(fneg(a), b)): the two negations cancel unconditionally.
; GCN-LABEL: {{^}}v_fneg_mul_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; Three negations total: one survives as a source modifier.
; GCN-LABEL: {{^}}v_fneg_mul_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = fmul float %fneg.a, %fneg.b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}v_fneg_mul_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_f32_e32 [[NEG_MUL:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_MUL]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = fmul float %fneg.a, %b
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fminnum tests
; --------------------------------------------------------------------------------
; The combine rewrites fneg(minnum(a, b)) as maxnum(fneg(a), fneg(b)), so
; most of these check for v_max with negated source modifiers.

; GCN-LABEL: {{^}}v_fneg_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_self_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_self_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float %a, float %a)
  %min.fneg = fsub float -0.0, %min
  store float %min.fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_posk_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 4.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_negk_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float -4.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.minnum.f32(float -0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; The fneg user is an fmul, so the negation folds into the mul instead.
; GCN-LABEL: {{^}}v_fneg_0_minnum_foldable_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 0, [[A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MIN]], [[B]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_0_minnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_minnum_multi_use_minnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_max_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: buffer_store_dword [[MAX0]]
; GCN-NEXT: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @v_fneg_minnum_multi_use_minnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.minnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %min
  %use1 = fmul float %min, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fmaxnum tests
; --------------------------------------------------------------------------------
; Mirror of the fminnum group: fneg(maxnum(a, b)) is rewritten as
; minnum(fneg(a), fneg(b)), so these check for v_min.

; GCN-LABEL: {{^}}v_fneg_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[B]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_self_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_self_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.maxnum.f32(float %a, float %a)
  %min.fneg = fsub float -0.0, %min
  store float %min.fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_posk_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], -4.0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_posk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.maxnum.f32(float 4.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_negk_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 4.0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_negk_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %min = call float @llvm.maxnum.f32(float -4.0, float %a)
  %fneg = fsub float -0.000000e+00, %min
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_max_f32_e32 [[RESULT:v[0-9]+]], 0, [[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_neg0_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_min_f32_e64 [[RESULT:v[0-9]+]], -[[A]], 0
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_neg0_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %max = call float @llvm.maxnum.f32(float -0.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_0_maxnum_foldable_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
; GCN: v_mul_f32_e64 [[RESULT:v[0-9]+]], -[[MAX]], [[B]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_0_maxnum_foldable_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %max = call float @llvm.maxnum.f32(float 0.0, float %a)
  %fneg = fsub float -0.000000e+00, %max
  %mul = fmul float %fneg, %b
  store float %mul, float addrspace(1)* %out.gep
  ret void
}

; GCN-LABEL: {{^}}v_fneg_maxnum_multi_use_maxnum_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_min_f32_e64 [[MAX0:v[0-9]+]], -[[A]], -[[B]]
; GCN-NEXT: v_mul_f32_e32 [[MUL1:v[0-9]+]], -4.0, [[MAX0]]
; GCN-NEXT: buffer_store_dword [[MAX0]]
; GCN-NEXT: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @v_fneg_maxnum_multi_use_maxnum_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %min = call float @llvm.maxnum.f32(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %min
  %use1 = fmul float %min, 4.0
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; fma tests
; --------------------------------------------------------------------------------

; GCN-LABEL: {{^}}v_fneg_fma_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], [[B]], [[C]]
; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[RESULT]]

; GCN-NSZ: v_fma_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]]
; GCN-NSZ-NEXT: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_fma_f32(float addrspace(1)* %out, float
addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 697 %tid = call i32 @llvm.amdgcn.workitem.id.x() 698 %tid.ext = sext i32 %tid to i64 699 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 700 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 701 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 702 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 703 %a = load volatile float, float addrspace(1)* %a.gep 704 %b = load volatile float, float addrspace(1)* %b.gep 705 %c = load volatile float, float addrspace(1)* %c.gep 706 %fma = call float @llvm.fma.f32(float %a, float %b, float %c) 707 %fneg = fsub float -0.000000e+00, %fma 708 store float %fneg, float addrspace(1)* %out.gep 709 ret void 710 } 711 712 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fma_f32: 713 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 714 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 715 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 716 ; GCN-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] 717 ; GCN-DAG: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]] 718 ; GCN-NEXT: buffer_store_dword [[NEG_FMA]] 719 ; GCN-NEXT: buffer_store_dword [[FMA]] 720 define amdgpu_kernel void @v_fneg_fma_store_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 721 %tid = call i32 @llvm.amdgcn.workitem.id.x() 722 %tid.ext = sext i32 %tid to i64 723 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 724 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 725 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 726 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 727 %a = load volatile float, float addrspace(1)* %a.gep 728 %b = load volatile float, float addrspace(1)* %b.gep 
729 %c = load volatile float, float addrspace(1)* %c.gep 730 %fma = call float @llvm.fma.f32(float %a, float %b, float %c) 731 %fneg = fsub float -0.000000e+00, %fma 732 store volatile float %fneg, float addrspace(1)* %out 733 store volatile float %fma, float addrspace(1)* %out 734 ret void 735 } 736 737 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fma_f32: 738 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 739 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 740 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 741 742 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] 743 ; GCN-SAFE: v_xor_b32_e32 [[NEG_FMA:v[0-9]+]], 0x80000000, [[FMA]] 744 ; GCN-SAFE: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[FMA]] 745 746 ; GCN-NSZ: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], -[[B]], -[[C]] 747 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_FMA]] 748 749 ; GCN-NEXT: buffer_store_dword [[NEG_FMA]] 750 ; GCN-NEXT: buffer_store_dword [[MUL]] 751 define amdgpu_kernel void @v_fneg_fma_multi_use_fma_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 752 %tid = call i32 @llvm.amdgcn.workitem.id.x() 753 %tid.ext = sext i32 %tid to i64 754 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 755 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 756 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 757 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 758 %a = load volatile float, float addrspace(1)* %a.gep 759 %b = load volatile float, float addrspace(1)* %b.gep 760 %c = load volatile float, float addrspace(1)* %c.gep 761 %fma = call float @llvm.fma.f32(float %a, float %b, float %c) 762 %fneg = fsub float -0.000000e+00, %fma 763 %use1 = fmul float %fma, 4.0 764 store volatile float %fneg, float addrspace(1)* %out 765 store volatile float %use1, float addrspace(1)* %out 766 ret void 767 } 768 769 ; 
GCN-LABEL: {{^}}v_fneg_fma_fneg_x_y_f32: 770 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 771 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 772 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 773 774 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], [[C]] 775 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] 776 777 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 778 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]] 779 define amdgpu_kernel void @v_fneg_fma_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 780 %tid = call i32 @llvm.amdgcn.workitem.id.x() 781 %tid.ext = sext i32 %tid to i64 782 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 783 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 784 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 785 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 786 %a = load volatile float, float addrspace(1)* %a.gep 787 %b = load volatile float, float addrspace(1)* %b.gep 788 %c = load volatile float, float addrspace(1)* %c.gep 789 %fneg.a = fsub float -0.000000e+00, %a 790 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 791 %fneg = fsub float -0.000000e+00, %fma 792 store volatile float %fneg, float addrspace(1)* %out 793 ret void 794 } 795 796 ; GCN-LABEL: {{^}}v_fneg_fma_x_fneg_y_f32: 797 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 798 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 799 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 800 801 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]] 802 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] 803 804 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 805 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]] 806 define amdgpu_kernel void @v_fneg_fma_x_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, 
float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 807 %tid = call i32 @llvm.amdgcn.workitem.id.x() 808 %tid.ext = sext i32 %tid to i64 809 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 810 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 811 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 812 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 813 %a = load volatile float, float addrspace(1)* %a.gep 814 %b = load volatile float, float addrspace(1)* %b.gep 815 %c = load volatile float, float addrspace(1)* %c.gep 816 %fneg.b = fsub float -0.000000e+00, %b 817 %fma = call float @llvm.fma.f32(float %a, float %fneg.b, float %c) 818 %fneg = fsub float -0.000000e+00, %fma 819 store volatile float %fneg, float addrspace(1)* %out 820 ret void 821 } 822 823 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_fneg_y_f32: 824 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 825 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 826 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 827 828 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], -[[B]], [[C]] 829 ; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]] 830 831 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], -[[C]] 832 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]] 833 define amdgpu_kernel void @v_fneg_fma_fneg_fneg_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 834 %tid = call i32 @llvm.amdgcn.workitem.id.x() 835 %tid.ext = sext i32 %tid to i64 836 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 837 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 838 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 839 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 840 %a = load volatile float, float addrspace(1)* %a.gep 841 
%b = load volatile float, float addrspace(1)* %b.gep 842 %c = load volatile float, float addrspace(1)* %c.gep 843 %fneg.a = fsub float -0.000000e+00, %a 844 %fneg.b = fsub float -0.000000e+00, %b 845 %fma = call float @llvm.fma.f32(float %fneg.a, float %fneg.b, float %c) 846 %fneg = fsub float -0.000000e+00, %fma 847 store volatile float %fneg, float addrspace(1)* %out 848 ret void 849 } 850 851 ; GCN-LABEL: {{^}}v_fneg_fma_fneg_x_fneg_f32: 852 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 853 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 854 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 855 856 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], [[B]], -[[C]] 857 ; GCN-SAFE: v_xor_b32_e32 v{{[[0-9]+}}, 0x80000000, [[FMA]] 858 859 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], [[C]] 860 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]] 861 define amdgpu_kernel void @v_fneg_fma_fneg_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 862 %tid = call i32 @llvm.amdgcn.workitem.id.x() 863 %tid.ext = sext i32 %tid to i64 864 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 865 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 866 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 867 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 868 %a = load volatile float, float addrspace(1)* %a.gep 869 %b = load volatile float, float addrspace(1)* %b.gep 870 %c = load volatile float, float addrspace(1)* %c.gep 871 %fneg.a = fsub float -0.000000e+00, %a 872 %fneg.c = fsub float -0.000000e+00, %c 873 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %fneg.c) 874 %fneg = fsub float -0.000000e+00, %fma 875 store volatile float %fneg, float addrspace(1)* %out 876 ret void 877 } 878 879 ; GCN-LABEL: {{^}}v_fneg_fma_x_y_fneg_f32: 880 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 881 
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 882 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 883 884 ; GCN-NSZ-SAFE: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 885 ; GCN-NSZ-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] 886 887 ; GCN-NSZ: v_fma_f32 [[FMA:v[0-9]+]], [[A]], -[[B]], [[C]] 888 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]] 889 define amdgpu_kernel void @v_fneg_fma_x_y_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 890 %tid = call i32 @llvm.amdgcn.workitem.id.x() 891 %tid.ext = sext i32 %tid to i64 892 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 893 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 894 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 895 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 896 %a = load volatile float, float addrspace(1)* %a.gep 897 %b = load volatile float, float addrspace(1)* %b.gep 898 %c = load volatile float, float addrspace(1)* %c.gep 899 %fneg.c = fsub float -0.000000e+00, %c 900 %fma = call float @llvm.fma.f32(float %a, float %b, float %fneg.c) 901 %fneg = fsub float -0.000000e+00, %fma 902 store volatile float %fneg, float addrspace(1)* %out 903 ret void 904 } 905 906 ; GCN-LABEL: {{^}}v_fneg_fma_store_use_fneg_x_y_f32: 907 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 908 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 909 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 910 911 ; GCN-SAFE: v_xor_b32 912 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]], -[[A]], 913 ; GCN-SAFE: v_xor_b32 914 915 ; GCN-NSZ-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]] 916 ; GCN-NSZ-DAG: v_fma_f32 [[FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 917 ; GCN-NSZ-NEXT: buffer_store_dword [[FMA]] 918 ; GCN-NSZ-NEXT: buffer_store_dword [[NEG_A]] 919 define amdgpu_kernel void @v_fneg_fma_store_use_fneg_x_y_f32(float addrspace(1)* %out, 
float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 920 %tid = call i32 @llvm.amdgcn.workitem.id.x() 921 %tid.ext = sext i32 %tid to i64 922 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 923 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 924 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 925 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 926 %a = load volatile float, float addrspace(1)* %a.gep 927 %b = load volatile float, float addrspace(1)* %b.gep 928 %c = load volatile float, float addrspace(1)* %c.gep 929 %fneg.a = fsub float -0.000000e+00, %a 930 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 931 %fneg = fsub float -0.000000e+00, %fma 932 store volatile float %fneg, float addrspace(1)* %out 933 store volatile float %fneg.a, float addrspace(1)* %out 934 ret void 935 } 936 937 ; GCN-LABEL: {{^}}v_fneg_fma_multi_use_fneg_x_y_f32: 938 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 939 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 940 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 941 942 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}} 943 ; GCN-SAFE: v_fma_f32 [[FMA:v[0-9]+]] 944 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[FMA]] 945 946 ; GCN-NSZ-DAG: v_fma_f32 [[NEG_FMA:v[0-9]+]], [[A]], [[B]], -[[C]] 947 ; GCN-NSZ-NEXT: buffer_store_dword [[NEG_FMA]] 948 ; GCN-NSZ-NEXT: buffer_store_dword [[MUL]] 949 define amdgpu_kernel void @v_fneg_fma_multi_use_fneg_x_y_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float %d) #0 { 950 %tid = call i32 @llvm.amdgcn.workitem.id.x() 951 %tid.ext = sext i32 %tid to i64 952 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 953 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 954 %c.gep = 
getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 955 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 956 %a = load volatile float, float addrspace(1)* %a.gep 957 %b = load volatile float, float addrspace(1)* %b.gep 958 %c = load volatile float, float addrspace(1)* %c.gep 959 %fneg.a = fsub float -0.000000e+00, %a 960 %fma = call float @llvm.fma.f32(float %fneg.a, float %b, float %c) 961 %fneg = fsub float -0.000000e+00, %fma 962 %use1 = fmul float %fneg.a, %d 963 store volatile float %fneg, float addrspace(1)* %out 964 store volatile float %use1, float addrspace(1)* %out 965 ret void 966 } 967 968 ; -------------------------------------------------------------------------------- 969 ; fmad tests 970 ; -------------------------------------------------------------------------------- 971 972 ; GCN-LABEL: {{^}}v_fneg_fmad_f32: 973 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 974 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 975 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 976 977 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]] 978 ; GCN-SAFE: v_xor_b32_e32 v{{[0-9]+}}, 0x80000000, [[C]] 979 980 ; GCN-NSZ: v_mad_f32 [[RESULT:v[0-9]+]], [[A]], -[[B]], -[[C]] 981 ; GCN-NSZ-NEXT: buffer_store_dword [[RESULT]] 982 define amdgpu_kernel void @v_fneg_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 983 %tid = call i32 @llvm.amdgcn.workitem.id.x() 984 %tid.ext = sext i32 %tid to i64 985 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 986 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 987 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 988 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 989 %a = load volatile float, float addrspace(1)* %a.gep 990 %b = load volatile float, float addrspace(1)* %b.gep 991 %c = load 
volatile float, float addrspace(1)* %c.gep 992 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) 993 %fneg = fsub float -0.000000e+00, %fma 994 store float %fneg, float addrspace(1)* %out.gep 995 ret void 996 } 997 998 ; GCN-LABEL: {{^}}v_fneg_fmad_multi_use_fmad_f32: 999 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1000 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1001 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1002 1003 ; GCN-SAFE: v_mac_f32_e32 [[C]], [[A]], [[B]] 1004 ; GCN-SAFE: v_xor_b32_e32 [[NEG_MAD:v[0-9]+]], 0x80000000, [[C]] 1005 ; GCN-SAFE-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], 4.0, [[C]] 1006 1007 ; GCN-NSZ: v_mad_f32 [[NEG_MAD:v[0-9]+]], -[[A]], [[B]], -[[C]] 1008 ; GCN-NSZ-NEXT: v_mul_f32_e32 [[MUL:v[0-9]+]], -4.0, [[NEG_MAD]] 1009 1010 ; GCN: buffer_store_dword [[NEG_MAD]] 1011 ; GCN-NEXT: buffer_store_dword [[MUL]] 1012 define amdgpu_kernel void @v_fneg_fmad_multi_use_fmad_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1013 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1014 %tid.ext = sext i32 %tid to i64 1015 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1016 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1017 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1018 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1019 %a = load volatile float, float addrspace(1)* %a.gep 1020 %b = load volatile float, float addrspace(1)* %b.gep 1021 %c = load volatile float, float addrspace(1)* %c.gep 1022 %fma = call float @llvm.fmuladd.f32(float %a, float %b, float %c) 1023 %fneg = fsub float -0.000000e+00, %fma 1024 %use1 = fmul float %fma, 4.0 1025 store volatile float %fneg, float addrspace(1)* %out 1026 store volatile float %use1, float addrspace(1)* %out 1027 ret void 1028 } 1029 1030 ; 
-------------------------------------------------------------------------------- 1031 ; fp_extend tests 1032 ; -------------------------------------------------------------------------------- 1033 1034 ; GCN-LABEL: {{^}}v_fneg_fp_extend_f32_to_f64: 1035 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1036 ; GCN: v_cvt_f64_f32_e64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]] 1037 ; GCN: buffer_store_dwordx2 [[RESULT]] 1038 define amdgpu_kernel void @v_fneg_fp_extend_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1039 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1040 %tid.ext = sext i32 %tid to i64 1041 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1042 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1043 %a = load volatile float, float addrspace(1)* %a.gep 1044 %fpext = fpext float %a to double 1045 %fneg = fsub double -0.000000e+00, %fpext 1046 store double %fneg, double addrspace(1)* %out.gep 1047 ret void 1048 } 1049 1050 ; GCN-LABEL: {{^}}v_fneg_fp_extend_fneg_f32_to_f64: 1051 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1052 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]] 1053 ; GCN: buffer_store_dwordx2 [[RESULT]] 1054 define amdgpu_kernel void @v_fneg_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1055 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1056 %tid.ext = sext i32 %tid to i64 1057 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1058 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1059 %a = load volatile float, float addrspace(1)* %a.gep 1060 %fneg.a = fsub float -0.000000e+00, %a 1061 %fpext = fpext float %fneg.a to double 1062 %fneg = fsub double -0.000000e+00, %fpext 1063 store double %fneg, double addrspace(1)* %out.gep 1064 ret void 1065 } 1066 1067 ; GCN-LABEL: {{^}}v_fneg_fp_extend_store_use_fneg_f32_to_f64: 1068 ; GCN: 
{{buffer|flat}}_load_dword [[A:v[0-9]+]] 1069 ; GCN-DAG: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]] 1070 ; GCN-DAG: v_xor_b32_e32 [[FNEG_A:v[0-9]+]], 0x80000000, [[A]] 1071 ; GCN: buffer_store_dwordx2 [[RESULT]] 1072 ; GCN: buffer_store_dword [[FNEG_A]] 1073 define amdgpu_kernel void @v_fneg_fp_extend_store_use_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1074 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1075 %tid.ext = sext i32 %tid to i64 1076 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1077 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1078 %a = load volatile float, float addrspace(1)* %a.gep 1079 %fneg.a = fsub float -0.000000e+00, %a 1080 %fpext = fpext float %fneg.a to double 1081 %fneg = fsub double -0.000000e+00, %fpext 1082 store volatile double %fneg, double addrspace(1)* %out.gep 1083 store volatile float %fneg.a, float addrspace(1)* undef 1084 ret void 1085 } 1086 1087 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f32_to_f64: 1088 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1089 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]] 1090 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]] 1091 ; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}} 1092 ; GCN: buffer_store_dwordx2 v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}} 1093 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1094 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1095 %tid.ext = sext i32 %tid to i64 1096 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1097 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1098 %a = load volatile float, float addrspace(1)* %a.gep 1099 %fpext = fpext float %a to double 1100 %fneg = fsub double -0.000000e+00, %fpext 1101 store volatile double %fneg, double 
addrspace(1)* %out.gep 1102 store volatile double %fpext, double addrspace(1)* undef 1103 ret void 1104 } 1105 1106 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64: 1107 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1108 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT_LO:[0-9]+]]:[[CVT_HI:[0-9]+]]{{\]}}, [[A]] 1109 ; GCN-DAG: v_xor_b32_e32 v[[FNEG_A:[0-9]+]], 0x80000000, v[[CVT_HI]] 1110 ; GCN-DAG: v_mul_f64 [[MUL:v\[[0-9]+:[0-9]+\]]], v{{\[}}[[CVT_LO]]:[[CVT_HI]]{{\]}}, 4.0 1111 ; GCN: buffer_store_dwordx2 v{{\[[0-9]+}}:[[FNEG_A]]{{\]}} 1112 ; GCN: buffer_store_dwordx2 [[MUL]] 1113 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f32_to_f64(double addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1114 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1115 %tid.ext = sext i32 %tid to i64 1116 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1117 %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext 1118 %a = load volatile float, float addrspace(1)* %a.gep 1119 %fpext = fpext float %a to double 1120 %fneg = fsub double -0.000000e+00, %fpext 1121 %mul = fmul double %fpext, 4.0 1122 store volatile double %fneg, double addrspace(1)* %out.gep 1123 store volatile double %mul, double addrspace(1)* %out.gep 1124 ret void 1125 } 1126 1127 ; FIXME: Source modifiers not folded for f16->f32 1128 ; GCN-LABEL: {{^}}v_fneg_multi_use_fp_extend_fneg_f16_to_f32: 1129 define amdgpu_kernel void @v_fneg_multi_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 { 1130 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1131 %tid.ext = sext i32 %tid to i64 1132 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext 1133 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1134 %a = load volatile half, half addrspace(1)* %a.gep 1135 %fpext = fpext half %a to float 1136 %fneg = fsub float -0.000000e+00, %fpext 1137 store 
volatile float %fneg, float addrspace(1)* %out.gep 1138 store volatile float %fpext, float addrspace(1)* %out.gep 1139 ret void 1140 } 1141 1142 ; GCN-LABEL: {{^}}v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32: 1143 define amdgpu_kernel void @v_fneg_multi_foldable_use_fp_extend_fneg_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %a.ptr) #0 { 1144 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1145 %tid.ext = sext i32 %tid to i64 1146 %a.gep = getelementptr inbounds half, half addrspace(1)* %a.ptr, i64 %tid.ext 1147 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1148 %a = load volatile half, half addrspace(1)* %a.gep 1149 %fpext = fpext half %a to float 1150 %fneg = fsub float -0.000000e+00, %fpext 1151 %mul = fmul float %fpext, 4.0 1152 store volatile float %fneg, float addrspace(1)* %out.gep 1153 store volatile float %mul, float addrspace(1)* %out.gep 1154 ret void 1155 } 1156 1157 ; -------------------------------------------------------------------------------- 1158 ; fp_round tests 1159 ; -------------------------------------------------------------------------------- 1160 1161 ; GCN-LABEL: {{^}}v_fneg_fp_round_f64_to_f32: 1162 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 1163 ; GCN: v_cvt_f32_f64_e64 [[RESULT:v[0-9]+]], -[[A]] 1164 ; GCN: buffer_store_dword [[RESULT]] 1165 define amdgpu_kernel void @v_fneg_fp_round_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 1166 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1167 %tid.ext = sext i32 %tid to i64 1168 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 1169 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1170 %a = load volatile double, double addrspace(1)* %a.gep 1171 %fpround = fptrunc double %a to float 1172 %fneg = fsub float -0.000000e+00, %fpround 1173 store float %fneg, float addrspace(1)* %out.gep 1174 ret void 1175 } 1176 1177 ; GCN-LABEL: 
{{^}}v_fneg_fp_round_fneg_f64_to_f32: 1178 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 1179 ; GCN: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]] 1180 ; GCN: buffer_store_dword [[RESULT]] 1181 define amdgpu_kernel void @v_fneg_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 1182 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1183 %tid.ext = sext i32 %tid to i64 1184 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 1185 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1186 %a = load volatile double, double addrspace(1)* %a.gep 1187 %fneg.a = fsub double -0.000000e+00, %a 1188 %fpround = fptrunc double %fneg.a to float 1189 %fneg = fsub float -0.000000e+00, %fpround 1190 store float %fneg, float addrspace(1)* %out.gep 1191 ret void 1192 } 1193 1194 ; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f64_to_f32: 1195 ; GCN: {{buffer|flat}}_load_dwordx2 v{{\[}}[[A_LO:[0-9]+]]:[[A_HI:[0-9]+]]{{\]}} 1196 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], v{{\[}}[[A_LO]]:[[A_HI]]{{\]}} 1197 ; GCN-DAG: v_xor_b32_e32 v[[NEG_A_HI:[0-9]+]], 0x80000000, v[[A_HI]] 1198 ; GCN: buffer_store_dword [[RESULT]] 1199 ; GCN: buffer_store_dwordx2 v{{\[}}[[A_LO]]:[[NEG_A_HI]]{{\]}} 1200 define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 { 1201 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1202 %tid.ext = sext i32 %tid to i64 1203 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 1204 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1205 %a = load volatile double, double addrspace(1)* %a.gep 1206 %fneg.a = fsub double -0.000000e+00, %a 1207 %fpround = fptrunc double %fneg.a to float 1208 %fneg = fsub float -0.000000e+00, %fpround 1209 store volatile float %fneg, float addrspace(1)* %out.gep 1210 store volatile double %fneg.a, double addrspace(1)* 
undef 1211 ret void 1212 } 1213 1214 ; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f64_to_f32: 1215 ; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]] 1216 ; GCN-DAG: v_cvt_f32_f64_e32 [[RESULT:v[0-9]+]], [[A]] 1217 ; GCN-DAG: v_mul_f64 [[USE1:v\[[0-9]+:[0-9]+\]]], -[[A]], s{{\[}} 1218 ; GCN: buffer_store_dword [[RESULT]] 1219 ; GCN: buffer_store_dwordx2 [[USE1]] 1220 define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr, double %c) #0 { 1221 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1222 %tid.ext = sext i32 %tid to i64 1223 %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext 1224 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1225 %a = load volatile double, double addrspace(1)* %a.gep 1226 %fneg.a = fsub double -0.000000e+00, %a 1227 %fpround = fptrunc double %fneg.a to float 1228 %fneg = fsub float -0.000000e+00, %fpround 1229 %use1 = fmul double %fneg.a, %c 1230 store volatile float %fneg, float addrspace(1)* %out.gep 1231 store volatile double %use1, double addrspace(1)* undef 1232 ret void 1233 } 1234 1235 ; GCN-LABEL: {{^}}v_fneg_fp_round_f32_to_f16: 1236 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1237 ; GCN: v_cvt_f16_f32_e64 [[RESULT:v[0-9]+]], -[[A]] 1238 ; GCN: buffer_store_short [[RESULT]] 1239 define amdgpu_kernel void @v_fneg_fp_round_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 { 1240 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1241 %tid.ext = sext i32 %tid to i64 1242 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1243 %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext 1244 %a = load volatile float, float addrspace(1)* %a.gep 1245 %fpround = fptrunc float %a to half 1246 %fneg = fsub half -0.000000e+00, %fpround 1247 store half %fneg, half addrspace(1)* %out.gep 1248 ret void 1249 } 1250 1251 ; GCN-LABEL: 
{{^}}v_fneg_fp_round_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: buffer_store_short [[RESULT]]
; fneg(fptrunc(fneg x)) to f16: the negates cancel, leaving a plain cvt.
define amdgpu_kernel void @v_fneg_fp_round_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store half %fneg, half addrspace(1)* %out.gep
  ret void
}

; The rounded value itself is also stored, so the fneg cannot fold into
; the conversion; it stays as a separate xor of the converted result.
; GCN-LABEL: {{^}}v_fneg_multi_use_fp_round_fneg_f64_to_f32:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN-DAG: v_cvt_f32_f64_e32 [[CVT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[CVT]]
; GCN: buffer_store_dword [[NEG]]
; GCN: buffer_store_dword [[CVT]]
define amdgpu_kernel void @v_fneg_multi_use_fp_round_fneg_f64_to_f32(float addrspace(1)* %out, double addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %fpround = fptrunc double %a to float
  %fneg = fsub float -0.000000e+00, %fpround
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fpround, float addrspace(1)* %out.gep
  ret void
}

; %fneg.a is stored as well, so the f32 negate is materialized with an
; xor while the round still collapses to a plain f32->f16 cvt.
; GCN-LABEL: {{^}}v_fneg_fp_round_store_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: buffer_store_short [[RESULT]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_fp_round_store_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  store volatile half %fneg, half addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; %fneg.a's second user is arithmetic, so its negate folds into the
; multiply's source modifier instead of producing an xor.
; GCN-LABEL: {{^}}v_fneg_fp_round_multi_use_fneg_f32_to_f16:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_cvt_f16_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[USE1:v[0-9]+]], -[[A]], s
; GCN: buffer_store_short [[RESULT]]
; GCN: buffer_store_dword [[USE1]]
define amdgpu_kernel void @v_fneg_fp_round_multi_use_fneg_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds half, half addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fpround = fptrunc float %fneg.a to half
  %fneg = fsub half -0.000000e+00, %fpround
  %use1 = fmul float %fneg.a, %c
  store volatile half %fneg, half addrspace(1)* %out.gep
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; rcp tests
; --------------------------------------------------------------------------------

; fneg folds into rcp's source modifier.
; GCN-LABEL: {{^}}v_fneg_rcp_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rcp = call float @llvm.amdgcn.rcp.f32(float %a)
  %fneg = fsub float -0.000000e+00, %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; fneg(rcp(fneg x)): the two negates cancel, leaving a plain rcp.
; GCN-LABEL: {{^}}v_fneg_rcp_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fsub float -0.000000e+00, %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; %fneg.a is also stored, so its negate is materialized with an xor while
; the rcp of it still simplifies away both negates.
; GCN-LABEL: {{^}}v_fneg_rcp_store_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN: buffer_store_dword [[RESULT]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_rcp_store_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fsub float -0.000000e+00, %rcp
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %fneg.a, float addrspace(1)* undef
  ret void
}

; %fneg.a's second user is a multiply, so the negate folds into that
; multiply's source modifier instead of an xor.
; GCN-LABEL: {{^}}v_fneg_rcp_multi_use_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN-DAG: v_rcp_f32_e32 [[RESULT:v[0-9]+]], [[A]]
; GCN-DAG: v_mul_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN: buffer_store_dword [[RESULT]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_rcp_multi_use_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %rcp = call float @llvm.amdgcn.rcp.f32(float %fneg.a)
  %fneg = fsub float -0.000000e+00, %rcp
  %use1 = fmul float %fneg.a, %c
  store volatile float %fneg, float addrspace(1)* %out.gep
  store volatile float %use1, float addrspace(1)* undef
  ret void
}

; --------------------------------------------------------------------------------
; rcp_legacy tests
; --------------------------------------------------------------------------------

; fneg folds into rcp_legacy's source modifier.
; GCN-LABEL: {{^}}v_fneg_rcp_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rcp_legacy_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_rcp_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rcp = call float @llvm.amdgcn.rcp.legacy(float %a)
  %fneg = fsub float -0.000000e+00, %rcp
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fmul_legacy tests
; --------------------------------------------------------------------------------

; fneg of a legacy multiply folds into one operand's source modifier.
; GCN-LABEL: {{^}}v_fneg_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[RESULT:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; The multiply result is also stored, so the fneg stays as a separate xor.
; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_xor_b32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], 0x80000000, [[ADD]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
; GCN: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %mul, float addrspace(1)* %out
  ret void
}

; Both users can absorb the negate via source modifiers, so no xor is needed.
; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_mul_legacy_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[ADD]], 4.0
; GCN-NEXT: buffer_store_dword [[ADD]]
; GCN-NEXT: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_mul_legacy_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %mul, float 4.0)
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; fneg(mul_legacy(fneg x, y)): the two negates cancel, leaving a plain multiply.
; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float
addrspace(1)* %out
  ret void
}

; Same as above with the inner negate on the second operand.
; GCN-LABEL: {{^}}v_fneg_mul_legacy_x_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e32 [[ADD:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_x_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %a, float %fneg.b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; Three negates total: one survives as a source modifier on the multiply.
; GCN-LABEL: {{^}}v_fneg_mul_legacy_fneg_fneg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_legacy_f32_e64 [[ADD:v[0-9]+]], [[A]], -[[B]]
; GCN-NEXT: buffer_store_dword [[ADD]]
define amdgpu_kernel void @v_fneg_mul_legacy_fneg_fneg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %fneg.b = fsub float -0.000000e+00, %b
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %fneg.b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  ret void
}

; %fneg.a is also stored, so an xor materializes it while the multiply's
; two negates cancel.
; GCN-LABEL: {{^}}v_fneg_mul_legacy_store_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_xor_b32_e32 [[NEG_A:v[0-9]+]], 0x80000000, [[A]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
; GCN: buffer_store_dword [[NEG_A]]
define amdgpu_kernel void @v_fneg_mul_legacy_store_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %fneg.a, float addrspace(1)* %out
  ret void
}

; %fneg.a's second user folds the negate into its own source modifier.
; GCN-LABEL: {{^}}v_fneg_mul_legacy_multi_use_fneg_x_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN-DAG: v_mul_legacy_f32_e32 [[NEG_MUL_LEGACY:v[0-9]+]], [[A]], [[B]]
; GCN-DAG: v_mul_legacy_f32_e64 [[MUL:v[0-9]+]], -[[A]], s{{[0-9]+}}
; GCN-NEXT: buffer_store_dword [[NEG_MUL_LEGACY]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_mul_legacy_multi_use_fneg_x_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float %c) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %fneg.a = fsub float -0.000000e+00, %a
  %mul = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %b)
  %fneg = fsub float -0.000000e+00, %mul
  %use1 = call float @llvm.amdgcn.fmul.legacy(float %fneg.a, float %c)
  store volatile float %fneg, float addrspace(1)* %out
  store volatile float %use1, float addrspace(1)* %out
  ret void
}

; --------------------------------------------------------------------------------
; sin tests
; --------------------------------------------------------------------------------

; llvm.sin lowers to a scale + fract + v_sin sequence; the fneg folds into
; the scaling multiply's constant (0xbe22f983 is -1/(2*pi) as f32).
; GCN-LABEL: {{^}}v_fneg_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbe22f983, [[A]]
; GCN: v_fract_f32_e32 [[FRACT:v[0-9]+]], [[MUL]]
; GCN: v_sin_f32_e32 [[RESULT:v[0-9]+]], [[FRACT]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float
addrspace(1)* %a.gep
  %sin = call float @llvm.sin.f32(float %a)
  %fneg = fsub float -0.000000e+00, %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; The raw hardware sin intrinsic takes the fneg as a source modifier.
; GCN-LABEL: {{^}}v_fneg_amdgcn_sin_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_sin_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_amdgcn_sin_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %sin = call float @llvm.amdgcn.sin.f32(float %a)
  %fneg = fsub float -0.0, %sin
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; ftrunc tests
; --------------------------------------------------------------------------------

; fneg(trunc x) folds to trunc(-x): the negate becomes a source modifier.
; GCN-LABEL: {{^}}v_fneg_trunc_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_trunc_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %trunc = call float @llvm.trunc.f32(float %a)
  %fneg = fsub float -0.0, %trunc
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fround tests
; --------------------------------------------------------------------------------

; round expands to trunc/sub/cndmask; the final fneg either stays as an
; xor (safe) or folds into the final subtract (no-signed-zeros).
; GCN-LABEL: {{^}}v_fneg_round_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_trunc_f32_e32
; GCN: v_sub_f32_e32
; GCN: v_cndmask_b32

; GCN-SAFE: v_add_f32_e32 [[ADD:v[0-9]+]], v{{[0-9]+}}, v{{[0-9]+}}
; GCN-SAFE: v_xor_b32_e32 [[RESULT:v[0-9]+]], 0x80000000, [[ADD]]

; GCN-NSZ: v_sub_f32_e64 [[RESULT:v[0-9]+]], -v{{[0-9]+}}, v{{[0-9]+}}
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_round_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %round = call float @llvm.round.f32(float %a)
  %fneg = fsub float -0.0, %round
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; rint tests
; --------------------------------------------------------------------------------

; fneg folds into v_rndne's source modifier.
; GCN-LABEL: {{^}}v_fneg_rint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_rint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %rint = call float @llvm.rint.f32(float %a)
  %fneg = fsub float -0.0, %rint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; nearbyint tests
; --------------------------------------------------------------------------------

; nearbyint lowers like rint here; the fneg again folds into v_rndne.
; GCN-LABEL: {{^}}v_fneg_nearbyint_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_rndne_f32_e64 [[RESULT:v[0-9]+]], -[[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_nearbyint_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %nearbyint = call float @llvm.nearbyint.f32(float %a)
  %fneg = fsub float -0.0, %nearbyint
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; fcanonicalize tests
; --------------------------------------------------------------------------------

; canonicalize is a multiply by 1.0; the fneg folds into the constant (-1.0).
; GCN-LABEL: {{^}}v_fneg_canonicalize_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: v_mul_f32_e32 [[RESULT:v[0-9]+]], -1.0, [[A]]
; GCN: buffer_store_dword [[RESULT]]
define amdgpu_kernel void @v_fneg_canonicalize_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %trunc = call float @llvm.canonicalize.f32(float %a)
  %fneg = fsub float -0.0, %trunc
  store float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; vintrp tests
; --------------------------------------------------------------------------------

; The interp instructions cannot take a negated source, so the fneg is
; pushed up into the multiply feeding both interp uses.
; GCN-LABEL: {{^}}v_fneg_interp_p1_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p1_f32 v{{[0-9]+}}, [[MUL]]
; GCN: v_interp_p1_f32 v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p1_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p1(float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}

; Same as above for the p2 stage of the interpolation.
; GCN-LABEL: {{^}}v_fneg_interp_p2_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: v_interp_p2_f32 v{{[0-9]+}}, [[MUL]]
; GCN: 
v_interp_p2_f32 v{{[0-9]+}}, [[MUL]]
define amdgpu_kernel void @v_fneg_interp_p2_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %intrp0 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 0, i32 0, i32 0)
  %intrp1 = call float @llvm.amdgcn.interp.p2(float 4.0, float %fneg, i32 1, i32 0, i32 0)
  store volatile float %intrp0, float addrspace(1)* %out.gep
  store volatile float %intrp1, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; CopyToReg tests
; --------------------------------------------------------------------------------

; The fneg user is behind a CopyToReg across a branch, so it stays a
; separate xor in the conditional block rather than folding into the mul.
; GCN-LABEL: {{^}}v_fneg_copytoreg_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL0:v[0-9]+]], [[A]], [[B]]
; GCN: s_cbranch_scc1

; GCN: v_xor_b32_e32 [[XOR:v[0-9]+]], 0x80000000, [[MUL0]]
; GCN: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[XOR]], [[C]]
; GCN: buffer_store_dword [[MUL1]]

; GCN: buffer_store_dword [[MUL0]]
define amdgpu_kernel void @v_fneg_copytoreg_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  %cmp0 = icmp eq i32 %d, 0
  br i1 %cmp0, label %if, label %endif

if:
  %mul1 = fmul float %fneg, %c
  store volatile float %mul1, float addrspace(1)* %out.gep
  br label %endif

endif:
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; inlineasm tests
; --------------------------------------------------------------------------------

; Can't fold into use, so should fold into source
; GCN-LABEL: {{^}}v_fneg_inlineasm_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[A]], -[[B]]
; GCN: ; use [[MUL]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  store volatile float %fneg, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; inlineasm tests
; --------------------------------------------------------------------------------

; Can't fold into use, so should fold into source
; Here the multiply result is also stored, so the fneg must stay a
; separate xor feeding the asm use.
; GCN-LABEL: {{^}}v_fneg_inlineasm_multi_use_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], [[A]], [[B]]
; GCN: v_xor_b32_e32 [[NEG:v[0-9]+]], 0x80000000, [[MUL]]
; GCN: ; use [[NEG]]
; GCN: buffer_store_dword [[MUL]]
define amdgpu_kernel void @v_fneg_inlineasm_multi_use_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, i32 %d) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %mul = fmul float %a, %b
  %fneg = fsub float -0.0, %mul
  call void asm sideeffect "; use $0", "v"(float %fneg) #0
  store volatile float %mul, float addrspace(1)* %out.gep
  ret void
}

; --------------------------------------------------------------------------------
; code size regression tests
; --------------------------------------------------------------------------------

; There are multiple users of the fneg that must use a VOP3
; instruction, so there is no penalty
; GCN-LABEL: {{^}}multiuse_fneg_2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], [[C]]
; GCN-NEXT: v_fma_f32 [[FMA1:v[0-9]+]], -[[A]], [[C]], 2.0
; GCN-NEXT: buffer_store_dword [[FMA0]]
; GCN-NEXT: buffer_store_dword [[FMA1]]
define amdgpu_kernel void @multiuse_fneg_2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float %c)
  %fma1 = call float @llvm.fma.f32(float %fneg.a, float %c, float 2.0)

  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %fma1, float addrspace(1)* %out
  ret void
}

; There are multiple users, but both require using a larger encoding
; 
for the modifier. 1932 1933 ; GCN-LABEL: {{^}}multiuse_fneg_2_vop2_users_f32: 1934 ; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]] 1935 ; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]] 1936 ; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]] 1937 1938 ; GCN: v_mul_f32_e64 [[MUL0:v[0-9]+]], -[[A]], [[B]] 1939 ; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]] 1940 ; GCN-NEXT: buffer_store_dword [[MUL0]] 1941 ; GCN-NEXT: buffer_store_dword [[MUL1]] 1942 define amdgpu_kernel void @multiuse_fneg_2_vop2_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 { 1943 %tid = call i32 @llvm.amdgcn.workitem.id.x() 1944 %tid.ext = sext i32 %tid to i64 1945 %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext 1946 %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext 1947 %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext 1948 %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext 1949 %a = load volatile float, float addrspace(1)* %a.gep 1950 %b = load volatile float, float addrspace(1)* %b.gep 1951 %c = load volatile float, float addrspace(1)* %c.gep 1952 1953 %fneg.a = fsub float -0.0, %a 1954 %mul0 = fmul float %fneg.a, %b 1955 %mul1 = fmul float %fneg.a, %c 1956 1957 store volatile float %mul0, float addrspace(1)* %out 1958 store volatile float %mul1, float addrspace(1)* %out 1959 ret void 1960 } 1961 1962 ; One user is VOP3 so has no cost to folding the modifier, the other does. 
; GCN-LABEL: {{^}}multiuse_fneg_vop2_vop3_users_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]

; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[A]], [[B]], 2.0
; GCN: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[A]], [[C]]

; GCN: buffer_store_dword [[FMA0]]
; GCN-NEXT: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @multiuse_fneg_vop2_vop3_users_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep

  %fneg.a = fsub float -0.0, %a
  %fma0 = call float @llvm.fma.f32(float %fneg.a, float %b, float 2.0)
  %mul1 = fmul float %fneg.a, %c

  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}

; The use of the fneg requires a code size increase, but folding into
; the source does not

; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]

; GCN-SAFE: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], [[B]], 2.0
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL1:v[0-9]+]], -[[FMA0]], [[C]]
; GCN-SAFE-DAG: v_mul_f32_e64 [[MUL2:v[0-9]+]], -[[FMA0]], [[D]]

; GCN-NSZ: v_fma_f32 [[FMA0:v[0-9]+]], [[A]], -[[B]], -2.0
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[FMA0]], [[C]]
; GCN-NSZ-DAG: v_mul_f32_e32 [[MUL2:v[0-9]+]], [[FMA0]], [[D]]

; GCN: buffer_store_dword [[MUL1]]
; GCN-NEXT: buffer_store_dword [[MUL2]]
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %fma0 = call float @llvm.fma.f32(float %a, float %b, float 2.0)
  %fneg.fma0 = fsub float -0.0, %fma0
  %mul1 = fmul float %fneg.fma0, %c
  %mul2 = fmul float %fneg.fma0, %d

  store volatile float %mul1, float addrspace(1)* %out
  store volatile float %mul2, float addrspace(1)* %out
  ret void
}

; GCN-LABEL: {{^}}free_fold_src_code_size_cost_use_f64:
; GCN: {{buffer|flat}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]]
; GCN: {{buffer|flat}}_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]]

; GCN: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], 2.0
; GCN-DAG: v_mul_f64 [[MUL0:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[C]]
; GCN-DAG: v_mul_f64 [[MUL1:v\[[0-9]+:[0-9]+\]]], -[[FMA0]], [[D]]

; GCN: buffer_store_dwordx2 [[MUL0]]
; GCN: buffer_store_dwordx2 [[MUL1]]
define amdgpu_kernel void @free_fold_src_code_size_cost_use_f64(double addrspace(1)* %out, double addrspace(1)* %a.ptr, double addrspace(1)* %b.ptr, double addrspace(1)* %c.ptr, double addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds double, double addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds double, double addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds double, double addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds double, double addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds double, double addrspace(1)* %out, i64 %tid.ext
  %a = load volatile double, double addrspace(1)* %a.gep
  %b = load volatile double, double addrspace(1)* %b.gep
  %c = load volatile double, double addrspace(1)* %c.gep
  %d = load volatile double, double addrspace(1)* %d.gep

  %fma0 = call double @llvm.fma.f64(double %a, double %b, double 2.0)
  %fneg.fma0 = fsub double -0.0, %fma0
  %mul1 = fmul double %fneg.fma0, %c
  %mul2 = fmul double %fneg.fma0, %d

  store volatile double %mul1, double addrspace(1)* %out
  store volatile double %mul2, double addrspace(1)* %out
  ret void
}

; %trunc.a has one fneg use, but it requires a code size increase and
; the fneg can instead be folded for free into the fma.

; GCN-LABEL: {{^}}one_use_cost_to_fold_into_src_f32:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN: buffer_store_dword [[FMA0]]
define amdgpu_kernel void @one_use_cost_to_fold_into_src_f32(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %trunc.a = call float @llvm.trunc.f32(float %a)
  %trunc.fneg.a = fsub float -0.0, %trunc.a
  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
  store volatile float %fma0, float addrspace(1)* %out
  ret void
}

; Here %trunc.a also has a non-negated user, so the fneg can only be
; folded into the fma's source modifier, not back into the trunc.
; GCN-LABEL: {{^}}multi_use_cost_to_fold_into_src:
; GCN: {{buffer|flat}}_load_dword [[A:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[B:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[C:v[0-9]+]]
; GCN: {{buffer|flat}}_load_dword [[D:v[0-9]+]]
; GCN: v_trunc_f32_e32 [[TRUNC_A:v[0-9]+]], [[A]]
; GCN-DAG: v_fma_f32 [[FMA0:v[0-9]+]], -[[TRUNC_A]], [[B]], [[C]]
; GCN-DAG: v_mul_f32_e32 [[MUL1:v[0-9]+]], [[TRUNC_A]], [[D]]
; GCN: buffer_store_dword [[FMA0]]
; GCN: buffer_store_dword [[MUL1]]
define amdgpu_kernel void @multi_use_cost_to_fold_into_src(float addrspace(1)* %out, float addrspace(1)* %a.ptr, float addrspace(1)* %b.ptr, float addrspace(1)* %c.ptr, float addrspace(1)* %d.ptr) #0 {
  %tid = call i32 @llvm.amdgcn.workitem.id.x()
  %tid.ext = sext i32 %tid to i64
  %a.gep = getelementptr inbounds float, float addrspace(1)* %a.ptr, i64 %tid.ext
  %b.gep = getelementptr inbounds float, float addrspace(1)* %b.ptr, i64 %tid.ext
  %c.gep = getelementptr inbounds float, float addrspace(1)* %c.ptr, i64 %tid.ext
  %d.gep = getelementptr inbounds float, float addrspace(1)* %d.ptr, i64 %tid.ext
  %out.gep = getelementptr inbounds float, float addrspace(1)* %out, i64 %tid.ext
  %a = load volatile float, float addrspace(1)* %a.gep
  %b = load volatile float, float addrspace(1)* %b.gep
  %c = load volatile float, float addrspace(1)* %c.gep
  %d = load volatile float, float addrspace(1)* %d.gep

  %trunc.a = call float @llvm.trunc.f32(float %a)
  %trunc.fneg.a = fsub float -0.0, %trunc.a
  %fma0 = call float @llvm.fma.f32(float %trunc.fneg.a, float %b, float %c)
  %mul1 = fmul float %trunc.a, %d
  store volatile float %fma0, float addrspace(1)* %out
  store volatile float %mul1, float addrspace(1)* %out
  ret void
}

; Intrinsic declarations used by the tests above.
declare i32 @llvm.amdgcn.workitem.id.x() #1
declare float @llvm.fma.f32(float, float, float) #1
declare float @llvm.fmuladd.f32(float, float, float) #1
declare float @llvm.sin.f32(float) #1
declare float @llvm.trunc.f32(float) #1
declare float @llvm.round.f32(float) #1
declare float @llvm.rint.f32(float) #1
declare float @llvm.nearbyint.f32(float) #1
declare float @llvm.canonicalize.f32(float) #1
declare float @llvm.minnum.f32(float, float) #1
declare float @llvm.maxnum.f32(float, float) #1

declare double @llvm.fma.f64(double, double, double) #1

declare float @llvm.amdgcn.sin.f32(float) #1
declare float @llvm.amdgcn.rcp.f32(float) #1
declare float @llvm.amdgcn.rcp.legacy(float) #1
declare float @llvm.amdgcn.fmul.legacy(float, float) #1
declare float @llvm.amdgcn.interp.p1(float, i32, i32, i32) #0
declare float @llvm.amdgcn.interp.p2(float, float, i32, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readnone }