; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck %s

; Instruction-selection tests for @llvm.fmuladd on the amdgcn SI target.
; Each function below is paired with FileCheck directives naming the
; multiply-add instruction and operand form expected in the llc output.

declare float @llvm.fmuladd.f32(float, float, float)
declare double @llvm.fmuladd.f64(double, double, double)
declare i32 @llvm.r600.read.tidig.x() nounwind readnone
declare float @llvm.fabs.f32(float) nounwind readnone

; f32 fmuladd of three loaded values; expected to be emitted as
; v_mac_f32_e32 with three VGPR operands.
; CHECK-LABEL: {{^}}fmuladd_f32:
; CHECK: v_mac_f32_e32 {{v[0-9]+, v[0-9]+, v[0-9]+}}

define void @fmuladd_f32(float addrspace(1)* %out, float addrspace(1)* %in1,
                         float addrspace(1)* %in2, float addrspace(1)* %in3) {
  %r0 = load float, float addrspace(1)* %in1
  %r1 = load float, float addrspace(1)* %in2
  %r2 = load float, float addrspace(1)* %in3
  %r3 = tail call float @llvm.fmuladd.f32(float %r0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %out
  ret void
}

; f64 fmuladd of three loaded values; expected to be emitted as v_fma_f64
; operating on 64-bit VGPR pairs.
; CHECK-LABEL: {{^}}fmuladd_f64:
; CHECK: v_fma_f64 {{v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\], v\[[0-9]+:[0-9]+\]}}

define void @fmuladd_f64(double addrspace(1)* %out, double addrspace(1)* %in1,
                         double addrspace(1)* %in2, double addrspace(1)* %in3) {
  %r0 = load double, double addrspace(1)* %in1
  %r1 = load double, double addrspace(1)* %in2
  %r2 = load double, double addrspace(1)* %in3
  %r3 = tail call double @llvm.fmuladd.f64(double %r0, double %r1, double %r2)
  store double %r3, double addrspace(1)* %out
  ret void
}

; fmuladd(2.0, a, b): the constant is expected to appear directly as the
; 2.0 operand of v_mac_f32_e32, and the register holding b ([[R2]]) is
; expected to be both the accumulator and the stored result.
; CHECK-LABEL: {{^}}fmuladd_2.0_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load float, float addrspace(1)* %gep.0
  %r2 = load float, float addrspace(1)* %gep.1

  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

; fmuladd(a, 2.0, b): same expectation as above with the multiplicands
; swapped in the IR; the output is still expected to list 2.0 first.
; CHECK-LABEL: {{^}}fmuladd_a_2.0_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_a_2.0_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load float, float addrspace(1)* %gep.0
  %r2 = load float, float addrspace(1)* %gep.1

  %r3 = tail call float @llvm.fmuladd.f32(float %r1, float 2.0, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}

; (a + a) + b written with plain fadd instructions (no fmuladd call); the
; output is expected to be the same v_mac_f32_e32 with a 2.0 operand as in
; the explicit fmuladd tests.
; CHECK-LABEL: {{^}}fadd_a_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fadd_a_a_b_f32(float addrspace(1)* %out,
                            float addrspace(1)* %in1,
                            float addrspace(1)* %in2) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r0 = load float, float addrspace(1)* %gep.0
  %r1 = load float, float addrspace(1)* %gep.1

  %add.0 = fadd float %r0, %r0
  %add.1 = fadd float %add.0, %r1
  ; Store through %gep.out (the %tid-indexed slot), as the fmuladd tests in
  ; this file do, so the GEP is live and the store address depends on %tid.
  store float %add.1, float addrspace(1)* %gep.out
  ret void
}

; b + (a + a): the same fadd pattern with the operands of the outer add
; swapped; the expected output is unchanged.
; CHECK-LABEL: {{^}}fadd_b_a_a_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fadd_b_a_a_f32(float addrspace(1)* %out,
                            float addrspace(1)* %in1,
                            float addrspace(1)* %in2) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r0 = load float, float addrspace(1)* %gep.0
  %r1 = load float, float addrspace(1)* %gep.1

  %add.0 = fadd float %r0, %r0
  %add.1 = fadd float %r1, %add.0
  ; Store through %gep.out (the %tid-indexed slot), as the fmuladd tests in
  ; this file do, so the GEP is live and the store address depends on %tid.
  store float %add.1, float addrspace(1)* %gep.out
  ret void
}

; fmuladd(-2.0, a, b): the negative constant is expected to appear directly
; as the -2.0 operand of v_mac_f32_e32.
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_neg_2.0_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load float, float addrspace(1)* %gep.0
  %r2 = load float, float addrspace(1)* %gep.1

  %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}


; fmuladd(-2.0, -a, b): the two negations are expected to cancel, leaving a
; v_mac_f32_e32 with a positive 2.0 operand and the unnegated load [[R1]].
; CHECK-LABEL: {{^}}fmuladd_neg_2.0_neg_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], 2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_neg_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load float, float addrspace(1)* %gep.0
  %r2 = load float, float addrspace(1)* %gep.1

  ; fneg spelled as a subtraction from -0.0.
  %r1.fneg = fsub float -0.000000e+00, %r1

  %r3 = tail call float @llvm.fmuladd.f32(float -2.0, float %r1.fneg, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}


; fmuladd(2.0, -a, b): the negation of a is expected to move onto the
; constant, giving a v_mac_f32_e32 with a -2.0 operand and the unnegated
; load [[R1]].
; CHECK-LABEL: {{^}}fmuladd_2.0_neg_a_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mac_f32_e32 [[R2]], -2.0, [[R1]]
; CHECK: buffer_store_dword [[R2]]
define void @fmuladd_2.0_neg_a_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load float, float addrspace(1)* %gep.0
  %r2 = load float, float addrspace(1)* %gep.1

  ; fneg spelled as a subtraction from -0.0.
  %r1.fneg = fsub float -0.000000e+00, %r1

  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1.fneg, float %r2)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}


; fmuladd(2.0, a, -b): with the addend negated the expected instruction is
; v_mad_f32 writing a separate [[RESULT]] register, with the negation shown
; on the addend operand (-[[R2]]).
; CHECK-LABEL: {{^}}fmuladd_2.0_a_neg_b_f32:
; CHECK-DAG: buffer_load_dword [[R1:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; CHECK-DAG: buffer_load_dword [[R2:v[0-9]+]], {{v\[[0-9]+:[0-9]+\]}}, {{s\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:4
; CHECK: v_mad_f32 [[RESULT:v[0-9]+]], 2.0, [[R1]], -[[R2]]
; CHECK: buffer_store_dword [[RESULT]]
define void @fmuladd_2.0_a_neg_b_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
  %tid = call i32 @llvm.r600.read.tidig.x() nounwind readnone
  %gep.0 = getelementptr float, float addrspace(1)* %out, i32 %tid
  %gep.1 = getelementptr float, float addrspace(1)* %gep.0, i32 1
  %gep.out = getelementptr float, float addrspace(1)* %out, i32 %tid

  %r1 = load float, float addrspace(1)* %gep.0
  %r2 = load float, float addrspace(1)* %gep.1

  ; fneg spelled as a subtraction from -0.0.
  %r2.fneg = fsub float -0.000000e+00, %r2

  %r3 = tail call float @llvm.fmuladd.f32(float 2.0, float %r1, float %r2.fneg)
  store float %r3, float addrspace(1)* %gep.out
  ret void
}