; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-FASTFMAF -check-prefix=SI -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs -fp-contract=fast < %s | FileCheck -check-prefix=SI-SLOWFMAF -check-prefix=SI -check-prefix=FUNC %s

; Tests that separate fmul/fadd/fsub (and llvm.fma) instructions are combined
; into fused multiply-add instructions when compiled with -fp-contract=fast.
; Both RUN lines share the SI and FUNC check prefixes; the SI-FASTFMAF and
; SI-SLOWFMAF prefixes are registered on the command lines but no check in this
; file currently uses them.
;
; FileCheck variables are global to the file unless variable scoping is
; enabled, so every test below binds each variable it references.

declare i32 @llvm.r600.read.tidig.x() #0
declare double @llvm.fabs.f64(double) #0
declare double @llvm.fma.f64(double, double, double) #0
declare float @llvm.fma.f32(float, float, float) #0

; (fadd (fmul x, y), z) -> (fma x, y, z)
; FUNC-LABEL: {{^}}combine_to_fma_f64_0:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_0(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fadd double %mul, %c
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fadd (fmul x, y), z) -> (fma x, y, z)
; The fmul has two fadd users; each one is expected to become its own fma.
; FUNC-LABEL: {{^}}combine_to_fma_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %fma0 = fadd double %mul, %c
  %fma1 = fadd double %mul, %d
  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fadd x, (fmul y, z)) -> (fma y, z, x)
; FUNC-LABEL: {{^}}combine_to_fma_f64_1:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_f64_1(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fadd double %c, %mul
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fsub double %mul, %c
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fmul x, y), z) -> (fma x, y, (fneg z))
; The fmul has two fsub users; each one is expected to become its own fma.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_f64_0_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_f64_0_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %fma0 = fsub double %mul, %c
  %fma1 = fsub double %mul, %d
  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %fma = fsub double %c, %mul
  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub x, (fmul y, z)) -> (fma (fneg y), z, x)
; The fmul has two fsub users; each one is expected to become its own fma.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_1_f64_2use:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], [[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_1_f64_2use(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %fma0 = fsub double %c, %mul
  %fma1 = fsub double %d, %mul
  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @combine_to_fma_fsub_2_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2

  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma = fsub double %mul.neg, %c

  store double %fma, double addrspace(1)* %gep.out
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; The negated product has two fsub users.
; The load of D is matched explicitly so that [[D]] is bound inside this
; function instead of reusing a capture left over from an earlier test.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_neg:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_neg(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul.neg, %d

  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; (fsub (fneg (fmul x, y)), z) -> (fma (fneg x), y, (fneg z))
; The fmul is used both through its negation and directly by a second fsub.
; The load of D is matched explicitly so that [[D]] is bound inside this
; function instead of reusing a capture left over from an earlier test.
; FUNC-LABEL: {{^}}combine_to_fma_fsub_2_f64_2uses_mul:
; SI-DAG: buffer_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[B:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[C:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[D:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: v_fma_f64 [[RESULT0:v\[[0-9]+:[0-9]+\]]], -[[A]], [[B]], -[[C]]
; SI-DAG: v_fma_f64 [[RESULT1:v\[[0-9]+:[0-9]+\]]], [[A]], [[B]], -[[D]]
; SI-DAG: buffer_store_dwordx2 [[RESULT0]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_store_dwordx2 [[RESULT1]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI: s_endpgm
define void @combine_to_fma_fsub_2_f64_2uses_mul(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.out.0 = getelementptr double, double addrspace(1)* %out, i32 %tid
  %gep.out.1 = getelementptr double, double addrspace(1)* %gep.out.0, i32 1

  %a = load double, double addrspace(1)* %gep.0
  %b = load double, double addrspace(1)* %gep.1
  %c = load double, double addrspace(1)* %gep.2
  %d = load double, double addrspace(1)* %gep.3

  %mul = fmul double %a, %b
  %mul.neg = fsub double -0.0, %mul
  %fma0 = fsub double %mul.neg, %c
  %fma1 = fsub double %mul, %d

  store double %fma0, double addrspace(1)* %gep.out.0
  store double %fma1, double addrspace(1)* %gep.out.1
  ret void
}

; fold (fsub (fma x, y, (fmul u, v)), z) -> (fma x, y, (fma u, v, (fneg z)))

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_0_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], [[U]], [[V]], -[[Z]]
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[X]], [[Y]], [[FMA0]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_0_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %x = load double, double addrspace(1)* %gep.0
  %y = load double, double addrspace(1)* %gep.1
  %z = load double, double addrspace(1)* %gep.2
  %u = load double, double addrspace(1)* %gep.3
  %v = load double, double addrspace(1)* %gep.4

  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %x, double %y, double %tmp0) #0
  %tmp2 = fsub double %tmp1, %z

  store double %tmp2, double addrspace(1)* %gep.out
  ret void
}

; fold (fsub x, (fma y, z, (fmul u, v)))
;   -> (fma (fneg y), z, (fma (fneg u), v, x))

; FUNC-LABEL: {{^}}aggressive_combine_to_fma_fsub_1_f64:
; SI-DAG: buffer_load_dwordx2 [[X:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64{{$}}
; SI-DAG: buffer_load_dwordx2 [[Y:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:8{{$}}
; SI-DAG: buffer_load_dwordx2 [[Z:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:16{{$}}
; SI-DAG: buffer_load_dwordx2 [[U:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:24{{$}}
; SI-DAG: buffer_load_dwordx2 [[V:v\[[0-9]+:[0-9]+\]]], v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 addr64 offset:32{{$}}
; SI: v_fma_f64 [[FMA0:v\[[0-9]+:[0-9]+\]]], -[[U]], [[V]], [[X]]
; SI: v_fma_f64 [[RESULT:v\[[0-9]+:[0-9]+\]]], -[[Y]], [[Z]], [[FMA0]]
; SI: buffer_store_dwordx2 [[RESULT]]
define void @aggressive_combine_to_fma_fsub_1_f64(double addrspace(1)* noalias %out, double addrspace(1)* noalias %in) #1 {
  %tid = tail call i32 @llvm.r600.read.tidig.x() #0
  %gep.0 = getelementptr double, double addrspace(1)* %in, i32 %tid
  %gep.1 = getelementptr double, double addrspace(1)* %gep.0, i32 1
  %gep.2 = getelementptr double, double addrspace(1)* %gep.0, i32 2
  %gep.3 = getelementptr double, double addrspace(1)* %gep.0, i32 3
  %gep.4 = getelementptr double, double addrspace(1)* %gep.0, i32 4
  %gep.out = getelementptr double, double addrspace(1)* %out, i32 %tid

  %x = load double, double addrspace(1)* %gep.0
  %y = load double, double addrspace(1)* %gep.1
  %z = load double, double addrspace(1)* %gep.2
  %u = load double, double addrspace(1)* %gep.3
  %v = load double, double addrspace(1)* %gep.4

  %tmp0 = fmul double %u, %v
  %tmp1 = call double @llvm.fma.f64(double %y, double %z, double %tmp0) #0
  %tmp2 = fsub double %x, %tmp1

  store double %tmp2, double addrspace(1)* %gep.out
  ret void
}

;
; Patterns (+ fneg variants): mul(add(1.0,x),y), mul(sub(1.0,x),y), mul(sub(x,1.0),y)
;

; (fmul (fadd x, 1.0), y)
; The second operand reuses [[VY]] so the check requires the destination and
; the first source to be the same register, as in the *_sub_x_negone tests.
; FUNC-LABEL: {{^}}test_f32_mul_add_x_one_y:
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
define void @test_f32_mul_add_x_one_y(float addrspace(1)* %out,
                                      float addrspace(1)* %in1,
                                      float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %a = fadd float %x, 1.0
  %m = fmul float %a, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul y, (fadd x, 1.0))
; The second operand reuses [[VY]] so the check requires the destination and
; the first source to be the same register, as in the *_sub_x_negone tests.
; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_one:
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
define void @test_f32_mul_y_add_x_one(float addrspace(1)* %out,
                                      float addrspace(1)* %in1,
                                      float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %a = fadd float %x, 1.0
  %m = fmul float %y, %a
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul (fadd x, -1.0), y)
; FUNC-LABEL: {{^}}test_f32_mul_add_x_negone_y:
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_add_x_negone_y(float addrspace(1)* %out,
                                         float addrspace(1)* %in1,
                                         float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %a = fadd float %x, -1.0
  %m = fmul float %a, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul y, (fadd x, -1.0))
; FUNC-LABEL: {{^}}test_f32_mul_y_add_x_negone:
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_y_add_x_negone(float addrspace(1)* %out,
                                         float addrspace(1)* %in1,
                                         float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %a = fadd float %x, -1.0
  %m = fmul float %y, %a
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul (fsub 1.0, x), y)
; FUNC-LABEL: {{^}}test_f32_mul_sub_one_x_y:
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
define void @test_f32_mul_sub_one_x_y(float addrspace(1)* %out,
                                      float addrspace(1)* %in1,
                                      float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float 1.0, %x
  %m = fmul float %s, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul y, (fsub 1.0, x))
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_one_x:
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], [[VY]]
define void @test_f32_mul_y_sub_one_x(float addrspace(1)* %out,
                                      float addrspace(1)* %in1,
                                      float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float 1.0, %x
  %m = fmul float %y, %s
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul (fsub -1.0, x), y)
; FUNC-LABEL: {{^}}test_f32_mul_sub_negone_x_y:
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_sub_negone_x_y(float addrspace(1)* %out,
                                         float addrspace(1)* %in1,
                                         float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float -1.0, %x
  %m = fmul float %s, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul y, (fsub -1.0, x))
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_negone_x:
; SI: v_mad_f32 [[VX:v[0-9]]], -[[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_y_sub_negone_x(float addrspace(1)* %out,
                                         float addrspace(1)* %in1,
                                         float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float -1.0, %x
  %m = fmul float %y, %s
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul (fsub x, 1.0), y)
; FUNC-LABEL: {{^}}test_f32_mul_sub_x_one_y:
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_sub_x_one_y(float addrspace(1)* %out,
                                      float addrspace(1)* %in1,
                                      float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float %x, 1.0
  %m = fmul float %s, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul y, (fsub x, 1.0))
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_one:
; SI: v_mad_f32 [[VX:v[0-9]]], [[VX]], [[VY:v[0-9]]], -[[VY]]
define void @test_f32_mul_y_sub_x_one(float addrspace(1)* %out,
                                      float addrspace(1)* %in1,
                                      float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float %x, 1.0
  %m = fmul float %y, %s
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul (fsub x, -1.0), y)
; FUNC-LABEL: {{^}}test_f32_mul_sub_x_negone_y:
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
define void @test_f32_mul_sub_x_negone_y(float addrspace(1)* %out,
                                         float addrspace(1)* %in1,
                                         float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float %x, -1.0
  %m = fmul float %s, %y
  store float %m, float addrspace(1)* %out
  ret void
}

; (fmul y, (fsub x, -1.0))
; FUNC-LABEL: {{^}}test_f32_mul_y_sub_x_negone:
; SI: v_mac_f32_e32 [[VY:v[0-9]]], [[VY]], [[VX:v[0-9]]]
define void @test_f32_mul_y_sub_x_negone(float addrspace(1)* %out,
                                         float addrspace(1)* %in1,
                                         float addrspace(1)* %in2) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %s = fsub float %x, -1.0
  %m = fmul float %y, %s
  store float %m, float addrspace(1)* %out
  ret void
}

;
; Interpolation Patterns: add(mul(x,t),mul(sub(1.0,t),y))
;

; FUNC-LABEL: {{^}}test_f32_interp:
; SI: v_mad_f32 [[VR:v[0-9]]], -[[VT:v[0-9]]], [[VY:v[0-9]]], [[VY]]
; SI: v_mac_f32_e32 [[VR]], [[VT]], [[VX:v[0-9]]]
define void @test_f32_interp(float addrspace(1)* %out,
                             float addrspace(1)* %in1,
                             float addrspace(1)* %in2,
                             float addrspace(1)* %in3) {
  %x = load float, float addrspace(1)* %in1
  %y = load float, float addrspace(1)* %in2
  %t = load float, float addrspace(1)* %in3
  %t1 = fsub float 1.0, %t
  %tx = fmul float %x, %t
  %ty = fmul float %y, %t1
  %r = fadd float %tx, %ty
  store float %r, float addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}test_f64_interp:
; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], -[[VT:v\[[0-9]+:[0-9]+\]]], [[VY:v\[[0-9]+:[0-9]+\]]], [[VY]]
; SI: v_fma_f64 [[VR:v\[[0-9]+:[0-9]+\]]], [[VX:v\[[0-9]+:[0-9]+\]]], [[VT]], [[VR]]
define void @test_f64_interp(double addrspace(1)* %out,
                             double addrspace(1)* %in1,
                             double addrspace(1)* %in2,
                             double addrspace(1)* %in3) {
  %x = load double, double addrspace(1)* %in1
  %y = load double, double addrspace(1)* %in2
  %t = load double, double addrspace(1)* %in3
  %t1 = fsub double 1.0, %t
  %tx = fmul double %x, %t
  %ty = fmul double %y, %t1
  %r = fadd double %tx, %ty
  store double %r, double addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind readnone }
attributes #1 = { nounwind }