; RUN: llc < %s -mcpu=core-avx2 | FileCheck %s
;
; Every function in this file has the same shape: a counted loop (counter
; starts at 0 and the loop continues while counter < %iter, signed compare)
; whose vector accumulator is a phi that starts as %c and is replaced on each
; trip by a 256-bit x86 FMA intrinsic called with (%a, %b, accumulator).
; The accumulator phi is also the value returned once the loop exits.
;
; The FileCheck directives in front of each function expect, in order: a
; block label, the "231" spelling of the matching FMA instruction operating
; on three ymm registers, a second block label, and then the addl/cmpl/jl
; sequence that bumps the counter and jumps back to the first label.
;
; NOTE(review): presumably the 231 form is required because it accumulates
; into its destination register, so the loop-carried value needs no extra
; copy -- confirm against the instruction-set reference.
target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
target triple = "x86_64-apple-macosx10.10.0"

; --- <4 x double> variants ---------------------------------------------------

; Loop-carried fmaddsub on packed doubles.
; CHECK-LABEL: fmaddsubpd_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmaddsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmaddsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  ; %acc is %c on entry and the previous trip's FMA result afterwards.
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %more = icmp slt i32 %iv, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  ; Intentionally empty: it only forwards to the latch.
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Loop-carried fmsubadd on packed doubles.
; CHECK-LABEL: fmsubaddpd_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsubadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmsubaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %more = icmp slt i32 %iv, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Loop-carried fmadd on packed doubles.
; CHECK-LABEL: fmaddpd_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmadd231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmaddpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %more = icmp slt i32 %iv, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Loop-carried fmsub on packed doubles.
; CHECK-LABEL: fmsubpd_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsub231pd %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <4 x double> @fmsubpd_loop(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %more = icmp slt i32 %iv, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Intrinsics called by the <4 x double> loops above.
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)


; --- <8 x float> variants ----------------------------------------------------

; Loop-carried fmaddsub on packed floats.
; CHECK-LABEL: fmaddsubps_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmaddsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmaddsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %more = icmp slt i32 %iv, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Loop-carried fmsubadd on packed floats.
; CHECK-LABEL: fmsubaddps_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsubadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmsubaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %more = icmp slt i32 %iv, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Loop-carried fmadd on packed floats.
; CHECK-LABEL: fmaddps_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmadd231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmaddps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %more = icmp slt i32 %iv, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Loop-carried fmsub on packed floats.
; CHECK-LABEL: fmsubps_loop
; CHECK: [[BODYLBL:LBB.+]]:
; CHECK: vfmsub231ps %ymm{{[0-9]+}}, %ymm{{[0-9]+}}, %ymm{{[0-9]+}}
; CHECK: [[INCLBL:LBB.+]]:
; CHECK: addl $1, [[INDREG:%[a-z0-9]+]]
; CHECK: cmpl {{%.+}}, [[INDREG]]
; CHECK: jl [[BODYLBL]]
define <8 x float> @fmsubps_loop(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %iv = phi i32 [ 0, %entry ], [ %iv.next, %loop.latch ]
  %more = icmp slt i32 %iv, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %iv.next = add nsw i32 %iv, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Intrinsics called by the <8 x float> loops above.
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)