; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s

; Every function in this file has the same shape: a counted loop whose header
; phi carries a vector accumulator, and whose latch feeds that accumulator as
; the third operand of one x86 FMA intrinsic. The intrinsic result flows back
; into the phi, and the phi value is what the function returns.
;
; For each function the expectations are: the 231 form of the instruction with
; %xmm2 / %ymm2 as destination, then a copy of that register into %xmm0 /
; %ymm0, immediately followed by the return.

;-----------------------------------------------------------------------
; 128-bit, packed double (<2 x double>)
;-----------------------------------------------------------------------

; Accumulator loop around the 128-bit vfmaddsub.pd intrinsic.
; CHECK-LABEL: fmaddsubpd_loop_128:
; CHECK: vfmaddsub231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmaddsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <2 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <2 x double> %acc
}

; Accumulator loop around the 128-bit vfmsubadd.pd intrinsic.
; CHECK-LABEL: fmsubaddpd_loop_128:
; CHECK: vfmsubadd231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmsubaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <2 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <2 x double> %acc
}

; Accumulator loop around the 128-bit vfmadd.pd intrinsic.
; CHECK-LABEL: fmaddpd_loop_128:
; CHECK: vfmadd231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <2 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <2 x double> %acc
}

; Accumulator loop around the 128-bit vfmsub.pd intrinsic.
; CHECK-LABEL: fmsubpd_loop_128:
; CHECK: vfmsub231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <2 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <2 x double> %acc
}

; Accumulator loop around the 128-bit vfnmadd.pd intrinsic.
; CHECK-LABEL: fnmaddpd_loop_128:
; CHECK: vfnmadd231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fnmaddpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <2 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <2 x double> %acc
}

; Accumulator loop around the 128-bit vfnmsub.pd intrinsic.
; CHECK-LABEL: fnmsubpd_loop_128:
; CHECK: vfnmsub231pd %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <2 x double> @fnmsubpd_loop_128(i32 %iter, <2 x double> %a, <2 x double> %b, <2 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <2 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double> %a, <2 x double> %b, <2 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <2 x double> %acc
}

; Intrinsics used by the 128-bit packed-double functions above.
declare <2 x double> @llvm.x86.fma.vfmaddsub.pd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfmsubadd.pd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfmsub.pd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmadd.pd(<2 x double>, <2 x double>, <2 x double>)
declare <2 x double> @llvm.x86.fma.vfnmsub.pd(<2 x double>, <2 x double>, <2 x double>)

;-----------------------------------------------------------------------
; 128-bit, packed single (<4 x float>)
;-----------------------------------------------------------------------

; Accumulator loop around the 128-bit vfmaddsub.ps intrinsic.
; CHECK-LABEL: fmaddsubps_loop_128:
; CHECK: vfmaddsub231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmaddsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x float> %acc
}

; Accumulator loop around the 128-bit vfmsubadd.ps intrinsic.
; CHECK-LABEL: fmsubaddps_loop_128:
; CHECK: vfmsubadd231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmsubaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x float> %acc
}

; Accumulator loop around the 128-bit vfmadd.ps intrinsic.
; CHECK-LABEL: fmaddps_loop_128:
; CHECK: vfmadd231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x float> %acc
}

; Accumulator loop around the 128-bit vfmsub.ps intrinsic.
; CHECK-LABEL: fmsubps_loop_128:
; CHECK: vfmsub231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fmsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x float> %acc
}

; Accumulator loop around the 128-bit vfnmadd.ps intrinsic.
; CHECK-LABEL: fnmaddps_loop_128:
; CHECK: vfnmadd231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fnmaddps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x float> %acc
}

; Accumulator loop around the 128-bit vfnmsub.ps intrinsic.
; CHECK-LABEL: fnmsubps_loop_128:
; CHECK: vfnmsub231ps %xmm1, %xmm0, %xmm2
; CHECK: vmovaps %xmm2, %xmm0
; CHECK-NEXT: retq
define <4 x float> @fnmsubps_loop_128(i32 %iter, <4 x float> %a, <4 x float> %b, <4 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float> %a, <4 x float> %b, <4 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x float> %acc
}

; Intrinsics used by the 128-bit packed-single functions above.
declare <4 x float> @llvm.x86.fma.vfmaddsub.ps(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmsubadd.ps(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfmsub.ps(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmadd.ps(<4 x float>, <4 x float>, <4 x float>)
declare <4 x float> @llvm.x86.fma.vfnmsub.ps(<4 x float>, <4 x float>, <4 x float>)

;-----------------------------------------------------------------------
; 256-bit, packed double (<4 x double>)
;-----------------------------------------------------------------------

; Accumulator loop around the 256-bit vfmaddsub.pd intrinsic.
; CHECK-LABEL: fmaddsubpd_loop_256:
; CHECK: vfmaddsub231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmaddsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Accumulator loop around the 256-bit vfmsubadd.pd intrinsic.
; CHECK-LABEL: fmsubaddpd_loop_256:
; CHECK: vfmsubadd231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmsubaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Accumulator loop around the 256-bit vfmadd.pd intrinsic.
; CHECK-LABEL: fmaddpd_loop_256:
; CHECK: vfmadd231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Accumulator loop around the 256-bit vfmsub.pd intrinsic.
; CHECK-LABEL: fmsubpd_loop_256:
; CHECK: vfmsub231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Accumulator loop around the 256-bit vfnmadd.pd intrinsic.
; CHECK-LABEL: fnmaddpd_loop_256:
; CHECK: vfnmadd231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fnmaddpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Accumulator loop around the 256-bit vfnmsub.pd intrinsic.
; CHECK-LABEL: fnmsubpd_loop_256:
; CHECK: vfnmsub231pd %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <4 x double> @fnmsubpd_loop_256(i32 %iter, <4 x double> %a, <4 x double> %b, <4 x double> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <4 x double> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double> %a, <4 x double> %b, <4 x double> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <4 x double> %acc
}

; Intrinsics used by the 256-bit packed-double functions above.
declare <4 x double> @llvm.x86.fma.vfmaddsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmsubadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfnmadd.pd.256(<4 x double>, <4 x double>, <4 x double>)
declare <4 x double> @llvm.x86.fma.vfnmsub.pd.256(<4 x double>, <4 x double>, <4 x double>)

;-----------------------------------------------------------------------
; 256-bit, packed single (<8 x float>)
;-----------------------------------------------------------------------

; Accumulator loop around the 256-bit vfmaddsub.ps intrinsic.
; CHECK-LABEL: fmaddsubps_loop_256:
; CHECK: vfmaddsub231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fmaddsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Accumulator loop around the 256-bit vfmsubadd.ps intrinsic.
; CHECK-LABEL: fmsubaddps_loop_256:
; CHECK: vfmsubadd231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fmsubaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Accumulator loop around the 256-bit vfmadd.ps intrinsic.
; CHECK-LABEL: fmaddps_loop_256:
; CHECK: vfmadd231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fmaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Accumulator loop around the 256-bit vfmsub.ps intrinsic.
; CHECK-LABEL: fmsubps_loop_256:
; CHECK: vfmsub231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fmsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Accumulator loop around the 256-bit vfnmadd.ps intrinsic.
; CHECK-LABEL: fnmaddps_loop_256:
; CHECK: vfnmadd231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fnmaddps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Accumulator loop around the 256-bit vfnmsub.ps intrinsic.
; CHECK-LABEL: fnmsubps_loop_256:
; CHECK: vfnmsub231ps %ymm1, %ymm0, %ymm2
; CHECK: vmovaps %ymm2, %ymm0
; CHECK-NEXT: retq
define <8 x float> @fnmsubps_loop_256(i32 %iter, <8 x float> %a, <8 x float> %b, <8 x float> %c) {
entry:
  br label %loop.header

loop.header:
  %acc = phi <8 x float> [ %c, %entry ], [ %acc.next, %loop.latch ]
  %idx = phi i32 [ 0, %entry ], [ %idx.next, %loop.latch ]
  %more = icmp slt i32 %idx, %iter
  br i1 %more, label %loop.body, label %loop.exit

loop.body:
  br label %loop.latch

loop.latch:
  %acc.next = call <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %acc)
  %idx.next = add nsw i32 %idx, 1
  br label %loop.header

loop.exit:
  ret <8 x float> %acc
}

; Intrinsics used by the 256-bit packed-single functions above.
declare <8 x float> @llvm.x86.fma.vfmaddsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmsubadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfnmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
declare <8 x float> @llvm.x86.fma.vfnmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)