; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds1:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds2:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds3:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds4:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE:       # BB#0:
; SSE-NEXT:    addss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    addss %xmm5, %xmm4
; SSE-NEXT:    addss %xmm6, %xmm4
; SSE-NEXT:    addss %xmm4, %xmm0
; SSE-NEXT:    addss %xmm7, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds5:
; AVX:       # BB#0:
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    addss %xmm3, %xmm2
; SSE-NEXT:    addss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds6:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    mulss %xmm3, %xmm2
; SSE-NEXT:    mulss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls1:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    addsd %xmm3, %xmm2
; SSE-NEXT:    addsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    mulsd %xmm3, %xmm2
; SSE-NEXT:    mulsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    mulps %xmm1, %xmm0
; SSE-NEXT:    addps %xmm3, %xmm2
; SSE-NEXT:    addps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    mulpd %xmm1, %xmm0
; SSE-NEXT:    addpd %xmm3, %xmm2
; SSE-NEXT:    addpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    mulps %xmm3, %xmm2
; SSE-NEXT:    mulps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    mulpd %xmm3, %xmm2
; SSE-NEXT:    mulpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_adds_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <8 x float> %x0, %x1
  %t1 = fadd <8 x float> %x2, %t0
  %t2 = fadd <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_adds_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fmul <4 x double> %x0, %x1
  %t1 = fadd <4 x double> %x2, %t0
  %t2 = fadd <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_muls_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %t1 = fmul <8 x float> %x2, %t0
  %t2 = fmul <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_muls_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %t1 = fmul <4 x double> %x2, %t0
  %t2 = fmul <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    minss %xmm3, %xmm2
; SSE-NEXT:    minss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE:       # BB#0:
; SSE-NEXT:    divss %xmm1, %xmm0
; SSE-NEXT:    maxss %xmm3, %xmm2
; SSE-NEXT:    maxss %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX:       # BB#0:
; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    minsd %xmm3, %xmm2
; SSE-NEXT:    minsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE:       # BB#0:
; SSE-NEXT:    divsd %xmm1, %xmm0
; SSE-NEXT:    maxsd %xmm3, %xmm2
; SSE-NEXT:    maxsd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX:       # BB#0:
; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    minps %xmm3, %xmm2
; SSE-NEXT:    minps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE:       # BB#0:
; SSE-NEXT:    addps %xmm1, %xmm0
; SSE-NEXT:    maxps %xmm3, %xmm2
; SSE-NEXT:    maxps %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    minpd %xmm3, %xmm2
; SSE-NEXT:    minpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE:       # BB#0:
; SSE-NEXT:    addpd %xmm1, %xmm0
; SSE-NEXT:    maxpd %xmm3, %xmm2
; SSE-NEXT:    maxpd %xmm2, %xmm0
; SSE-NEXT:    retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_mins_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX:       # BB#0:
; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_mins_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX:       # BB#0:
; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT:    retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.

declare double @bar()

define double @reassociate_adds_from_calls() {
; AVX-LABEL: reassociate_adds_from_calls:
; AVX:       callq bar
; AVX-NEXT:    vmovsd %xmm0, 16(%rsp)
; AVX-NEXT:    callq bar
; AVX-NEXT:    vmovsd %xmm0, 8(%rsp)
; AVX-NEXT:    callq bar
; AVX-NEXT:    vmovsd %xmm0, (%rsp)
; AVX-NEXT:    callq bar
; AVX-NEXT:    vmovsd 8(%rsp), %xmm1
; AVX:         vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT:    vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; AVX-LABEL: already_reassociated:
; AVX:       callq bar
; AVX-NEXT:    vmovsd %xmm0, 16(%rsp)
; AVX-NEXT:    callq bar
; AVX-NEXT:    vmovsd %xmm0, 8(%rsp)
; AVX-NEXT:    callq bar
; AVX-NEXT:    vmovsd %xmm0, (%rsp)
; AVX-NEXT:    callq bar
; AVX-NEXT:    vmovsd 8(%rsp), %xmm1
; AVX:         vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT:    vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT:    vaddsd %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}