; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=AVX

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=AVX

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds1:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds2:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds3:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds4:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: addss %xmm5, %xmm4
; SSE-NEXT: addss %xmm6, %xmm4
; SSE-NEXT: addss %xmm4, %xmm0
; SSE-NEXT: addss %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds5:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds6:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: mulss %xmm3, %xmm2
; SSE-NEXT: mulss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls1:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: addsd %xmm3, %xmm2
; SSE-NEXT: addsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm2
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm1, %xmm0
; SSE-NEXT: addps %xmm3, %xmm2
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm2
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: mulps %xmm3, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: mulpd %xmm3, %xmm2
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_adds_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vaddps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fmul <8 x float> %x0, %x1
  %t1 = fadd <8 x float> %x2, %t0
  %t2 = fadd <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_adds_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vaddpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fmul <4 x double> %x0, %x1
  %t1 = fadd <4 x double> %x2, %t0
  %t2 = fadd <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_muls_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <8 x float> %x0, %x1
  %t1 = fmul <8 x float> %x2, %t0
  %t2 = fmul <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_muls_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <4 x double> %x0, %x1
  %t1 = fmul <4 x double> %x2, %t0
  %t2 = fmul <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: minss %xmm3, %xmm2
; SSE-NEXT: minss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: maxss %xmm3, %xmm2
; SSE-NEXT: maxss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: minsd %xmm3, %xmm2
; SSE-NEXT: minsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: maxsd %xmm3, %xmm2
; SSE-NEXT: maxsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: minps %xmm3, %xmm2
; SSE-NEXT: minps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: maxps %xmm3, %xmm2
; SSE-NEXT: maxps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: minpd %xmm3, %xmm2
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: maxpd %xmm3, %xmm2
; SSE-NEXT: maxpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_mins_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_mins_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.

declare double @bar()

define double @reassociate_adds_from_calls() {
; AVX-LABEL: reassociate_adds_from_calls:
; AVX: callq bar
; AVX-NEXT: vmovsd %xmm0, 16(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, 8(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, (%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd 8(%rsp), %xmm1
; AVX: vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; AVX-LABEL: already_reassociated:
; AVX: callq bar
; AVX-NEXT: vmovsd %xmm0, 16(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, 8(%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, (%rsp)
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd 8(%rsp), %xmm1
; AVX: vaddsd 16(%rsp), %xmm1, %xmm1
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0

  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}