; RUN: opt < %s -instcombine -S | FileCheck %s

; testing-case "float fold(float a) { return 1.2f * a * 2.3f; }"
; 1.2f and 2.3f are supposed to be folded.
define float @fold(float %a) {
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
; CHECK-LABEL: @fold(
; CHECK: fmul fast float %a, 0x4006147AE0000000
}

; Same testing-case as the one used in fold() except that the outer fmul is
; not 'fast', so the two constants must not be folded.
define float @notfold(float %a) {
; CHECK-LABEL: @notfold(
; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul float %mul, 0x4002666660000000
  ret float %mul1
}

; Only the outer fmul is 'fast'; the constants are still expected to fold.
define float @fold2(float %a) {
; CHECK-LABEL: @fold2(
; CHECK: fmul fast float %a, 0x4006147AE0000000
  %mul = fmul float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
}

; C * f1 + f1 = (C+1) * f1
define double @fold3(double %f1) {
  %t1 = fmul fast double 2.000000e+00, %f1
  %t2 = fadd fast double %f1, %t1
  ret double %t2
; CHECK-LABEL: @fold3(
; CHECK: fmul fast double %f1, 3.000000e+00
}

; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
define float @fold4(float %f1, float %f2) {
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd fast float %sub, %sub1
  ret float %add
; CHECK-LABEL: @fold4(
; CHECK: %1 = fadd fast float %f1, %f2
; CHECK: fsub fast float 9.000000e+00, %1
}

; (X + C1) + C2 => X + (C1 + C2)
define float @fold5(float %f1, float %f2) {
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd fast float %add, 5.000000e+00
  ret float %add1
; CHECK-LABEL: @fold5(
; CHECK: fadd fast float %f1, 9.000000e+00
}

; (X + X) + X => 3.0 * X
define float @fold6(float %f1) {
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %t1
  ret float %t2
; CHECK-LABEL: @fold6(
; CHECK: fmul fast float %f1, 3.000000e+00
}

; C1 * X + (X + X) = (C1 + 2) * X
define float @fold7(float %f1) {
  %t1 = fmul fast float %f1, 5.000000e+00
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold7(
; CHECK: fmul fast float %f1, 7.000000e+00
}

; (X + X) + (X + X) => 4.0 * X
define float @fold8(float %f1) {
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold8(
; CHECK: fmul fast float %f1, 4.000000e+00
}

; X - (X + Y) => 0 - Y
define float @fold9(float %f1, float %f2) {
  %t1 = fadd float %f1, %f2
  %t3 = fsub fast float %f1, %t1
  ret float %t3

; CHECK-LABEL: @fold9(
; CHECK: fsub fast float -0.000000e+00, %f2
}

; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
; top of resulting simplified expression tree may potentially reveal some
; optimization opportunities in the super-expression trees.
;
define float @fold10(float %f1, float %f2) {
  %t1 = fadd fast float 2.000000e+00, %f1
  %t2 = fsub fast float %f2, 3.000000e+00
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold10(
; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
; CHECK: ret float %t3
}

; The following two cases once caused a crash/miscompilation; they only need
; to make it through the pass and still return.
define float @fail1(float %f1, float %f2) {
  %conv3 = fadd fast float %f1, -1.000000e+00
  %add = fadd fast float %conv3, %conv3
  %add2 = fadd fast float %add, %conv3
  ret float %add2
; CHECK-LABEL: @fail1(
; CHECK: ret
}

define double @fail2(double %f1, double %f2) {
  %t1 = fsub fast double %f1, %f2
  %t2 = fadd fast double %f1, %f2
  %t3 = fsub fast double %t1, %t2
  ret double %t3
; CHECK-LABEL: @fail2(
; CHECK: ret
}

; c1 * x - x => (c1 - 1.0) * x
define float @fold13(float %x) {
  %mul = fmul fast float %x, 7.000000e+00
  %sub = fsub fast float %mul, %x
  ret float %sub
; CHECK-LABEL: @fold13(
; CHECK: fmul fast float %x, 6.000000e+00
; CHECK: ret
}

; -x + y => y - x
define float @fold14(float %x, float %y) {
  %neg = fsub fast float -0.0, %x
  %add = fadd fast float %neg, %y
  ret float %add
; CHECK-LABEL: @fold14(
; CHECK: fsub fast float %y, %x
; CHECK: ret
}

; x + -y => x - y
define float @fold15(float %x, float %y) {
  %neg = fsub fast float -0.0, %y
  %add = fadd fast float %x, %neg
  ret float %add
; CHECK-LABEL: @fold15(
; CHECK: fsub fast float %x, %y
; CHECK: ret
}

; (select X+Y, X-Y) => X + (select Y, -Y)
define float @fold16(float %x, float %y) {
  %cmp = fcmp ogt float %x, %y
  %plus = fadd fast float %x, %y
  %minus = fsub fast float %x, %y
  %r = select i1 %cmp, float %plus, float %minus
  ret float %r
; CHECK-LABEL: @fold16(
; CHECK: fsub fast float
; CHECK: select
; CHECK: fadd fast float
; CHECK: ret
}



; =========================================================================
;
;   Testing-cases about fmul begin
;
; =========================================================================

; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3) (i.e. distribution)
define float @fmul_distribute1(float %f1) {
  %t1 = fmul float %f1, 6.0e+3
  %t2 = fadd float %t1, 2.0e+3
  %t3 = fmul fast float %t2, 5.0e+3
  ret float %t3
; CHECK-LABEL: @fmul_distribute1(
; CHECK: %1 = fmul fast float %f1, 3.000000e+07
; CHECK: %t3 = fadd fast float %1, 1.000000e+07
}

; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
define double @fmul_distribute2(double %f1, double %f2) {
  %t1 = fdiv double %f1, 3.0e+0
  %t2 = fadd double %t1, 5.0e+1
  ; 0x10000000000000 = DBL_MIN
  %t3 = fmul fast double %t2, 0x10000000000000
  ret double %t3

; CHECK-LABEL: @fmul_distribute2(
; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
; CHECK: fadd fast double %1, 0x69000000000000
}

; 5.0e-1 * DBL_MIN yields denormal, so "(f1/3.0 + 5.0e-1) * DBL_MIN" cannot
; be simplified into f1/(3.0/DBL_MIN) + (5.0e-1*DBL_MIN)
define double @fmul_distribute3(double %f1) {
  %t1 = fdiv double %f1, 3.0e+0
  %t2 = fadd double %t1, 5.0e-1
  %t3 = fmul fast double %t2, 0x10000000000000
  ret double %t3

; CHECK-LABEL: @fmul_distribute3(
; CHECK: fmul fast double %t2, 0x10000000000000
}

; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3)) (i.e. distribution)
define float @fmul_distribute4(float %f1) {
  %t1 = fmul float %f1, 6.0e+3
  %t2 = fsub float 2.0e+3, %t1
  %t3 = fmul fast float %t2, 5.0e+3
  ret float %t3
; CHECK-LABEL: @fmul_distribute4(
; CHECK: %1 = fmul fast float %f1, 3.000000e+07
; CHECK: %t3 = fsub fast float 1.000000e+07, %1
}

; C1/X * C2 => (C1*C2) / X
define float @fmul2(float %f1) {
  %t1 = fdiv float 2.0e+3, %f1
  %t3 = fmul fast float %t1, 6.0e+3
  ret float %t3
; CHECK-LABEL: @fmul2(
; CHECK: fdiv fast float 1.200000e+07, %f1
}

; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
@fmul2_external = external global float
define float @fmul2_disable(float %f1) {
  %div = fdiv fast float 1.000000e+00, %f1
  store float %div, float* @fmul2_external
  %mul = fmul fast float %div, 2.000000e+00
  ret float %mul
; CHECK-LABEL: @fmul2_disable(
; CHECK: store
; CHECK: fmul fast
}

; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp)
define float @fmul3(float %f1, float %f2) {
  %t1 = fdiv float %f1, 2.0e+3
  %t3 = fmul fast float %t1, 6.0e+3
  ret float %t3
; CHECK-LABEL: @fmul3(
; CHECK: fmul fast float %f1, 3.000000e+00
}

; Vector version of fmul3: the fold is applied element-wise.
define <4 x float> @fmul3_vec(<4 x float> %f1, <4 x float> %f2) {
  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
  %t3 = fmul fast <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
  ret <4 x float> %t3
; CHECK-LABEL: @fmul3_vec(
; CHECK: fmul fast <4 x float> %f1, <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
}

; Make sure fmul with constant expression doesn't assert.
define <4 x float> @fmul3_vec_constexpr(<4 x float> %f1, <4 x float> %f2) {
  %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
  %t3 = fmul fast <4 x float> %t1, %constExprMul
  ret <4 x float> %t3
; CHECK-LABEL: @fmul3_vec_constexpr(
}

; Rule "X/C1 * C2 => X * (C2/C1)" is not applicable if C2/C1 is either a
; special value or a denormal. The 0x3810000000000000 here takes the value
; FLT_MIN.
;
define float @fmul4(float %f1, float %f2) {
  %t1 = fdiv float %f1, 2.0e+3
  %t3 = fmul fast float %t1, 0x3810000000000000
  ret float %t3
; CHECK-LABEL: @fmul4(
; CHECK: fmul fast float %t1, 0x3810000000000000
}

; X / C1 * C2 => X / (C1/C2) if C2/C1 is either a special value or a denormal,
; and C1/C2 is a normal value. Here C1 = 3.0 and C2 = FLT_MIN, so the expected
; divisor 0x47E8000000000000 is 3.0 / FLT_MIN.
;
define float @fmul5(float %f1, float %f2) {
  %t1 = fdiv float %f1, 3.0e+0
  %t3 = fmul fast float %t1, 0x3810000000000000
  ret float %t3
; CHECK-LABEL: @fmul5(
; CHECK: fdiv fast float %f1, 0x47E8000000000000
}

; (X*Y) * X => (X*X) * Y
define float @fmul6(float %f1, float %f2) {
  %mul = fmul float %f1, %f2
  %mul1 = fmul fast float %mul, %f1
  ret float %mul1
; CHECK-LABEL: @fmul6(
; CHECK: fmul fast float %f1, %f1
}

; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
define float @fmul7(float %f1, float %f2) {
  %mul = fmul float %f1, %f2
  %mul1 = fmul fast float %mul, %f1
  %add = fadd float %mul1, %mul
  ret float %add
; CHECK-LABEL: @fmul7(
; CHECK: fmul fast float %mul, %f1
}

; =========================================================================
;
;   Testing-cases about negation
;
; =========================================================================
; (-X) * (-Y) => X * Y
define float @fneg1(float %f1, float %f2) {
  %sub = fsub float -0.000000e+00, %f1
  %sub1 = fsub nsz float 0.000000e+00, %f2
  %mul = fmul float %sub, %sub1
  ret float %mul
; CHECK-LABEL: @fneg1(
; CHECK: fmul float %f1, %f2
}

; With 'nsz', "0.0 - X" is canonicalized to "-0.0 - X".
define float @fneg2(float %x) {
  %sub = fsub nsz float 0.0, %x
  ret float %sub
; CHECK-LABEL: @fneg2(
; CHECK-NEXT: fsub nsz float -0.000000e+00, %x
; CHECK-NEXT: ret float
}

; =========================================================================
;
;   Testing-cases about div
;
; =========================================================================

; X/C1 / C2 => X * (1/(C2*C1))
define float @fdiv1(float %x) {
  %div = fdiv float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FD7303B60000000 = 0.36231884057971014492
; CHECK-LABEL: @fdiv1(
; CHECK: fmul fast float %x, 0x3FD7303B60000000
}

; X*C1 / C2 => X * (C1/C2)
define float @fdiv2(float %x) {
  %mul = fmul float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %mul, 0x4002666660000000
  ret float %div1

; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FE0B21660000000 = 0.52173918485641479492
; CHECK-LABEL: @fdiv2(
; CHECK: fmul fast float %x, 0x3FE0B21660000000
}

; Vector version of fdiv2.
define <2 x float> @fdiv2_vec(<2 x float> %x) {
  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
  ret <2 x float> %div1

; CHECK-LABEL: @fdiv2_vec(
; CHECK: fmul fast <2 x float> %x, <float 3.000000e+00, float 3.000000e+00>
}

; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2*C1 is not a
; normal FP value. Here C1 = 0x47EFFFFFE0000000 (FLT_MAX) and C2 = 2.3f, so
; the product overflows.
;
define float @fdiv3(float %x) {
  %div = fdiv float %x, 0x47EFFFFFE0000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; CHECK-LABEL: @fdiv3(
; CHECK: fdiv float %x, 0x47EFFFFFE0000000
}

; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is not a normal FP value
; (here FLT_MAX / 0.2f overflows).
; NOTE(review): the fdiv below carries no 'fast' flag, so this test may not
; actually reach the constant check -- confirm whether 'fdiv fast' was meant.
define float @fdiv4(float %x) {
  %mul = fmul float %x, 0x47EFFFFFE0000000
  %div = fdiv float %mul, 0x3FC99999A0000000
  ret float %div
; CHECK-LABEL: @fdiv4(
; CHECK: fmul float %x, 0x47EFFFFFE0000000
}

; (X/Y)/Z => X/(Y*Z)
define float @fdiv5(float %f1, float %f2, float %f3) {
  %t1 = fdiv float %f1, %f2
  %t2 = fdiv fast float %t1, %f3
  ret float %t2
; CHECK-LABEL: @fdiv5(
; CHECK: fmul float %f2, %f3
}

; Z/(X/Y) => (Z*Y)/X
define float @fdiv6(float %f1, float %f2, float %f3) {
  %t1 = fdiv float %f1, %f2
  %t2 = fdiv fast float %f3, %t1
  ret float %t2
; CHECK-LABEL: @fdiv6(
; CHECK: fmul float %f3, %f2
}

; C1/(X*C2) => (C1/C2) / X
define float @fdiv7(float %x) {
  %t1 = fmul float %x, 3.0e0
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv7(
; CHECK: fdiv fast float 5.000000e+00, %x
}

; C1/(X/C2) => (C1*C2) / X
define float @fdiv8(float %x) {
  %t1 = fdiv float %x, 3.0e0
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv8(
; CHECK: fdiv fast float 4.500000e+01, %x
}

; C1/(C2/X) => (C1/C2) * X
define float @fdiv9(float %x) {
  %t1 = fdiv float 3.0e0, %x
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv9(
; CHECK: fmul fast float %x, 5.000000e+00
}

; =========================================================================
;
;   Testing-cases about factorization
;
; =========================================================================
; x*z + y*z => (x+y) * z
define float @fact_mul1(float %x, float %y, float %z) {
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %y, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul1(
; CHECK: fmul fast float %1, %z
}

; z*x - y*z => (x-y) * z
define float @fact_mul2(float %x, float %y, float %z) {
  %t1 = fmul fast float %z, %x
  %t2 = fmul fast float %y, %z
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul2(
; CHECK: fmul fast float %1, %z
}

; z*x - z*y => (x-y) * z
define float @fact_mul3(float %x, float %y, float %z) {
  %t2 = fmul fast float %z, %y
  %t1 = fmul fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul3(
; CHECK: fmul fast float %1, %z
}

; x*z - z*y => (x-y) * z
define float @fact_mul4(float %x, float %y, float %z) {
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %z, %y
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul4(
; CHECK: fmul fast float %1, %z
}

; x/y + x/z, no xform
define float @fact_div1(float %x, float %y, float %z) {
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %x, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div1(
; CHECK: fadd fast float %t1, %t2
}

; x/y + z/x; no xform
define float @fact_div2(float %x, float %y, float %z) {
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div2(
; CHECK: fadd fast float %t1, %t2
}

; y/x + z/x => (y+z)/x
define float @fact_div3(float %x, float %y, float %z) {
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div3(
; CHECK: fdiv fast float %1, %x
}

; y/x - z/x => (y-z)/x
define float @fact_div4(float %x, float %y, float %z) {
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div4(
; CHECK: fdiv fast float %1, %x
}

; y/x + z/x => (y+z)/x is still applied when y+z is a normal value, even if
; one addend is a denormal: 0x3810000000000000 is FLT_MIN, 0x3800000000000000
; is FLT_MIN/2, and their sum 0x3818000000000000 is 1.5 * FLT_MIN.
define float @fact_div5(float %x) {
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div5(
; CHECK: fdiv fast float 0x3818000000000000, %x
}

; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
define float @fact_div6(float %x) {
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div6(
; CHECK: %t3 = fsub fast float %t1, %t2
}

; =========================================================================
;
;   Test-cases for square root
;
; =========================================================================

; A squared factor fed into a square root intrinsic should be hoisted out
; as a fabs() value.

declare double @llvm.sqrt.f64(double)

define double @sqrt_intrinsic_arg_squared(double %x) {
  %mul = fmul fast double %x, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_arg_squared(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}

; Check all 6 combinations of a 3-way multiplication tree where
; one factor is repeated.
; sqrt((y * x) * x)
define double @sqrt_intrinsic_three_args1(double %x, double %y) {
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %mul, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_three_args1(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; sqrt((x * y) * x)
define double @sqrt_intrinsic_three_args2(double %x, double %y) {
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %mul, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_three_args2(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; sqrt((x * x) * y)
define double @sqrt_intrinsic_three_args3(double %x, double %y) {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_three_args3(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; sqrt(x * (y * x))
define double @sqrt_intrinsic_three_args4(double %x, double %y) {
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %x, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_three_args4(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; sqrt(x * (x * y))
define double @sqrt_intrinsic_three_args5(double %x, double %y) {
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %x, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_three_args5(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; sqrt(y * (x * x))
define double @sqrt_intrinsic_three_args6(double %x, double %y) {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %y, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_three_args6(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; If any operation is not 'fast', we can't simplify.
; The inner "x * x" multiply is not 'fast', so everything must stay as-is.
define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
  %mul = fmul double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_not_so_fast(
; CHECK-NEXT: %mul = fmul double %x, %x
; CHECK-NEXT: %mul2 = fmul fast double %mul, %y
; CHECK-NEXT: %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
; CHECK-NEXT: ret double %sqrt
}

; sqrt((x*x) * (x*x)) => x*x
define double @sqrt_intrinsic_arg_4th(double %x) {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_arg_4th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
; CHECK-NEXT: ret double %mul
}

; sqrt(((x*x) * x) * (x*x)) => (x*x) * sqrt(x)
define double @sqrt_intrinsic_arg_5th(double %x) {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %x
  %mul3 = fmul fast double %mul2, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
  ret double %sqrt

; CHECK-LABEL: @sqrt_intrinsic_arg_5th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
; CHECK-NEXT: ret double %1
}

; Check that square root calls have the same behavior.
declare float @sqrtf(float)
declare double @sqrt(double)
declare fp128 @sqrtl(fp128)

; sqrtf(x * x) => fabs(x)
define float @sqrt_call_squared_f32(float %x) {
  %mul = fmul fast float %x, %x
  %sqrt = call fast float @sqrtf(float %mul)
  ret float %sqrt

; CHECK-LABEL: @sqrt_call_squared_f32(
; CHECK-NEXT: %fabs = call fast float @llvm.fabs.f32(float %x)
; CHECK-NEXT: ret float %fabs
}

; sqrt(x * x) => fabs(x)
define double @sqrt_call_squared_f64(double %x) {
  %mul = fmul fast double %x, %x
  %sqrt = call fast double @sqrt(double %mul)
  ret double %sqrt

; CHECK-LABEL: @sqrt_call_squared_f64(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}

; sqrtl(x * x) => fabs(x)
define fp128 @sqrt_call_squared_f128(fp128 %x) {
  %mul = fmul fast fp128 %x, %x
  %sqrt = call fast fp128 @sqrtl(fp128 %mul)
  ret fp128 %sqrt

; CHECK-LABEL: @sqrt_call_squared_f128(
; CHECK-NEXT: %fabs = call fast fp128 @llvm.fabs.f128(fp128 %x)
; CHECK-NEXT: ret fp128 %fabs
}

; =========================================================================
;
;   Test-cases for fmin / fmax
;
; =========================================================================

declare double @fmax(double, double)
declare double @fmin(double, double)
declare float @fmaxf(float, float)
declare float @fminf(float, float)
declare fp128 @fmaxl(fp128, fp128)
declare fp128 @fminl(fp128, fp128)

; No NaNs is the minimum requirement to replace these calls.
; This should always be set when unsafe-fp-math is true, but
; alternate the attributes for additional test coverage.
; 'nsz' is implied by the definition of fmax or fmin itself.

; Shrink and remove the call.
define float @max1(float %a, float %b) {
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call fast double @fmax(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f

; CHECK-LABEL: @max1(
; CHECK-NEXT: fcmp fast ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; 'nnan' alone is enough to turn fmaxf into a compare + select.
define float @max2(float %a, float %b) {
  %c = call nnan float @fmaxf(float %a, float %b)
  ret float %c

; CHECK-LABEL: @max2(
; CHECK-NEXT: fcmp nnan nsz ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}


; Same for a 'fast' call to the double variant.
define double @max3(double %a, double %b) {
  %c = call fast double @fmax(double %a, double %b)
  ret double %c

; CHECK-LABEL: @max3(
; CHECK-NEXT: fcmp fast ogt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}

; Same for an 'nnan' call to the fp128 variant.
define fp128 @max4(fp128 %a, fp128 %b) {
  %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
  ret fp128 %c

; CHECK-LABEL: @max4(
; CHECK-NEXT: fcmp nnan nsz ogt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}

; Shrink and remove the call.
define float @min1(float %a, float %b) {
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call nnan double @fmin(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f

; CHECK-LABEL: @min1(
; CHECK-NEXT: fcmp nnan nsz olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; A 'fast' fminf call becomes a compare + select.
define float @min2(float %a, float %b) {
  %c = call fast float @fminf(float %a, float %b)
  ret float %c

; CHECK-LABEL: @min2(
; CHECK-NEXT: fcmp fast olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; Same for an 'nnan' call to the double variant.
define double @min3(double %a, double %b) {
  %c = call nnan double @fmin(double %a, double %b)
  ret double %c

; CHECK-LABEL: @min3(
; CHECK-NEXT: fcmp nnan nsz olt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}

; Same for a 'fast' call to the fp128 variant.
define fp128 @min4(fp128 %a, fp128 %b) {
  %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
  ret fp128 %c

; CHECK-LABEL: @min4(
; CHECK-NEXT: fcmp fast olt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}