; RUN: opt < %s -instcombine -S | FileCheck %s

; Test case: "float fold(float a) { return 1.2f * a * 2.3f; }"
; 1.2f and 2.3f are supposed to be folded into a single constant.
define float @fold(float %a) {
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
; CHECK-LABEL: @fold(
; CHECK: fmul fast float %a, 0x4006147AE0000000
}

; Same test case as the one used in fold(), except that the final fmul
; carries no fast-math flags, so the first multiply must be left in place.
define float @notfold(float %a) {
; CHECK-LABEL: @notfold(
; CHECK: %mul = fmul fast float %a, 0x3FF3333340000000
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul float %mul, 0x4002666660000000
  ret float %mul1
}

; Only the outer fmul is 'fast' here; the two constants are still expected
; to be folded.
define float @fold2(float %a) {
; CHECK-LABEL: @fold2(
; CHECK: fmul fast float %a, 0x4006147AE0000000
  %mul = fmul float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
}

; C * f1 + f1 = (C+1) * f1
define double @fold3(double %f1) {
  %t1 = fmul fast double 2.000000e+00, %f1
  %t2 = fadd fast double %f1, %t1
  ret double %t2
; CHECK-LABEL: @fold3(
; CHECK: fmul fast double %f1, 3.000000e+00
}

; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
define float @fold4(float %f1, float %f2) {
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd fast float %sub, %sub1
  ret float %add
; CHECK-LABEL: @fold4(
; CHECK: %1 = fadd fast float %f1, %f2
; CHECK: fsub fast float 9.000000e+00, %1
}

; (X + C1) + C2 => X + (C1 + C2)
define float @fold5(float %f1, float %f2) {
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd fast float %add, 5.000000e+00
  ret float %add1
; CHECK-LABEL: @fold5(
; CHECK: fadd fast float %f1, 9.000000e+00
}

; (X + X) + X => 3.0 * X
define float @fold6(float %f1) {
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %t1
  ret float %t2
; CHECK-LABEL: @fold6(
; CHECK: fmul fast float %f1, 3.000000e+00
}

; C1 * X + (X + X) = (C1 + 2) * X
define float @fold7(float %f1) {
  %t1 = fmul fast float %f1, 5.000000e+00
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold7(
; CHECK: fmul fast float %f1, 7.000000e+00
}

; (X + X) + (X + X) => 4.0 * X
define float @fold8(float %f1) {
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold8(
; CHECK: fmul fast float %f1, 4.000000e+00
}

; X - (X + Y) => 0 - Y
define float @fold9(float %f1, float %f2) {
  %t1 = fadd float %f1, %f2
  %t3 = fsub fast float %f1, %t1
  ret float %t3

; CHECK-LABEL: @fold9(
; CHECK: fsub fast float -0.000000e+00, %f2
}

; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing the constant addend at the
; top of the resulting simplified expression tree may potentially reveal some
; optimization opportunities in the super-expression trees.
;
define float @fold10(float %f1, float %f2) {
  %t1 = fadd fast float 2.000000e+00, %f1
  %t2 = fsub fast float %f2, 3.000000e+00
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fold10(
; CHECK: %t3 = fadd fast float %t2, -1.000000e+00
; CHECK: ret float %t3
}

; These inputs once caused a crash/miscompilation; only check that a 'ret'
; is still produced.
define float @fail1(float %f1, float %f2) {
  %conv3 = fadd fast float %f1, -1.000000e+00
  %add = fadd fast float %conv3, %conv3
  %add2 = fadd fast float %add, %conv3
  ret float %add2
; CHECK-LABEL: @fail1(
; CHECK: ret
}

define double @fail2(double %f1, double %f2) {
  %t1 = fsub fast double %f1, %f2
  %t2 = fadd fast double %f1, %f2
  %t3 = fsub fast double %t1, %t2
  ret double %t3
; CHECK-LABEL: @fail2(
; CHECK: ret
}

; c1 * x - x => (c1 - 1.0) * x
define float @fold13(float %x) {
  %mul = fmul fast float %x, 7.000000e+00
  %sub = fsub fast float %mul, %x
  ret float %sub
; CHECK-LABEL: @fold13(
; CHECK: fmul fast float %x, 6.000000e+00
; CHECK: ret
}

; -x + y => y - x
define float @fold14(float %x, float %y) {
  %neg = fsub fast float -0.0, %x
  %add = fadd fast float %neg, %y
  ret float %add
; CHECK-LABEL: @fold14(
; CHECK: fsub fast float %y, %x
; CHECK: ret
}

; x + -y => x - y
define float @fold15(float %x, float %y) {
  %neg = fsub fast float -0.0, %y
  %add = fadd fast float %x, %neg
  ret float %add
; CHECK-LABEL: @fold15(
; CHECK: fsub fast float %x, %y
; CHECK: ret
}

; (select X+Y, X-Y) => X + (select Y, -Y)
define float @fold16(float %x, float %y) {
  %cmp = fcmp ogt float %x, %y
  %plus = fadd fast float %x, %y
  %minus = fsub fast float %x, %y
  %r = select i1 %cmp, float %plus, float %minus
  ret float %r
; CHECK-LABEL: @fold16(
; CHECK: fsub fast float
; CHECK: select
; CHECK: fadd fast float
; CHECK: ret
}



; =========================================================================
;
;   Test cases about fmul begin
;
; =========================================================================

; ((X*C1) + C2) * C3 => (X * (C1*C3)) + (C2*C3)  (i.e. distribution)
define float @fmul_distribute1(float %f1) {
  %t1 = fmul float %f1, 6.0e+3
  %t2 = fadd float %t1, 2.0e+3
  %t3 = fmul fast float %t2, 5.0e+3
  ret float %t3
; CHECK-LABEL: @fmul_distribute1(
; CHECK: %1 = fmul fast float %f1, 3.000000e+07
; CHECK: %t3 = fadd fast float %1, 1.000000e+07
}

; (X/C1 + C2) * C3 => X/(C1/C3) + C2*C3
define double @fmul_distribute2(double %f1, double %f2) {
  %t1 = fdiv double %f1, 3.0e+0
  %t2 = fadd double %t1, 5.0e+1
  ; 0x10000000000000 = DBL_MIN
  %t3 = fmul fast double %t2, 0x10000000000000
  ret double %t3

; CHECK-LABEL: @fmul_distribute2(
; CHECK: %1 = fdiv fast double %f1, 0x7FE8000000000000
; CHECK: fadd fast double %1, 0x69000000000000
}

; 5.0e-1 * DBL_MIN yields a denormal, so "(f1/3.0 + 5.0e-1) * DBL_MIN" cannot
; be simplified into "f1/(3.0/DBL_MIN) + (5.0e-1*DBL_MIN)".
define double @fmul_distribute3(double %f1) {
  %t1 = fdiv double %f1, 3.0e+0
  %t2 = fadd double %t1, 5.0e-1
  %t3 = fmul fast double %t2, 0x10000000000000
  ret double %t3

; CHECK-LABEL: @fmul_distribute3(
; CHECK: fmul fast double %t2, 0x10000000000000
}

; (C2 - (X*C1)) * C3 => (C2*C3) - (X * (C1*C3))  (i.e. distribution)
define float @fmul_distribute4(float %f1) {
  %t1 = fmul float %f1, 6.0e+3
  %t2 = fsub float 2.0e+3, %t1
  %t3 = fmul fast float %t2, 5.0e+3
  ret float %t3
; CHECK-LABEL: @fmul_distribute4(
; CHECK: %1 = fmul fast float %f1, 3.000000e+07
; CHECK: %t3 = fsub fast float 1.000000e+07, %1
}

; C1/X * C2 => (C1*C2) / X
define float @fmul2(float %f1) {
  %t1 = fdiv float 2.0e+3, %f1
  %t3 = fmul fast float %t1, 6.0e+3
  ret float %t3
; CHECK-LABEL: @fmul2(
; CHECK: fdiv fast float 1.200000e+07, %f1
}

; C1/X * C2 => (C1*C2) / X is disabled if C1/X has multiple uses
@fmul2_external = external global float
define float @fmul2_disable(float %f1) {
  %div = fdiv fast float 1.000000e+00, %f1
  store float %div, float* @fmul2_external
  %mul = fmul fast float %div, 2.000000e+00
  ret float %mul
; CHECK-LABEL: @fmul2_disable(
; CHECK: store
; CHECK: fmul fast
}

; X/C1 * C2 => X * (C2/C1) (if C2/C1 is normal Fp)
define float @fmul3(float %f1, float %f2) {
  %t1 = fdiv float %f1, 2.0e+3
  %t3 = fmul fast float %t1, 6.0e+3
  ret float %t3
; CHECK-LABEL: @fmul3(
; CHECK: fmul fast float %f1, 3.000000e+00
}

; Vector version of fmul3: the division is folded element-wise.
define <4 x float> @fmul3_vec(<4 x float> %f1, <4 x float> %f2) {
  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
  %t3 = fmul fast <4 x float> %t1, <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3>
  ret <4 x float> %t3
; CHECK-LABEL: @fmul3_vec(
; CHECK: fmul fast <4 x float> %f1, <float 3.000000e+00, float 2.000000e+00, float 1.000000e+00, float 1.000000e+00>
}

; Make sure fmul with constant expression doesn't assert.
define <4 x float> @fmul3_vec_constexpr(<4 x float> %f1, <4 x float> %f2) {
  %constExprMul = bitcast i128 trunc (i160 bitcast (<5 x float> <float 6.0e+3, float 6.0e+3, float 2.0e+3, float 1.0e+3, float undef> to i160) to i128) to <4 x float>
  %t1 = fdiv <4 x float> %f1, <float 2.0e+3, float 3.0e+3, float 2.0e+3, float 1.0e+3>
  %t3 = fmul fast <4 x float> %t1, %constExprMul
  ret <4 x float> %t3
}

; Rule "X/C1 * C2 => X * (C2/C1)" is not applicable if C2/C1 is either a
; special value or a denormal. The constant 0x3810000000000000 used here is
; FLT_MIN.
;
define float @fmul4(float %f1, float %f2) {
  %t1 = fdiv float %f1, 2.0e+3
  %t3 = fmul fast float %t1, 0x3810000000000000
  ret float %t3
; CHECK-LABEL: @fmul4(
; CHECK: fmul fast float %t1, 0x3810000000000000
}

; X / C1 * C2 => X / (C1/C2) if C2/C1 is either a special value or a denormal,
; and C1/C2 is a normal value.
;
define float @fmul5(float %f1, float %f2) {
  %t1 = fdiv float %f1, 3.0e+0
  %t3 = fmul fast float %t1, 0x3810000000000000
  ret float %t3
; CHECK-LABEL: @fmul5(
; CHECK: fdiv fast float %f1, 0x47E8000000000000
}

; (X*Y) * X => (X*X) * Y
define float @fmul6(float %f1, float %f2) {
  %mul = fmul float %f1, %f2
  %mul1 = fmul fast float %mul, %f1
  ret float %mul1
; CHECK-LABEL: @fmul6(
; CHECK: fmul fast float %f1, %f1
}

; "(X*Y) * X => (X*X) * Y" is disabled if "X*Y" has multiple uses
define float @fmul7(float %f1, float %f2) {
  %mul = fmul float %f1, %f2
  %mul1 = fmul fast float %mul, %f1
  %add = fadd float %mul1, %mul
  ret float %add
; CHECK-LABEL: @fmul7(
; CHECK: fmul fast float %mul, %f1
}

; =========================================================================
;
;   Test cases about negation
;
; =========================================================================

; (-X) * (-Y) => X * Y
define float @fneg1(float %f1, float %f2) {
  %sub = fsub float -0.000000e+00, %f1
  %sub1 = fsub nsz float 0.000000e+00, %f2
  %mul = fmul float %sub, %sub1
  ret float %mul
; CHECK-LABEL: @fneg1(
; CHECK: fmul float %f1, %f2
}

; With 'nsz', "0.0 - X" is expected to be rewritten as "-0.0 - X".
define float @fneg2(float %x) {
  %sub = fsub nsz float 0.0, %x
  ret float %sub
; CHECK-LABEL: @fneg2(
; CHECK-NEXT: fsub nsz float -0.000000e+00, %x
; CHECK-NEXT: ret float
}

; =========================================================================
;
;   Test cases about div
;
; =========================================================================

; X/C1 / C2 => X * (1/(C2*C1))
define float @fdiv1(float %x) {
  %div = fdiv float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FD7303B60000000 = 0.36231884057971014492
; CHECK-LABEL: @fdiv1(
; CHECK: fmul fast float %x, 0x3FD7303B60000000
}

; X*C1 / C2 => X * (C1/C2)
define float @fdiv2(float %x) {
  %mul = fmul float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %mul, 0x4002666660000000
  ret float %div1

; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FE0B21660000000 = 0.52173918485641479492
; CHECK-LABEL: @fdiv2(
; CHECK: fmul fast float %x, 0x3FE0B21660000000
}

; Vector version of fdiv2: X*C1 / C2 => X * (C1/C2), element-wise.
define <2 x float> @fdiv2_vec(<2 x float> %x) {
  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
  ret <2 x float> %div1

; CHECK-LABEL: @fdiv2_vec(
; CHECK: fmul fast <2 x float> %x, <float 3.000000e+00, float 3.000000e+00>
}

; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
;
define float @fdiv3(float %x) {
  %div = fdiv float %x, 0x47EFFFFFE0000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; CHECK-LABEL: @fdiv3(
; CHECK: fdiv float %x, 0x47EFFFFFE0000000
}

; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
; NOTE(review): the fdiv below carries no fast-math flags, so it may be left
; alone for that reason rather than because of the constant check -- confirm
; whether 'fdiv fast' was intended.
define float @fdiv4(float %x) {
  %mul = fmul float %x, 0x47EFFFFFE0000000
  %div = fdiv float %mul, 0x3FC99999A0000000
  ret float %div
; CHECK-LABEL: @fdiv4(
; CHECK: fmul float %x, 0x47EFFFFFE0000000
}

; (X/Y)/Z => X/(Y*Z)
define float @fdiv5(float %f1, float %f2, float %f3) {
  %t1 = fdiv float %f1, %f2
  %t2 = fdiv fast float %t1, %f3
  ret float %t2
; CHECK-LABEL: @fdiv5(
; CHECK: fmul float %f2, %f3
}

; Z/(X/Y) => (Z*Y)/X
define float @fdiv6(float %f1, float %f2, float %f3) {
  %t1 = fdiv float %f1, %f2
  %t2 = fdiv fast float %f3, %t1
  ret float %t2
; CHECK-LABEL: @fdiv6(
; CHECK: fmul float %f3, %f2
}

; C1/(X*C2) => (C1/C2) / X
define float @fdiv7(float %x) {
  %t1 = fmul float %x, 3.0e0
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv7(
; CHECK: fdiv fast float 5.000000e+00, %x
}

; C1/(X/C2) => (C1*C2) / X
define float @fdiv8(float %x) {
  %t1 = fdiv float %x, 3.0e0
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv8(
; CHECK: fdiv fast float 4.500000e+01, %x
}

; C1/(C2/X) => (C1/C2) * X
define float @fdiv9(float %x) {
  %t1 = fdiv float 3.0e0, %x
  %t2 = fdiv fast float 15.0e0, %t1
  ret float %t2
; CHECK-LABEL: @fdiv9(
; CHECK: fmul fast float %x, 5.000000e+00
}

; =========================================================================
;
;   Test cases about factorization
;
; =========================================================================
; x*z + y*z => (x+y) * z
define float @fact_mul1(float %x, float %y, float %z) {
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %y, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul1(
; CHECK: fmul fast float %1, %z
}

; z*x - y*z => (x-y) * z
define float @fact_mul2(float %x, float %y, float %z) {
  %t1 = fmul fast float %z, %x
  %t2 = fmul fast float %y, %z
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul2(
; CHECK: fmul fast float %1, %z
}

; z*x - z*y => (x-y) * z
define float @fact_mul3(float %x, float %y, float %z) {
  %t2 = fmul fast float %z, %y
  %t1 = fmul fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul3(
; CHECK: fmul fast float %1, %z
}

; x*z - z*y => (x-y) * z
define float @fact_mul4(float %x, float %y, float %z) {
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %z, %y
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_mul4(
; CHECK: fmul fast float %1, %z
}

; x/y + x/z, no xform
define float @fact_div1(float %x, float %y, float %z) {
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %x, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div1(
; CHECK: fadd fast float %t1, %t2
}

; x/y + z/x; no xform
define float @fact_div2(float %x, float %y, float %z) {
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div2(
; CHECK: fadd fast float %t1, %t2
}

; y/x + z/x => (y+z)/x
define float @fact_div3(float %x, float %y, float %z) {
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div3(
; CHECK: fdiv fast float %1, %x
}

; y/x - z/x => (y-z)/x
define float @fact_div4(float %x, float %y, float %z) {
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div4(
; CHECK: fdiv fast float %1, %x
}

; y/x + z/x => (y+z)/x is still performed here because y+z is a normal value.
define float @fact_div5(float %x) {
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div5(
; CHECK: fdiv fast float 0x3818000000000000, %x
}

; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
define float @fact_div6(float %x) {
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
; CHECK-LABEL: @fact_div6(
; CHECK: %t3 = fsub fast float %t1, %t2
}

; =========================================================================
;
;   Test cases for square root
;
; =========================================================================

; A squared factor fed into a square root intrinsic should be hoisted out
; as a fabs() value.
; We have to rely on a function-level attribute to enable this optimization
; because intrinsics don't currently have access to IR-level fast-math
; flags. If that changes, we can relax the requirement on all of these
; tests to just specify 'fast' on the sqrt.

attributes #0 = { "unsafe-fp-math" = "true" }

declare double @llvm.sqrt.f64(double)

; sqrt(x*x) => fabs(x)
define double @sqrt_intrinsic_arg_squared(double %x) #0 {
  %mul = fmul fast double %x, %x
  %sqrt = call double @llvm.sqrt.f64(double %mul)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_arg_squared(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}

; Check all 6 combinations of a 3-way multiplication tree where
; one factor is repeated.

; (y*x)*x
define double @sqrt_intrinsic_three_args1(double %x, double %y) #0 {
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %mul, %x
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args1(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; (x*y)*x
define double @sqrt_intrinsic_three_args2(double %x, double %y) #0 {
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %mul, %x
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args2(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; (x*x)*y
define double @sqrt_intrinsic_three_args3(double %x, double %y) #0 {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args3(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; x*(y*x)
define double @sqrt_intrinsic_three_args4(double %x, double %y) #0 {
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %x, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args4(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; x*(x*y)
define double @sqrt_intrinsic_three_args5(double %x, double %y) #0 {
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %x, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args5(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; y*(x*x)
define double @sqrt_intrinsic_three_args6(double %x, double %y) #0 {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %y, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_three_args6(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %y)
; CHECK-NEXT: %1 = fmul fast double %fabs, %sqrt1
; CHECK-NEXT: ret double %1
}

; sqrt((x*x)*(x*x)) => x*x
define double @sqrt_intrinsic_arg_4th(double %x) #0 {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_arg_4th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
; CHECK-NEXT: ret double %mul
}

; sqrt(((x*x)*x)*(x*x)) => (x*x) * sqrt(x)
define double @sqrt_intrinsic_arg_5th(double %x) #0 {
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %x
  %mul3 = fmul fast double %mul2, %mul
  %sqrt = call double @llvm.sqrt.f64(double %mul3)
  ret double %sqrt

; CHECK-LABEL: sqrt_intrinsic_arg_5th(
; CHECK-NEXT: %mul = fmul fast double %x, %x
; CHECK-NEXT: %sqrt1 = call fast double @llvm.sqrt.f64(double %x)
; CHECK-NEXT: %1 = fmul fast double %mul, %sqrt1
; CHECK-NEXT: ret double %1
}

; Check that square root calls have the same behavior.

declare float @sqrtf(float)
declare double @sqrt(double)
declare fp128 @sqrtl(fp128)

; sqrtf(x*x) => fabs(x)
define float @sqrt_call_squared_f32(float %x) #0 {
  %mul = fmul fast float %x, %x
  %sqrt = call float @sqrtf(float %mul)
  ret float %sqrt

; CHECK-LABEL: sqrt_call_squared_f32(
; CHECK-NEXT: %fabs = call fast float @llvm.fabs.f32(float %x)
; CHECK-NEXT: ret float %fabs
}

; sqrt(x*x) => fabs(x)
define double @sqrt_call_squared_f64(double %x) #0 {
  %mul = fmul fast double %x, %x
  %sqrt = call double @sqrt(double %mul)
  ret double %sqrt

; CHECK-LABEL: sqrt_call_squared_f64(
; CHECK-NEXT: %fabs = call fast double @llvm.fabs.f64(double %x)
; CHECK-NEXT: ret double %fabs
}

; sqrtl(x*x) => fabs(x)
define fp128 @sqrt_call_squared_f128(fp128 %x) #0 {
  %mul = fmul fast fp128 %x, %x
  %sqrt = call fp128 @sqrtl(fp128 %mul)
  ret fp128 %sqrt

; CHECK-LABEL: sqrt_call_squared_f128(
; CHECK-NEXT: %fabs = call fast fp128 @llvm.fabs.f128(fp128 %x)
; CHECK-NEXT: ret fp128 %fabs
}

; =========================================================================
;
;   Test cases for fmin / fmax
;
; =========================================================================

declare double @fmax(double, double)
declare double @fmin(double, double)
declare float @fmaxf(float, float)
declare float @fminf(float, float)
declare fp128 @fmaxl(fp128, fp128)
declare fp128 @fminl(fp128, fp128)

; No NaNs is the minimum requirement to replace these calls.
; This should always be set when unsafe-fp-math is true, but
; alternate the attributes for additional test coverage.
; 'nsz' is implied by the definition of fmax or fmin itself.
attributes #1 = { "no-nans-fp-math" = "true" }

; Shrink and remove the call.
define float @max1(float %a, float %b) #0 {
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call double @fmax(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f

; CHECK-LABEL: max1(
; CHECK-NEXT: fcmp fast ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; fmaxf call under "no-nans-fp-math" becomes fcmp + select.
define float @max2(float %a, float %b) #1 {
  %c = call float @fmaxf(float %a, float %b)
  ret float %c

; CHECK-LABEL: max2(
; CHECK-NEXT: fcmp nnan nsz ogt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}


; fmax call under "unsafe-fp-math" becomes fcmp + select.
define double @max3(double %a, double %b) #0 {
  %c = call double @fmax(double %a, double %b)
  ret double %c

; CHECK-LABEL: max3(
; CHECK-NEXT: fcmp fast ogt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}

; fmaxl call under "no-nans-fp-math" becomes fcmp + select.
define fp128 @max4(fp128 %a, fp128 %b) #1 {
  %c = call fp128 @fmaxl(fp128 %a, fp128 %b)
  ret fp128 %c

; CHECK-LABEL: max4(
; CHECK-NEXT: fcmp nnan nsz ogt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}

; Shrink and remove the call.
define float @min1(float %a, float %b) #1 {
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call double @fmin(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f

; CHECK-LABEL: min1(
; CHECK-NEXT: fcmp nnan nsz olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; fminf call under "unsafe-fp-math" becomes fcmp + select.
define float @min2(float %a, float %b) #0 {
  %c = call float @fminf(float %a, float %b)
  ret float %c

; CHECK-LABEL: min2(
; CHECK-NEXT: fcmp fast olt float %a, %b
; CHECK-NEXT: select {{.*}} float %a, float %b
; CHECK-NEXT: ret
}

; fmin call under "no-nans-fp-math" becomes fcmp + select.
define double @min3(double %a, double %b) #1 {
  %c = call double @fmin(double %a, double %b)
  ret double %c

; CHECK-LABEL: min3(
; CHECK-NEXT: fcmp nnan nsz olt double %a, %b
; CHECK-NEXT: select {{.*}} double %a, double %b
; CHECK-NEXT: ret
}

; fminl call under "unsafe-fp-math" becomes fcmp + select.
define fp128 @min4(fp128 %a, fp128 %b) #0 {
  %c = call fp128 @fminl(fp128 %a, fp128 %b)
  ret fp128 %c

; CHECK-LABEL: min4(
; CHECK-NEXT: fcmp fast olt fp128 %a, %b
; CHECK-NEXT: select {{.*}} fp128 %a, fp128 %b
; CHECK-NEXT: ret
}