; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
; RUN: opt < %s -instcombine -S | FileCheck %s

; Test case: "float fold(float a) { return 1.2f * a * 2.3f; }"
; Both multiplies are 'fast', so the constants 1.2f and 2.3f are expected to
; be folded into a single multiplier.
define float @fold(float %a) {
; CHECK-LABEL: @fold(
; CHECK-NEXT: [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
; CHECK-NEXT: ret float [[MUL1]]
;
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
}

; Same test case as the one used in fold(), except that the second fmul has
; no fast-math flags (strict FP mode), so the two multiplies must be kept.
define float @notfold(float %a) {
; CHECK-LABEL: @notfold(
; CHECK-NEXT: [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
; CHECK-NEXT: [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
; CHECK-NEXT: ret float [[MUL1]]
;
  %mul = fmul fast float %a, 0x3FF3333340000000
  %mul1 = fmul float %mul, 0x4002666660000000
  ret float %mul1
}

; Mirror image of notfold(): only the second (outer) fmul is 'fast'. The CHECK
; lines expect the constants to be folded in this case.
define float @fold2(float %a) {
; CHECK-LABEL: @fold2(
; CHECK-NEXT: [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
; CHECK-NEXT: ret float [[MUL1]]
;
  %mul = fmul float %a, 0x3FF3333340000000
  %mul1 = fmul fast float %mul, 0x4002666660000000
  ret float %mul1
}

; C * f1 + f1 = (C+1) * f1
; Here C = 5.0, so the expected result is f1 * 6.0.
; TODO: The particular case where C is 2 (so the folded result is 3.0*f1) is
; always safe, and so doesn't need any FMF.
; That is, (x + x + x) and (3*x) each have only a single rounding.
define double @fold3(double %f1) {
; CHECK-LABEL: @fold3(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[F1:%.*]], 6.000000e+00
; CHECK-NEXT: ret double [[TMP1]]
;
  %t1 = fmul fast double 5.000000e+00, %f1
  %t2 = fadd fast double %f1, %t1
  ret double %t2
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define double @fold3_reassoc_nsz(double %f1) {
; CHECK-LABEL: @fold3_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz double [[F1:%.*]], 6.000000e+00
; CHECK-NEXT: ret double [[TMP1]]
;
  %t1 = fmul reassoc nsz double 5.000000e+00, %f1
  %t2 = fadd reassoc nsz double %f1, %t1
  ret double %t2
}

; TODO: This doesn't require 'nsz'. It should fold to f1 * 6.0.
; For now the CHECK lines only expect the fmul operands to be commuted.
define double @fold3_reassoc(double %f1) {
; CHECK-LABEL: @fold3_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc double [[F1:%.*]], 5.000000e+00
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc double [[TMP1]], [[F1]]
; CHECK-NEXT: ret double [[TMP2]]
;
  %t1 = fmul reassoc double 5.000000e+00, %f1
  %t2 = fadd reassoc double %f1, %t1
  ret double %t2
}

; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
; Here C1 = 4.0 and C2 = 5.0; only the final fadd carries fast-math flags.
define float @fold4(float %f1, float %f2) {
; CHECK-LABEL: @fold4(
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast float 9.000000e+00, [[TMP1]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd fast float %sub, %sub1
  ret float %add
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold4_reassoc_nsz(float %f1, float %f2) {
; CHECK-LABEL: @fold4_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub reassoc nsz float 9.000000e+00, [[TMP1]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd reassoc nsz float %sub, %sub1
  ret float %add
}

; TODO: This doesn't require 'nsz'. It should fold to (9.0 - (f1 + f2)).
define float @fold4_reassoc(float %f1, float %f2) {
; CHECK-LABEL: @fold4_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fsub float 4.000000e+00, [[F1:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub float 5.000000e+00, [[F2:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %sub = fsub float 4.000000e+00, %f1
  %sub1 = fsub float 5.000000e+00, %f2
  %add = fadd reassoc float %sub, %sub1
  ret float %add
}

; (X + C1) + C2 => X + (C1 + C2)
; Here C1 = 4.0 and C2 = 5.0; only the outer fadd carries fast-math flags.
define float @fold5(float %f1) {
; CHECK-LABEL: @fold5(
; CHECK-NEXT: [[ADD1:%.*]] = fadd fast float [[F1:%.*]], 9.000000e+00
; CHECK-NEXT: ret float [[ADD1]]
;
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd fast float %add, 5.000000e+00
  ret float %add1
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold5_reassoc_nsz(float %f1) {
; CHECK-LABEL: @fold5_reassoc_nsz(
; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc nsz float [[F1:%.*]], 9.000000e+00
; CHECK-NEXT: ret float [[ADD1]]
;
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd reassoc nsz float %add, 5.000000e+00
  ret float %add1
}

; TODO: This doesn't require 'nsz'. It should fold to f1 + 9.0
define float @fold5_reassoc(float %f1) {
; CHECK-LABEL: @fold5_reassoc(
; CHECK-NEXT: [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00
; CHECK-NEXT: [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00
; CHECK-NEXT: ret float [[ADD1]]
;
  %add = fadd float %f1, 4.000000e+00
  %add1 = fadd reassoc float %add, 5.000000e+00
  ret float %add1
}

; (X + X) + X + X => 4.0 * X
define float @fold6(float %f1) {
; CHECK-LABEL: @fold6(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 4.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %t1
  %t3 = fadd fast float %t2, %f1
  ret float %t3
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold6_reassoc_nsz(float %f1) {
; CHECK-LABEL: @fold6_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 4.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
  %t1 = fadd reassoc nsz float %f1, %f1
  %t2 = fadd reassoc nsz float %f1, %t1
  %t3 = fadd reassoc nsz float %t2, %f1
  ret float %t3
}

; TODO: This doesn't require 'nsz'. It should fold to f1 * 4.0.
define float @fold6_reassoc(float %f1) {
; With 'reassoc' only, the CHECK lines expect the three fadds to remain
; (the second one with its operands commuted).
; CHECK-LABEL: @fold6_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc float [[TMP1]], [[F1]]
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP2]], [[F1]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %t1 = fadd reassoc float %f1, %f1
  %t2 = fadd reassoc float %f1, %t1
  %t3 = fadd reassoc float %t2, %f1
  ret float %t3
}

; C1 * X + (X + X) = (C1 + 2) * X
; Here C1 = 5.0, so the expected result is X * 7.0.
define float @fold7(float %f1) {
; CHECK-LABEL: @fold7(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 7.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
  %t1 = fmul fast float %f1, 5.000000e+00
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold7_reassoc_nsz(float %f1) {
; CHECK-LABEL: @fold7_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 7.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
  %t1 = fmul reassoc nsz float %f1, 5.000000e+00
  %t2 = fadd reassoc nsz float %f1, %f1
  %t3 = fadd reassoc nsz float %t1, %t2
  ret float %t3
}

; TODO: This doesn't require 'nsz'. It should fold to f1 * 7.0.
define float @fold7_reassoc(float %f1) {
; With 'reassoc' only, the CHECK lines expect all three instructions to remain.
; CHECK-LABEL: @fold7_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[F1:%.*]], 5.000000e+00
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]]
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %t1 = fmul reassoc float %f1, 5.000000e+00
  %t2 = fadd reassoc float %f1, %f1
  %t3 = fadd reassoc float %t1, %t2
  ret float %t3
}

; (X + X) + (X + X) + X => 5.0 * X
define float @fold8(float %f1) {
; CHECK-LABEL: @fold8(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 5.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
  %t1 = fadd fast float %f1, %f1
  %t2 = fadd fast float %f1, %f1
  %t3 = fadd fast float %t1, %t2
  %t4 = fadd fast float %t3, %f1
  ret float %t4
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold8_reassoc_nsz(float %f1) {
; CHECK-LABEL: @fold8_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 5.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
  %t1 = fadd reassoc nsz float %f1, %f1
  %t2 = fadd reassoc nsz float %f1, %f1
  %t3 = fadd reassoc nsz float %t1, %t2
  %t4 = fadd reassoc nsz float %t3, %f1
  ret float %t4
}

; TODO: This doesn't require 'nsz'. It should fold to f1 * 5.0.
define float @fold8_reassoc(float %f1) {
; With 'reassoc' only, the CHECK lines expect all four fadds to remain.
; CHECK-LABEL: @fold8_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]]
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: [[TMP4:%.*]] = fadd reassoc float [[TMP3]], [[F1]]
; CHECK-NEXT: ret float [[TMP4]]
;
  %t1 = fadd reassoc float %f1, %f1
  %t2 = fadd reassoc float %f1, %f1
  %t3 = fadd reassoc float %t1, %t2
  %t4 = fadd reassoc float %t3, %f1
  ret float %t4
}

; X - (X + Y) => 0 - Y
; The CHECK lines expect the negation to be emitted as (-0.0 - Y).
define float @fold9(float %f1, float %f2) {
; CHECK-LABEL: @fold9(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float -0.000000e+00, [[F2:%.*]]
; CHECK-NEXT: ret float [[TMP1]]
;
  %t1 = fadd float %f1, %f2
  %t3 = fsub fast float %f1, %t1
  ret float %t3
}

; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
define float @fold9_reassoc_nsz(float %f1, float %f2) {
; CHECK-LABEL: @fold9_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float -0.000000e+00, [[F2:%.*]]
; CHECK-NEXT: ret float [[TMP1]]
;
  %t1 = fadd float %f1, %f2
  %t3 = fsub reassoc nsz float %f1, %t1
  ret float %t3
}

; TODO: This doesn't require 'nsz'. It should fold to 0 - f2
define float @fold9_reassoc(float %f1, float %f2) {
; CHECK-LABEL: @fold9_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fadd float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub reassoc float [[F1]], [[TMP1]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fadd float %f1, %f2
  %t3 = fsub reassoc float %f1, %t1
  ret float %t3
}

; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
; top of resulting simplified expression tree may potentially reveal some
; optimization opportunities in the super-expression trees.
;
; Here C1 = 2.0 and C2 = -3.0 (written as "f2 - 3.0"), so C3 = -1.0.
define float @fold10(float %f1, float %f2) {
; CHECK-LABEL: @fold10(
; CHECK-NEXT: [[T2:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[T2]], -1.000000e+00
; CHECK-NEXT: ret float [[T3]]
;
  %t1 = fadd fast float 2.000000e+00, %f1
  %t2 = fsub fast float %f2, 3.000000e+00
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; Check again with 'reassoc' and 'nsz'.
; TODO: We may be able to remove the 'nsz' requirement.
define float @fold10_reassoc_nsz(float %f1, float %f2) {
; CHECK-LABEL: @fold10_reassoc_nsz(
; CHECK-NEXT: [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT: [[T3:%.*]] = fadd reassoc nsz float [[T2]], -1.000000e+00
; CHECK-NEXT: ret float [[T3]]
;
  %t1 = fadd reassoc nsz float 2.000000e+00, %f1
  %t2 = fsub reassoc nsz float %f2, 3.000000e+00
  %t3 = fadd reassoc nsz float %t1, %t2
  ret float %t3
}

; Observe that the fold is not done with only reassoc (the instructions are
; canonicalized, but not folded).
; TODO: As noted above, 'nsz' may not be required for this to be fully folded.
define float @fold10_reassoc(float %f1, float %f2) {
; CHECK-LABEL: @fold10_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00
; CHECK-NEXT: [[TMP2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %t1 = fadd reassoc float 2.000000e+00, %f1
  %t2 = fsub reassoc float %f2, 3.000000e+00
  %t3 = fadd reassoc float %t1, %t2
  ret float %t3
}

; This used to crash/miscompile.
; fail1: (f1 - 1.0) added to itself three times; the CHECK lines expect
; f1 * 3.0 + -3.0. The %f2 argument is unused.
define float @fail1(float %f1, float %f2) {
; CHECK-LABEL: @fail1(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+00
; CHECK-NEXT: [[TMP2:%.*]] = fadd fast float [[TMP1]], -3.000000e+00
; CHECK-NEXT: ret float [[TMP2]]
;
  %conv3 = fadd fast float %f1, -1.000000e+00
  %add = fadd fast float %conv3, %conv3
  %add2 = fadd fast float %add, %conv3
  ret float %add2
}

; fail2: (f1 - f2) - (f1 + f2); the CHECK lines expect -(f2 + f2), emitted as
; (-0.0 - (f2 + f2)).
define double @fail2(double %f1, double %f2) {
; CHECK-LABEL: @fail2(
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast double [[F2:%.*]], [[F2]]
; CHECK-NEXT: [[TMP2:%.*]] = fsub fast double -0.000000e+00, [[TMP1]]
; CHECK-NEXT: ret double [[TMP2]]
;
  %t1 = fsub fast double %f1, %f2
  %t2 = fadd fast double %f1, %f2
  %t3 = fsub fast double %t1, %t2
  ret double %t3
}

; c1 * x - x => (c1 - 1.0) * x
; Here c1 = 7.0, so the expected result is x * 6.0.
define float @fold13(float %x) {
; CHECK-LABEL: @fold13(
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast float [[X:%.*]], 6.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
  %mul = fmul fast float %x, 7.000000e+00
  %sub = fsub fast float %mul, %x
  ret float %sub
}

; Check again using the minimal subset of FMF.
define float @fold13_reassoc_nsz(float %x) {
; CHECK-LABEL: @fold13_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], 6.000000e+00
; CHECK-NEXT: ret float [[TMP1]]
;
  %mul = fmul reassoc nsz float %x, 7.000000e+00
  %sub = fsub reassoc nsz float %mul, %x
  ret float %sub
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fold13_reassoc(float %x) {
; CHECK-LABEL: @fold13_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 7.000000e+00
; CHECK-NEXT: [[TMP2:%.*]] = fsub reassoc float [[TMP1]], [[X]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %mul = fmul reassoc float %x, 7.000000e+00
  %sub = fsub reassoc float %mul, %x
  ret float %sub
}

; (select X+Y, X-Y) => X + (select Y, -Y)
; This is always safe. No FMF required.
define float @fold16(float %x, float %y) {
; CHECK-LABEL: @fold16(
; CHECK-NEXT: [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = fsub float -0.000000e+00, [[Y]]
; CHECK-NEXT: [[R_P:%.*]] = select i1 [[CMP]], float [[Y]], float [[TMP1]]
; CHECK-NEXT: [[R:%.*]] = fadd float [[R_P]], [[X]]
; CHECK-NEXT: ret float [[R]]
;
  %cmp = fcmp ogt float %x, %y
  %plus = fadd float %x, %y
  %minus = fsub float %x, %y
  %r = select i1 %cmp, float %plus, float %minus
  ret float %r
}

; =========================================================================
;
; Testing-cases about negation
;
; =========================================================================

; (-f1) * (-f2) => f1 * f2. The first negation is written as (-0.0 - f1), the
; second as an 'nsz' (0.0 - f2); the fmul itself has no fast-math flags.
define float @fneg1(float %f1, float %f2) {
; CHECK-LABEL: @fneg1(
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[F1:%.*]], [[F2:%.*]]
; CHECK-NEXT: ret float [[MUL]]
;
  %sub = fsub float -0.000000e+00, %f1
  %sub1 = fsub nsz float 0.000000e+00, %f2
  %mul = fmul float %sub, %sub1
  ret float %mul
}

; An 'nsz' (0.0 - x) is expected to be canonicalized to (-0.0 - x).
define float @fneg2(float %x) {
; CHECK-LABEL: @fneg2(
; CHECK-NEXT: [[SUB:%.*]] = fsub nsz float -0.000000e+00, [[X:%.*]]
; CHECK-NEXT: ret float [[SUB]]
;
  %sub = fsub nsz float 0.0, %x
  ret float %sub
}

; Vector variant of fneg2 with an undef lane in the constant operand; the
; CHECK lines expect every lane of the constant to become -0.0.
define <2 x float> @fneg2_vec_undef(<2 x float> %x) {
; CHECK-LABEL: @fneg2_vec_undef(
; CHECK-NEXT: [[SUB:%.*]] = fsub nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
; CHECK-NEXT: ret <2 x float> [[SUB]]
;
  %sub = fsub nsz <2 x float> <float undef, float 0.0>, %x
  ret <2 x float> %sub
}

; =========================================================================
;
; Testing-cases about div
;
; =========================================================================

; X/C1 / C2 => X * (1/(C2*C1))
define float @fdiv1(float %x) {
; CHECK-LABEL: @fdiv1(
; CHECK-NEXT: [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000
; CHECK-NEXT: ret float [[DIV1]]
;
  %div = fdiv float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FD7303B60000000 = 0.36231884057971014492
}

; X*C1 / C2 => X * (C1/C2)
define float @fdiv2(float %x) {
; CHECK-LABEL: @fdiv2(
; CHECK-NEXT: [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FE0B21660000000
; CHECK-NEXT: ret float [[DIV1]]
;
  %mul = fmul float %x, 0x3FF3333340000000
  %div1 = fdiv fast float %mul, 0x4002666660000000
  ret float %div1

; 0x3FF3333340000000 = 1.2f
; 0x4002666660000000 = 2.3f
; 0x3FE0B21660000000 = 0.52173918485641479492
}

; Vector variant of fdiv2: <6.0, 9.0> / <2.0, 3.0> folds to <3.0, 3.0>.
define <2 x float> @fdiv2_vec(<2 x float> %x) {
; CHECK-LABEL: @fdiv2_vec(
; CHECK-NEXT: [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
; CHECK-NEXT: ret <2 x float> [[DIV1]]
;
  %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
  %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
  ret <2 x float> %div1
}

; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal.
; The CHECK lines still expect the second fdiv to become a multiply by a
; constant; only the first fdiv is left untouched.
define float @fdiv3(float %x) {
; CHECK-LABEL: @fdiv3(
; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[X:%.*]], 0x47EFFFFFE0000000
; CHECK-NEXT: [[DIV1:%.*]] = fmul fast float [[DIV]], 0x3FDBD37A80000000
; CHECK-NEXT: ret float [[DIV1]]
;
  %div = fdiv float %x, 0x47EFFFFFE0000000
  %div1 = fdiv fast float %div, 0x4002666660000000
  ret float %div1
}

; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
; NOTE(review): neither instruction below carries fast-math flags, so this
; fold would presumably not fire regardless of the constants - confirm that
; this is the intended coverage.
define float @fdiv4(float %x) {
; CHECK-LABEL: @fdiv4(
; CHECK-NEXT: [[MUL:%.*]] = fmul float [[X:%.*]], 0x47EFFFFFE0000000
; CHECK-NEXT: [[DIV:%.*]] = fdiv float [[MUL]], 0x3FC99999A0000000
; CHECK-NEXT: ret float [[DIV]]
;
  %mul = fmul float %x, 0x47EFFFFFE0000000
  %div = fdiv float %mul, 0x3FC99999A0000000
  ret float %div
}

; =========================================================================
;
; Testing-cases about factorization
;
; =========================================================================
; x*z + y*z => (x+y) * z
define float @fact_mul1(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul1(
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %y, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_mul1_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul1_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fmul reassoc nsz float %x, %z
  %t2 = fmul reassoc nsz float %y, %z
  %t3 = fadd reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_mul1_reassoc(float %x, float %y, float %z) {
; With 'reassoc' only, both fmuls and the fadd are expected to remain.
; CHECK-LABEL: @fact_mul1_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %t1 = fmul reassoc float %x, %z
  %t2 = fmul reassoc float %y, %z
  %t3 = fadd reassoc float %t1, %t2
  ret float %t3
}

; z*x - y*z => (x-y) * z
; The common factor z is the first operand of one fmul and the second operand
; of the other.
define float @fact_mul2(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul2(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fmul fast float %z, %x
  %t2 = fmul fast float %y, %z
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_mul2_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul2_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fmul reassoc nsz float %z, %x
  %t2 = fmul reassoc nsz float %y, %z
  %t3 = fsub reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_mul2_reassoc(float %x, float %y, float %z) {
; With 'reassoc' only, both fmuls and the fsub are expected to remain.
; CHECK-LABEL: @fact_mul2_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[Z:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %t1 = fmul reassoc float %z, %x
  %t2 = fmul reassoc float %y, %z
  %t3 = fsub reassoc float %t1, %t2
  ret float %t3
}

; z*x - z*y => (x-y) * z
; Note that the subtrahend (%t2) is defined before the minuend (%t1).
define float @fact_mul3(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul3(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t2 = fmul fast float %z, %y
  %t1 = fmul fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_mul3_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul3_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t2 = fmul reassoc nsz float %z, %y
  %t1 = fmul reassoc nsz float %z, %x
  %t3 = fsub reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_mul3_reassoc(float %x, float %y, float %z) {
; With 'reassoc' only, both fmuls and the fsub are expected to remain.
; CHECK-LABEL: @fact_mul3_reassoc(
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc float [[Z:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[Z]], [[X:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %t2 = fmul reassoc float %z, %y
  %t1 = fmul reassoc float %z, %x
  %t3 = fsub reassoc float %t1, %t2
  ret float %t3
}

; x*z - z*y => (x-y) * z
; The common factor z is the second operand of one fmul and the first operand
; of the other.
define float @fact_mul4(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul4(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fmul fast float %x, %z
  %t2 = fmul fast float %z, %y
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_mul4_reassoc_nsz(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_mul4_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fmul reassoc nsz float %x, %z
  %t2 = fmul reassoc nsz float %z, %y
  %t3 = fsub reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_mul4_reassoc(float %x, float %y, float %z) {
; With 'reassoc' only, both fmuls and the fsub are expected to remain.
; CHECK-LABEL: @fact_mul4_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fmul reassoc float [[Z]], [[Y:%.*]]
; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %t1 = fmul reassoc float %x, %z
  %t2 = fmul reassoc float %z, %y
  %t3 = fsub reassoc float %t1, %t2
  ret float %t3
}

; x/y + x/z, no xform
; (x is the common numerator; the divisors differ.)
define float @fact_div1(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div1(
; CHECK-NEXT: [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[T2:%.*]] = fdiv fast float [[X]], [[Z:%.*]]
; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
; CHECK-NEXT: ret float [[T3]]
;
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %x, %z
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; x/y + z/x; no xform
; (x is a numerator in one fdiv and the divisor in the other.)
define float @fact_div2(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div2(
; CHECK-NEXT: [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
; CHECK-NEXT: [[T2:%.*]] = fdiv fast float [[Z:%.*]], [[X]]
; CHECK-NEXT: [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
; CHECK-NEXT: ret float [[T3]]
;
  %t1 = fdiv fast float %x, %y
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; y/x + z/x => (y+z)/x
define float @fact_div3(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div3(
; CHECK-NEXT: [[TMP1:%.*]] = fadd fast float [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_div3_reassoc_nsz(float %x, float %y, float %z) {
; 'reassoc nsz' on all three instructions is enough for the factorization.
; CHECK-LABEL: @fact_div3_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fadd reassoc nsz float [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fdiv reassoc nsz float %y, %x
  %t2 = fdiv reassoc nsz float %z, %x
  %t3 = fadd reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_div3_reassoc(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div3_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
; CHECK-NEXT: [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %t1 = fdiv reassoc float %y, %x
  %t2 = fdiv reassoc float %z, %x
  %t3 = fadd reassoc float %t1, %t2
  ret float %t3
}

; y/x - z/x => (y-z)/x
define float @fact_div4(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div4(
; CHECK-NEXT: [[TMP1:%.*]] = fsub fast float [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fdiv fast float %y, %x
  %t2 = fdiv fast float %z, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; Check again using the minimal subset of FMF.
define float @fact_div4_reassoc_nsz(float %x, float %y, float %z) {
; 'reassoc nsz' on all three instructions is enough for the factorization.
; CHECK-LABEL: @fact_div4_reassoc_nsz(
; CHECK-NEXT: [[TMP1:%.*]] = fsub reassoc nsz float [[Y:%.*]], [[Z:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %t1 = fdiv reassoc nsz float %y, %x
  %t2 = fdiv reassoc nsz float %z, %x
  %t3 = fsub reassoc nsz float %t1, %t2
  ret float %t3
}

; Verify the fold is not done with only 'reassoc' ('nsz' is required).
define float @fact_div4_reassoc(float %x, float %y, float %z) {
; CHECK-LABEL: @fact_div4_reassoc(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
; CHECK-NEXT: [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
; CHECK-NEXT: ret float [[TMP3]]
;
  %t1 = fdiv reassoc float %y, %x
  %t2 = fdiv reassoc float %z, %x
  %t3 = fsub reassoc float %t1, %t2
  ret float %t3
}

; y/x + z/x => (y+z)/x is still done with these constant numerators: their
; sum (0x3818000000000000) is a normal float value, so the CHECK lines expect
; a single fdiv.
define float @fact_div5(float %x) {
; CHECK-LABEL: @fact_div5(
; CHECK-NEXT: [[TMP1:%.*]] = fdiv fast float 0x3818000000000000, [[X:%.*]]
; CHECK-NEXT: ret float [[TMP1]]
;
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fadd fast float %t1, %t2
  ret float %t3
}

; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
define float @fact_div6(float %x) {
; Same numerators as an fadd would use, but subtracted; the CHECK lines expect
; both fdivs and the fsub to remain.
; CHECK-LABEL: @fact_div6(
; CHECK-NEXT: [[T1:%.*]] = fdiv fast float 0x3810000000000000, [[X:%.*]]
; CHECK-NEXT: [[T2:%.*]] = fdiv fast float 0x3800000000000000, [[X]]
; CHECK-NEXT: [[T3:%.*]] = fsub fast float [[T1]], [[T2]]
; CHECK-NEXT: ret float [[T3]]
;
  %t1 = fdiv fast float 0x3810000000000000, %x
  %t2 = fdiv fast float 0x3800000000000000, %x
  %t3 = fsub fast float %t1, %t2
  ret float %t3
}

; =========================================================================
;
; Test-cases for square root
;
; =========================================================================

; A squared factor fed into a square root intrinsic should be hoisted out
; as a fabs() value.

declare double @llvm.sqrt.f64(double)

; sqrt(x * x) => fabs(x)
define double @sqrt_intrinsic_arg_squared(double %x) {
; CHECK-LABEL: @sqrt_intrinsic_arg_squared(
; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT: ret double [[FABS]]
;
  %mul = fmul fast double %x, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul)
  ret double %sqrt
}

; Check all 6 combinations of a 3-way multiplication tree where
; one factor is repeated.
; In every variant the expected result is fabs(x) * sqrt(y).

; sqrt((y * x) * x)
define double @sqrt_intrinsic_three_args1(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args1(
; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT: ret double [[TMP1]]
;
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %mul, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; sqrt((x * y) * x)
define double @sqrt_intrinsic_three_args2(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args2(
; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT: ret double [[TMP1]]
;
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %mul, %x
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; sqrt((x * x) * y)
define double @sqrt_intrinsic_three_args3(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args3(
; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT: ret double [[TMP1]]
;
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; sqrt(x * (y * x))
define double @sqrt_intrinsic_three_args4(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args4(
; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT: ret double [[TMP1]]
;
  %mul = fmul fast double %y, %x
  %mul2 = fmul fast double %x, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; sqrt(x * (x * y))
define double @sqrt_intrinsic_three_args5(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args5(
; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT: ret double [[TMP1]]
;
  %mul = fmul fast double %x, %y
  %mul2 = fmul fast double %x, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; sqrt(y * (x * x))
define double @sqrt_intrinsic_three_args6(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_three_args6(
; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
; CHECK-NEXT: ret double [[TMP1]]
;
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %y, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; If any operation is not 'fast', we can't simplify.

; Negative test: the inner multiply (x * x) carries no fast-math flags while
; the outer multiply and the sqrt call are 'fast'. The expected output is the
; input unchanged.
define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
; CHECK-LABEL: @sqrt_intrinsic_not_so_fast(
; CHECK-NEXT: [[MUL:%.*]] = fmul double [[X:%.*]], [[X]]
; CHECK-NEXT: [[MUL2:%.*]] = fmul fast double [[MUL]], [[Y:%.*]]
; CHECK-NEXT: [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[MUL2]])
; CHECK-NEXT: ret double [[SQRT]]
;
  %mul = fmul double %x, %x
  %mul2 = fmul fast double %mul, %y
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; sqrt((x * x) * (x * x)): the expected result is the single multiply x * x,
; with no sqrt and no fabs call left.
define double @sqrt_intrinsic_arg_4th(double %x) {
; CHECK-LABEL: @sqrt_intrinsic_arg_4th(
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT: ret double [[MUL]]
;
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
  ret double %sqrt
}

; sqrt(((x * x) * x) * (x * x)): the expected result is (x * x) * sqrt(x),
; i.e. the even power is pulled out and one factor of x stays under the sqrt.
define double @sqrt_intrinsic_arg_5th(double %x) {
; CHECK-LABEL: @sqrt_intrinsic_arg_5th(
; CHECK-NEXT: [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
; CHECK-NEXT: [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
; CHECK-NEXT: [[TMP1:%.*]] = fmul fast double [[MUL]], [[SQRT1]]
; CHECK-NEXT: ret double [[TMP1]]
;
  %mul = fmul fast double %x, %x
  %mul2 = fmul fast double %mul, %x
  %mul3 = fmul fast double %mul2, %mul
  %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
  ret double %sqrt
}

; Check that square root calls have the same behavior.

; Plain (non-intrinsic) square root declarations, one per tested type:
; float, double and fp128.
declare float @sqrtf(float)
declare double @sqrt(double)
declare fp128 @sqrtl(fp128)

; sqrtf(x * x) on float: expected to become a 'fast' call to llvm.fabs.f32.
define float @sqrt_call_squared_f32(float %x) {
; CHECK-LABEL: @sqrt_call_squared_f32(
; CHECK-NEXT: [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
; CHECK-NEXT: ret float [[FABS]]
;
  %mul = fmul fast float %x, %x
  %sqrt = call fast float @sqrtf(float %mul)
  ret float %sqrt
}

; sqrt(x * x) on double: expected to become a 'fast' call to llvm.fabs.f64.
define double @sqrt_call_squared_f64(double %x) {
; CHECK-LABEL: @sqrt_call_squared_f64(
; CHECK-NEXT: [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
; CHECK-NEXT: ret double [[FABS]]
;
  %mul = fmul fast double %x, %x
  %sqrt = call fast double @sqrt(double %mul)
  ret double %sqrt
}

; sqrtl(x * x) on fp128: expected to become a 'fast' call to llvm.fabs.f128.
define fp128 @sqrt_call_squared_f128(fp128 %x) {
; CHECK-LABEL: @sqrt_call_squared_f128(
; CHECK-NEXT: [[FABS:%.*]] = call fast fp128 @llvm.fabs.f128(fp128 [[X:%.*]])
; CHECK-NEXT: ret fp128 [[FABS]]
;
  %mul = fmul fast fp128 %x, %x
  %sqrt = call fast fp128 @sqrtl(fp128 %mul)
  ret fp128 %sqrt
}

; =========================================================================
;
; Test-cases for fmin / fmax
;
; =========================================================================

; Declarations of the double, float and fp128 variants exercised below.
declare double @fmax(double, double)
declare double @fmin(double, double)
declare float @fmaxf(float, float)
declare float @fminf(float, float)
declare fp128 @fmaxl(fp128, fp128)
declare fp128 @fminl(fp128, fp128)

; No NaNs is the minimum requirement to replace these calls.
; This should always be set when unsafe-fp-math is true, but
; alternate the attributes for additional test coverage.
; 'nsz' is implied by the definition of fmax or fmin itself.

; Shrink and remove the call.
; Both float arguments are extended to double, passed to a 'fast' fmax call,
; and the result is truncated back to float. The expected output compares and
; selects directly on the float values (fcmp fast ogt + select); no fpext,
; call or fptrunc remains.
define float @max1(float %a, float %b) {
; CHECK-LABEL: @max1(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call fast double @fmax(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f
}

; fmaxf with only 'nnan' on the call: the expected compare carries
; 'nnan nsz', so 'nsz' is added even though the call did not have it.
define float @max2(float %a, float %b) {
; CHECK-LABEL: @max2(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %c = call nnan float @fmaxf(float %a, float %b)
  ret float %c
}


; fmax on double with 'fast': expected to become fcmp fast ogt + select.
define double @max3(double %a, double %b) {
; CHECK-LABEL: @max3(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
; CHECK-NEXT: ret double [[TMP2]]
;
  %c = call fast double @fmax(double %a, double %b)
  ret double %c
}

; fmaxl on fp128 with only 'nnan': expected to become fcmp nnan nsz ogt +
; select.
define fp128 @max4(fp128 %a, fp128 %b) {
; CHECK-LABEL: @max4(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
; CHECK-NEXT: ret fp128 [[TMP2]]
;
  %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
  ret fp128 %c
}

; Shrink and remove the call.
; Both float arguments are extended to double, passed to an 'nnan' fmin call,
; and the result is truncated back to float. The expected output compares and
; selects directly on the float values (fcmp nnan nsz olt + select); no fpext,
; call or fptrunc remains.
define float @min1(float %a, float %b) {
; CHECK-LABEL: @min1(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %c = fpext float %a to double
  %d = fpext float %b to double
  %e = call nnan double @fmin(double %c, double %d)
  %f = fptrunc double %e to float
  ret float %f
}

; fminf on float with 'fast': expected to become fcmp fast olt + select.
define float @min2(float %a, float %b) {
; CHECK-LABEL: @min2(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
; CHECK-NEXT: ret float [[TMP2]]
;
  %c = call fast float @fminf(float %a, float %b)
  ret float %c
}

; fmin on double with only 'nnan': expected to become fcmp nnan nsz olt +
; select.
define double @min3(double %a, double %b) {
; CHECK-LABEL: @min3(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
; CHECK-NEXT: ret double [[TMP2]]
;
  %c = call nnan double @fmin(double %a, double %b)
  ret double %c
}

; fminl on fp128 with 'fast': expected to become fcmp fast olt + select.
define fp128 @min4(fp128 %a, fp128 %b) {
; CHECK-LABEL: @min4(
; CHECK-NEXT: [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
; CHECK-NEXT: [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
; CHECK-NEXT: ret fp128 [[TMP2]]
;
  %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
  ret fp128 %c
}

; ((which ? 2.0 : a) + 1.0) => (which ? 3.0 : (a + 1.0))
; This is always safe. No FMF required.
; The fadd of 1.0 that follows the phi is expected to be pushed into the phi's
; incoming values: the constant 2.0 from 'entry' becomes 3.0, and the add of %a
; is emitted in the 'delay' block. None of the instructions carry fast-math
; flags.
; Note: the argument %a and the phi %A differ only by case, so the checks
; define the FileCheck variable A twice (first for the argument, then again
; for the phi); the final 'ret' therefore matches the phi.
define float @test55(i1 %which, float %a) {
; CHECK-LABEL: @test55(
; CHECK-NEXT: entry:
; CHECK-NEXT: br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
; CHECK: delay:
; CHECK-NEXT: [[PHITMP:%.*]] = fadd float [[A:%.*]], 1.000000e+00
; CHECK-NEXT: br label [[FINAL]]
; CHECK: final:
; CHECK-NEXT: [[A:%.*]] = phi float [ 3.000000e+00, [[ENTRY:%.*]] ], [ [[PHITMP]], [[DELAY]] ]
; CHECK-NEXT: ret float [[A]]
;
entry:
  br i1 %which, label %final, label %delay

delay:
  br label %final

final:
  %A = phi float [ 2.0, %entry ], [ %a, %delay ]
  %value = fadd float %A, 1.0
  ret float %value
}