Home | History | Annotate | Download | only in InstCombine
      1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
      2 ; RUN: opt < %s -instcombine -S | FileCheck %s
      3 
      4 ; Test case: "float fold(float a) { return 1.2f * a * 2.3f; }"
      5 ; 1.2f and 2.3f are supposed to be folded into a single constant.
      6 define float @fold(float %a) {
      7 ; CHECK-LABEL: @fold(
      8 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
      9 ; CHECK-NEXT:    ret float [[MUL1]]
     10 ;
        ; Both multiplies carry 'fast'.  0x3FF3333340000000 = 1.2f and
        ; 0x4002666660000000 = 2.3f.
     11   %mul = fmul fast float %a, 0x3FF3333340000000
     12   %mul1 = fmul fast float %mul, 0x4002666660000000
     13   ret float %mul1
     14 }
     15 
     16 ; Same test case as the one used in fold(), except that the second multiply
     17 ; has no fast-math flags (strict FP mode), so the constants must stay separate.
     18 define float @notfold(float %a) {
     19 ; CHECK-LABEL: @notfold(
     20 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast float [[A:%.*]], 0x3FF3333340000000
     21 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul float [[MUL]], 0x4002666660000000
     22 ; CHECK-NEXT:    ret float [[MUL1]]
     23 ;
        ; Only the inner multiply is 'fast'; the expected output equals the input.
     24   %mul = fmul fast float %a, 0x3FF3333340000000
     25   %mul1 = fmul float %mul, 0x4002666660000000
     26   ret float %mul1
     27 }
     28 
     29 define float @fold2(float %a) {
     30 ; CHECK-LABEL: @fold2(
     31 ; CHECK-NEXT:    [[MUL1:%.*]] = fmul fast float [[A:%.*]], 0x4006147AE0000000
     32 ; CHECK-NEXT:    ret float [[MUL1]]
     33 ;
        ; Mirror image of notfold(): here only the outer (second) multiply is 'fast',
        ; and the expected output still combines both constants into one multiply.
     34   %mul = fmul float %a, 0x3FF3333340000000
     35   %mul1 = fmul fast float %mul, 0x4002666660000000
     36   ret float %mul1
     37 }
     38 
     39 ; C * f1 + f1 = (C+1) * f1
     40 ; TODO: The particular case where C is 2 (so the folded result is 3.0*f1) is
     41 ; always safe, and so doesn't need any FMF.
     42 ; That is, (x + x + x) and (3*x) each have only a single rounding.
        ; Here C = 5.0 and both instructions are 'fast', so the expected result is a
        ; single multiply by 6.0.
     43 define double @fold3(double %f1) {
     44 ; CHECK-LABEL: @fold3(
     45 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[F1:%.*]], 6.000000e+00
     46 ; CHECK-NEXT:    ret double [[TMP1]]
     47 ;
     48   %t1 = fmul fast double 5.000000e+00, %f1
     49   %t2 = fadd fast double %f1, %t1
     50   ret double %t2
     51 }
     52 
     53 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
        ; Same input as fold3 with 'fast' replaced by 'reassoc nsz' on both
        ; instructions; the folded multiply carries 'reassoc nsz' as well.
     54 define double @fold3_reassoc_nsz(double %f1) {
     55 ; CHECK-LABEL: @fold3_reassoc_nsz(
     56 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz double [[F1:%.*]], 6.000000e+00
     57 ; CHECK-NEXT:    ret double [[TMP1]]
     58 ;
     59   %t1 = fmul reassoc nsz double 5.000000e+00, %f1
     60   %t2 = fadd reassoc nsz double %f1, %t1
     61   ret double %t2
     62 }
     63 
     64 ; TODO: This doesn't require 'nsz'.  It should fold to f1 * 6.0.
        ; With only 'reassoc', the currently expected output keeps both instructions;
        ; their operands are merely commuted relative to the input.
     65 define double @fold3_reassoc(double %f1) {
     66 ; CHECK-LABEL: @fold3_reassoc(
     67 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc double [[F1:%.*]], 5.000000e+00
     68 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc double [[TMP1]], [[F1]]
     69 ; CHECK-NEXT:    ret double [[TMP2]]
     70 ;
     71   %t1 = fmul reassoc double 5.000000e+00, %f1
     72   %t2 = fadd reassoc double %f1, %t1
     73   ret double %t2
     74 }
     75 
     76 ; (C1 - X) + (C2 - Y) => (C1+C2) - (X + Y)
        ; C1 = 4.0 and C2 = 5.0.  Only the final fadd is 'fast'; the two fsub inputs
        ; carry no flags.
     77 define float @fold4(float %f1, float %f2) {
     78 ; CHECK-LABEL: @fold4(
     79 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
     80 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast float 9.000000e+00, [[TMP1]]
     81 ; CHECK-NEXT:    ret float [[TMP2]]
     82 ;
     83   %sub = fsub float 4.000000e+00, %f1
     84   %sub1 = fsub float 5.000000e+00, %f2
     85   %add = fadd fast float %sub, %sub1
     86   ret float %add
     87 }
     88 
     89 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
        ; Same input as fold4 with 'reassoc nsz' on the fadd instead of 'fast'.
     90 define float @fold4_reassoc_nsz(float %f1, float %f2) {
     91 ; CHECK-LABEL: @fold4_reassoc_nsz(
     92 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
     93 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc nsz float 9.000000e+00, [[TMP1]]
     94 ; CHECK-NEXT:    ret float [[TMP2]]
     95 ;
     96   %sub = fsub float 4.000000e+00, %f1
     97   %sub1 = fsub float 5.000000e+00, %f2
     98   %add = fadd reassoc nsz float %sub, %sub1
     99   ret float %add
    100 }
    101 
    102 ; TODO: This doesn't require 'nsz'.  It should fold to (9.0 - (f1 + f2)).
        ; With only 'reassoc' on the fadd, the currently expected output is the
        ; unmodified input.
    103 define float @fold4_reassoc(float %f1, float %f2) {
    104 ; CHECK-LABEL: @fold4_reassoc(
    105 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub float 4.000000e+00, [[F1:%.*]]
    106 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub float 5.000000e+00, [[F2:%.*]]
    107 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
    108 ; CHECK-NEXT:    ret float [[TMP3]]
    109 ;
    110   %sub = fsub float 4.000000e+00, %f1
    111   %sub1 = fsub float 5.000000e+00, %f2
    112   %add = fadd reassoc float %sub, %sub1
    113   ret float %add
    114 }
    115 
    116 ; (X + C1) + C2 => X + (C1 + C2)
        ; C1 = 4.0 and C2 = 5.0.  Only the outer fadd is 'fast'.
    117 define float @fold5(float %f1) {
    118 ; CHECK-LABEL: @fold5(
    119 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd fast float [[F1:%.*]], 9.000000e+00
    120 ; CHECK-NEXT:    ret float [[ADD1]]
    121 ;
    122   %add = fadd float %f1, 4.000000e+00
    123   %add1 = fadd fast float %add, 5.000000e+00
    124   ret float %add1
    125 }
    126 
    127 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
        ; Same input as fold5 with 'reassoc nsz' on the outer fadd instead of 'fast'.
    128 define float @fold5_reassoc_nsz(float %f1) {
    129 ; CHECK-LABEL: @fold5_reassoc_nsz(
    130 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc nsz float [[F1:%.*]], 9.000000e+00
    131 ; CHECK-NEXT:    ret float [[ADD1]]
    132 ;
    133   %add = fadd float %f1, 4.000000e+00
    134   %add1 = fadd reassoc nsz float %add, 5.000000e+00
    135   ret float %add1
    136 }
    137 
    138 ; TODO: This doesn't require 'nsz'.  It should fold to f1 + 9.0
        ; With only 'reassoc' on the outer fadd, the currently expected output is the
        ; unmodified input.
    139 define float @fold5_reassoc(float %f1) {
    140 ; CHECK-LABEL: @fold5_reassoc(
    141 ; CHECK-NEXT:    [[ADD:%.*]] = fadd float [[F1:%.*]], 4.000000e+00
    142 ; CHECK-NEXT:    [[ADD1:%.*]] = fadd reassoc float [[ADD]], 5.000000e+00
    143 ; CHECK-NEXT:    ret float [[ADD1]]
    144 ;
    145   %add = fadd float %f1, 4.000000e+00
    146   %add1 = fadd reassoc float %add, 5.000000e+00
    147   ret float %add1
    148 }
    149 
    150 ; (X + X) + X + X => 4.0 * X
        ; All three fadds are 'fast'; the chain collapses to one multiply.
    151 define float @fold6(float %f1) {
    152 ; CHECK-LABEL: @fold6(
    153 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 4.000000e+00
    154 ; CHECK-NEXT:    ret float [[TMP1]]
    155 ;
    156   %t1 = fadd fast float %f1, %f1
    157   %t2 = fadd fast float %f1, %t1
    158   %t3 = fadd fast float %t2, %f1
    159   ret float %t3
    160 }
    161 
    162 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
        ; Same input as fold6 with 'reassoc nsz' on every fadd instead of 'fast'.
    163 define float @fold6_reassoc_nsz(float %f1) {
    164 ; CHECK-LABEL: @fold6_reassoc_nsz(
    165 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 4.000000e+00
    166 ; CHECK-NEXT:    ret float [[TMP1]]
    167 ;
    168   %t1 = fadd reassoc nsz float %f1, %f1
    169   %t2 = fadd reassoc nsz float %f1, %t1
    170   %t3 = fadd reassoc nsz float %t2, %f1
    171   ret float %t3
    172 }
    173 
    174 ; TODO: This doesn't require 'nsz'.  It should fold to f1 * 4.0.
        ; With only 'reassoc', the currently expected output keeps all three fadds;
        ; only the operands of the second one are commuted.
    175 define float @fold6_reassoc(float %f1) {
    176 ; CHECK-LABEL: @fold6_reassoc(
    177 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
    178 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[TMP1]], [[F1]]
    179 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP2]], [[F1]]
    180 ; CHECK-NEXT:    ret float [[TMP3]]
    181 ;
    182   %t1 = fadd reassoc float %f1, %f1
    183   %t2 = fadd reassoc float %f1, %t1
    184   %t3 = fadd reassoc float %t2, %f1
    185   ret float %t3
    186 }
    187 
    188 ; C1 * X + (X + X) = (C1 + 2) * X
        ; C1 = 5.0 and every instruction is 'fast', so the result is 7.0 * X.
    189 define float @fold7(float %f1) {
    190 ; CHECK-LABEL: @fold7(
    191 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 7.000000e+00
    192 ; CHECK-NEXT:    ret float [[TMP1]]
    193 ;
    194   %t1 = fmul fast float %f1, 5.000000e+00
    195   %t2 = fadd fast float %f1, %f1
    196   %t3 = fadd fast float %t1, %t2
    197   ret float %t3
    198 }
    199 
    200 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
        ; Same input as fold7 with 'reassoc nsz' on every instruction instead of 'fast'.
    201 define float @fold7_reassoc_nsz(float %f1) {
    202 ; CHECK-LABEL: @fold7_reassoc_nsz(
    203 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 7.000000e+00
    204 ; CHECK-NEXT:    ret float [[TMP1]]
    205 ;
    206   %t1 = fmul reassoc nsz float %f1, 5.000000e+00
    207   %t2 = fadd reassoc nsz float %f1, %f1
    208   %t3 = fadd reassoc nsz float %t1, %t2
    209   ret float %t3
    210 }
    211 
    212 ; TODO: This doesn't require 'nsz'.  It should fold to f1 * 7.0.
        ; With only 'reassoc', the currently expected output is the unmodified input.
    213 define float @fold7_reassoc(float %f1) {
    214 ; CHECK-LABEL: @fold7_reassoc(
    215 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[F1:%.*]], 5.000000e+00
    216 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]]
    217 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
    218 ; CHECK-NEXT:    ret float [[TMP3]]
    219 ;
    220   %t1 = fmul reassoc float %f1, 5.000000e+00
    221   %t2 = fadd reassoc float %f1, %f1
    222   %t3 = fadd reassoc float %t1, %t2
    223   ret float %t3
    224 }
    225 
    226 ; (X + X) + (X + X) + X => 5.0 * X
        ; All four fadds are 'fast'; the tree collapses to one multiply.
    227 define float @fold8(float %f1) {
    228 ; CHECK-LABEL: @fold8(
    229 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 5.000000e+00
    230 ; CHECK-NEXT:    ret float [[TMP1]]
    231 ;
    232   %t1 = fadd fast float %f1, %f1
    233   %t2 = fadd fast float %f1, %f1
    234   %t3 = fadd fast float %t1, %t2
    235   %t4 = fadd fast float %t3, %f1
    236   ret float %t4
    237 }
    238 
    239 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
        ; Same input as fold8 with 'reassoc nsz' on every fadd instead of 'fast'.
    240 define float @fold8_reassoc_nsz(float %f1) {
    241 ; CHECK-LABEL: @fold8_reassoc_nsz(
    242 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[F1:%.*]], 5.000000e+00
    243 ; CHECK-NEXT:    ret float [[TMP1]]
    244 ;
    245   %t1 = fadd reassoc nsz float %f1, %f1
    246   %t2 = fadd reassoc nsz float %f1, %f1
    247   %t3 = fadd reassoc nsz float %t1, %t2
    248   %t4 = fadd reassoc nsz float %t3, %f1
    249   ret float %t4
    250 }
    251 
    252 ; TODO: This doesn't require 'nsz'.  It should fold to f1 * 5.0.
        ; With only 'reassoc', the currently expected output is the unmodified input.
    253 define float @fold8_reassoc(float %f1) {
    254 ; CHECK-LABEL: @fold8_reassoc(
    255 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], [[F1]]
    256 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[F1]], [[F1]]
    257 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
    258 ; CHECK-NEXT:    [[TMP4:%.*]] = fadd reassoc float [[TMP3]], [[F1]]
    259 ; CHECK-NEXT:    ret float [[TMP4]]
    260 ;
    261   %t1 = fadd reassoc float %f1, %f1
    262   %t2 = fadd reassoc float %f1, %f1
    263   %t3 = fadd reassoc float %t1, %t2
    264   %t4 = fadd reassoc float %t3, %f1
    265   ret float %t4
    266 }
    267 
    268 ; X - (X + Y) => 0 - Y
        ; Only the fsub is 'fast'; the inner fadd has no flags.  The expected negation
        ; is spelled as a subtraction from -0.0.
    269 define float @fold9(float %f1, float %f2) {
    270 ; CHECK-LABEL: @fold9(
    271 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float -0.000000e+00, [[F2:%.*]]
    272 ; CHECK-NEXT:    ret float [[TMP1]]
    273 ;
    274   %t1 = fadd float %f1, %f2
    275   %t3 = fsub fast float %f1, %t1
    276   ret float %t3
    277 }
    278 
    279 ; Check again with 'reassoc' and 'nsz' ('nsz' not technically required).
        ; Same input as fold9 with 'reassoc nsz' on the fsub instead of 'fast'.
    280 define float @fold9_reassoc_nsz(float %f1, float %f2) {
    281 ; CHECK-LABEL: @fold9_reassoc_nsz(
    282 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float -0.000000e+00, [[F2:%.*]]
    283 ; CHECK-NEXT:    ret float [[TMP1]]
    284 ;
    285   %t1 = fadd float %f1, %f2
    286   %t3 = fsub reassoc nsz float %f1, %t1
    287   ret float %t3
    288 }
    289 
    290 ; TODO: This doesn't require 'nsz'.  It should fold to 0 - f2
        ; With only 'reassoc' on the fsub, the currently expected output is the
        ; unmodified input.
    291 define float @fold9_reassoc(float %f1, float %f2) {
    292 ; CHECK-LABEL: @fold9_reassoc(
    293 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd float [[F1:%.*]], [[F2:%.*]]
    294 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc float [[F1]], [[TMP1]]
    295 ; CHECK-NEXT:    ret float [[TMP2]]
    296 ;
    297   %t1 = fadd float %f1, %f2
    298   %t3 = fsub reassoc float %f1, %t1
    299   ret float %t3
    300 }
    301 
    302 ; Let C3 = C1 + C2. (f1 + C1) + (f2 + C2) => (f1 + f2) + C3 instead of
    303 ; "(f1 + C3) + f2" or "(f2 + C3) + f1". Placing constant-addend at the
    304 ; top of resulting simplified expression tree may potentially reveal some
    305 ; optimization opportunities in the super-expression trees.
    306 ;
        ; Here C1 = 2.0 and C2 = -3.0 (written as 'f2 - 3.0'), so C3 = -1.0.
    307 define float @fold10(float %f1, float %f2) {
    308 ; CHECK-LABEL: @fold10(
    309 ; CHECK-NEXT:    [[T2:%.*]] = fadd fast float [[F1:%.*]], [[F2:%.*]]
    310 ; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T2]], -1.000000e+00
    311 ; CHECK-NEXT:    ret float [[T3]]
    312 ;
    313   %t1 = fadd fast float 2.000000e+00, %f1
    314   %t2 = fsub fast float %f2, 3.000000e+00
    315   %t3 = fadd fast float %t1, %t2
    316   ret float %t3
    317 }
    318 
    319 ; Check again with 'reassoc' and 'nsz'.
    320 ; TODO: We may be able to remove the 'nsz' requirement.
        ; Same input as fold10 with 'reassoc nsz' on every instruction instead of 'fast'.
    321 define float @fold10_reassoc_nsz(float %f1, float %f2) {
    322 ; CHECK-LABEL: @fold10_reassoc_nsz(
    323 ; CHECK-NEXT:    [[T2:%.*]] = fadd reassoc nsz float [[F1:%.*]], [[F2:%.*]]
    324 ; CHECK-NEXT:    [[T3:%.*]] = fadd reassoc nsz float [[T2]], -1.000000e+00
    325 ; CHECK-NEXT:    ret float [[T3]]
    326 ;
    327   %t1 = fadd reassoc nsz float 2.000000e+00, %f1
    328   %t2 = fsub reassoc nsz float %f2, 3.000000e+00
    329   %t3 = fadd reassoc nsz float %t1, %t2
    330   ret float %t3
    331 }
    332 
    333 ; Observe that the fold is not done with only reassoc (the instructions are
    334 ; canonicalized, but not folded).
    335 ; TODO: As noted above, 'nsz' may not be required for this to be fully folded.
        ; The canonicalization visible in the expected output: the constant moves to
        ; the right-hand side of the first fadd, and 'f2 - 3.0' becomes 'f2 + -3.0'.
    336 define float @fold10_reassoc(float %f1, float %f2) {
    337 ; CHECK-LABEL: @fold10_reassoc(
    338 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc float [[F1:%.*]], 2.000000e+00
    339 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd reassoc float [[F2:%.*]], -3.000000e+00
    340 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
    341 ; CHECK-NEXT:    ret float [[TMP3]]
    342 ;
    343   %t1 = fadd reassoc float 2.000000e+00, %f1
    344   %t2 = fsub reassoc float %f2, 3.000000e+00
    345   %t3 = fadd reassoc float %t1, %t2
    346   ret float %t3
    347 }
    348 
    349 ; This used to crash/miscompile.
    350 
    351 define float @fail1(float %f1, float %f2) {
    352 ; CHECK-LABEL: @fail1(
    353 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[F1:%.*]], 3.000000e+00
    354 ; CHECK-NEXT:    [[TMP2:%.*]] = fadd fast float [[TMP1]], -3.000000e+00
    355 ; CHECK-NEXT:    ret float [[TMP2]]
    356 ;
        ; (f1 + -1.0) is summed three times; the expected result is 3.0 * f1 + -3.0.
        ; %f2 is not used by the body.
    357   %conv3 = fadd fast float %f1, -1.000000e+00
    358   %add = fadd fast float %conv3, %conv3
    359   %add2 = fadd fast float %add, %conv3
    360   ret float %add2
    361 }
    362 
    363 define double @fail2(double %f1, double %f2) {
    364 ; CHECK-LABEL: @fail2(
    365 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast double [[F2:%.*]], [[F2]]
    366 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub fast double -0.000000e+00, [[TMP1]]
    367 ; CHECK-NEXT:    ret double [[TMP2]]
    368 ;
        ; (f1 - f2) - (f1 + f2): f1 cancels, leaving -0.0 - (f2 + f2).
    369   %t1 = fsub fast double %f1, %f2
    370   %t2 = fadd fast double %f1, %f2
    371   %t3 = fsub fast double %t1, %t2
    372   ret double %t3
    373 }
    374 
    375 ; c1 * x - x => (c1 - 1.0) * x
        ; c1 = 7.0 and both instructions are 'fast', so the result is 6.0 * x.
    376 define float @fold13(float %x) {
    377 ; CHECK-LABEL: @fold13(
    378 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast float [[X:%.*]], 6.000000e+00
    379 ; CHECK-NEXT:    ret float [[TMP1]]
    380 ;
    381   %mul = fmul fast float %x, 7.000000e+00
    382   %sub = fsub fast float %mul, %x
    383   ret float %sub
    384 }
    385 
    386 ; Check again using the minimal subset of FMF.
        ; Same input as fold13 with 'reassoc nsz' on both instructions instead of 'fast'.
    387 define float @fold13_reassoc_nsz(float %x) {
    388 ; CHECK-LABEL: @fold13_reassoc_nsz(
    389 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc nsz float [[X:%.*]], 6.000000e+00
    390 ; CHECK-NEXT:    ret float [[TMP1]]
    391 ;
    392   %mul = fmul reassoc nsz float %x, 7.000000e+00
    393   %sub = fsub reassoc nsz float %mul, %x
    394   ret float %sub
    395 }
    396 
    397 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
        ; The expected output is the unmodified input.
    398 define float @fold13_reassoc(float %x) {
    399 ; CHECK-LABEL: @fold13_reassoc(
    400 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], 7.000000e+00
    401 ; CHECK-NEXT:    [[TMP2:%.*]] = fsub reassoc float [[TMP1]], [[X]]
    402 ; CHECK-NEXT:    ret float [[TMP2]]
    403 ;
    404   %mul = fmul reassoc float %x, 7.000000e+00
    405   %sub = fsub reassoc float %mul, %x
    406   ret float %sub
    407 }
    408 
    409 ; (select X+Y, X-Y) => X + (select Y, -Y)
    410 ; This is always safe.  No FMF required.
        ; None of the input instructions carry fast-math flags.  In the expected
        ; output the select chooses between Y and (-0.0 - Y), and one fadd remains.
    411 define float @fold16(float %x, float %y) {
    412 ; CHECK-LABEL: @fold16(
    413 ; CHECK-NEXT:    [[CMP:%.*]] = fcmp ogt float [[X:%.*]], [[Y:%.*]]
    414 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub float -0.000000e+00, [[Y]]
    415 ; CHECK-NEXT:    [[R_P:%.*]] = select i1 [[CMP]], float [[Y]], float [[TMP1]]
    416 ; CHECK-NEXT:    [[R:%.*]] = fadd float [[R_P]], [[X]]
    417 ; CHECK-NEXT:    ret float [[R]]
    418 ;
    419   %cmp = fcmp ogt float %x, %y
    420   %plus = fadd float %x, %y
    421   %minus = fsub float %x, %y
    422   %r = select i1 %cmp, float %plus, float %minus
    423   ret float %r
    424 }
    425 
    426 ; =========================================================================
    427 ;
    428 ;   Testing-cases about negation
    429 ;
    430 ; =========================================================================
    431 define float @fneg1(float %f1, float %f2) {
    432 ; CHECK-LABEL: @fneg1(
    433 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[F1:%.*]], [[F2:%.*]]
    434 ; CHECK-NEXT:    ret float [[MUL]]
    435 ;
        ; Product of two negations: (-0.0 - f1) * (nsz 0.0 - f2).  Both negations are
        ; dropped and the expected output multiplies f1 and f2 directly.
    436   %sub = fsub float -0.000000e+00, %f1
    437   %sub1 = fsub nsz float 0.000000e+00, %f2
    438   %mul = fmul float %sub, %sub1
    439   ret float %mul
    440 }
    441 
    442 define float @fneg2(float %x) {
    443 ; CHECK-LABEL: @fneg2(
    444 ; CHECK-NEXT:    [[SUB:%.*]] = fsub nsz float -0.000000e+00, [[X:%.*]]
    445 ; CHECK-NEXT:    ret float [[SUB]]
    446 ;
        ; With 'nsz', a subtraction from +0.0 is rewritten as a subtraction from -0.0.
    447   %sub = fsub nsz float 0.0, %x
    448   ret float %sub
    449 }
    450 
    451 define <2 x float> @fneg2_vec_undef(<2 x float> %x) {
    452 ; CHECK-LABEL: @fneg2_vec_undef(
    453 ; CHECK-NEXT:    [[SUB:%.*]] = fsub nsz <2 x float> <float -0.000000e+00, float -0.000000e+00>, [[X:%.*]]
    454 ; CHECK-NEXT:    ret <2 x float> [[SUB]]
    455 ;
        ; Vector variant of fneg2 whose constant has an undef lane; the expected
        ; output uses -0.0 in both lanes.
    456   %sub = fsub nsz <2 x float> <float undef, float 0.0>, %x
    457   ret <2 x float> %sub
    458 }
    459 
    460 ; =========================================================================
    461 ;
    462 ;   Testing-cases about div
    463 ;
    464 ; =========================================================================
    465 
    466 ; X/C1 / C2 => X * (1/(C2*C1))
        ; Only the second fdiv is 'fast'; both divisions are replaced by one multiply.
    467 define float @fdiv1(float %x) {
    468 ; CHECK-LABEL: @fdiv1(
    469 ; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FD7303B60000000
    470 ; CHECK-NEXT:    ret float [[DIV1]]
    471 ;
    472   %div = fdiv float %x, 0x3FF3333340000000
    473   %div1 = fdiv fast float %div, 0x4002666660000000
    474   ret float %div1
    475 ; 0x3FF3333340000000 = 1.2f
    476 ; 0x4002666660000000 = 2.3f
    477 ; 0x3FD7303B60000000 = 0.36231884057971014492
    478 }
    479 
    480 ; X*C1 / C2 => X * (C1/C2)
        ; Only the fdiv is 'fast'; the multiply and divide are merged into one multiply.
    481 define float @fdiv2(float %x) {
    482 ; CHECK-LABEL: @fdiv2(
    483 ; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[X:%.*]], 0x3FE0B21660000000
    484 ; CHECK-NEXT:    ret float [[DIV1]]
    485 ;
    486   %mul = fmul float %x, 0x3FF3333340000000
    487   %div1 = fdiv fast float %mul, 0x4002666660000000
    488   ret float %div1
    489 
    490 ; 0x3FF3333340000000 = 1.2f
    491 ; 0x4002666660000000 = 2.3f
    492 ; 0x3FE0B21660000000 = 0.52173918485641479492
    493 }
    494 
    495 define <2 x float> @fdiv2_vec(<2 x float> %x) {
    496 ; CHECK-LABEL: @fdiv2_vec(
    497 ; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast <2 x float> [[X:%.*]], <float 3.000000e+00, float 3.000000e+00>
    498 ; CHECK-NEXT:    ret <2 x float> [[DIV1]]
    499 ;
        ; Vector variant of fdiv2: <6.0, 9.0> / <2.0, 3.0> folds per lane to <3.0, 3.0>.
    500   %mul = fmul <2 x float> %x, <float 6.0, float 9.0>
    501   %div1 = fdiv fast <2 x float> %mul, <float 2.0, float 3.0>
    502   ret <2 x float> %div1
    503 }
    504 
    505 ; "X/C1 / C2 => X * (1/(C2*C1))" is disabled (for now) if C2/C1 is a denormal
    506 ;
        ; The expected output keeps the first fdiv; only the 'fast' second fdiv is
        ; rewritten, as a multiply by a reciprocal constant.
    507 define float @fdiv3(float %x) {
    508 ; CHECK-LABEL: @fdiv3(
    509 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[X:%.*]], 0x47EFFFFFE0000000
    510 ; CHECK-NEXT:    [[DIV1:%.*]] = fmul fast float [[DIV]], 0x3FDBD37A80000000
    511 ; CHECK-NEXT:    ret float [[DIV1]]
    512 ;
    513   %div = fdiv float %x, 0x47EFFFFFE0000000
    514   %div1 = fdiv fast float %div, 0x4002666660000000
    515   ret float %div1
    516 }
    517 
    518 ; "X*C1 / C2 => X * (C1/C2)" is disabled if C1/C2 is a denormal
        ; NOTE(review): neither instruction below carries fast-math flags, so the
        ; expected output (identical to the input) does not by itself exercise the
        ; denormal guard -- confirm whether 'fast' was intended on the fdiv.
    519 define float @fdiv4(float %x) {
    520 ; CHECK-LABEL: @fdiv4(
    521 ; CHECK-NEXT:    [[MUL:%.*]] = fmul float [[X:%.*]], 0x47EFFFFFE0000000
    522 ; CHECK-NEXT:    [[DIV:%.*]] = fdiv float [[MUL]], 0x3FC99999A0000000
    523 ; CHECK-NEXT:    ret float [[DIV]]
    524 ;
    525   %mul = fmul float %x, 0x47EFFFFFE0000000
    526   %div = fdiv float %mul, 0x3FC99999A0000000
    527   ret float %div
    528 }
    529 
    530 ; =========================================================================
    531 ;
    532 ;   Testing-cases about factorization
    533 ;
    534 ; =========================================================================
    535 ; x*z + y*z => (x+y) * z
        ; The common factor z is the right-hand operand of both multiplies; every
        ; instruction is 'fast'.
    536 define float @fact_mul1(float %x, float %y, float %z) {
    537 ; CHECK-LABEL: @fact_mul1(
    538 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[X:%.*]], [[Y:%.*]]
    539 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
    540 ; CHECK-NEXT:    ret float [[TMP2]]
    541 ;
    542   %t1 = fmul fast float %x, %z
    543   %t2 = fmul fast float %y, %z
    544   %t3 = fadd fast float %t1, %t2
    545   ret float %t3
    546 }
    547 
    548 ; Check again using the minimal subset of FMF.
        ; Same input as fact_mul1 with 'reassoc nsz' on every instruction instead of
        ; 'fast'.
    549 define float @fact_mul1_reassoc_nsz(float %x, float %y, float %z) {
    550 ; CHECK-LABEL: @fact_mul1_reassoc_nsz(
    551 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[X:%.*]], [[Y:%.*]]
    552 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
    553 ; CHECK-NEXT:    ret float [[TMP2]]
    554 ;
    555   %t1 = fmul reassoc nsz float %x, %z
    556   %t2 = fmul reassoc nsz float %y, %z
    557   %t3 = fadd reassoc nsz float %t1, %t2
    558   ret float %t3
    559 }
    560 
    561 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
        ; Same input as fact_mul1 with only 'reassoc' on every instruction; the
        ; expected output is the unmodified input.
    562 define float @fact_mul1_reassoc(float %x, float %y, float %z) {
    563 ; CHECK-LABEL: @fact_mul1_reassoc(
    564 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
    565 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
    566 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
    567 ; CHECK-NEXT:    ret float [[TMP3]]
    568 ;
    569   %t1 = fmul reassoc float %x, %z
    570   %t2 = fmul reassoc float %y, %z
    571   %t3 = fadd reassoc float %t1, %t2
    572   ret float %t3
    573 }
    574 
    575 ; z*x - y*z => (x-y) * z
        ; The common factor z is the left operand of the first multiply and the right
        ; operand of the second; every instruction is 'fast'.
    576 define float @fact_mul2(float %x, float %y, float %z) {
    577 ; CHECK-LABEL: @fact_mul2(
    578 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
    579 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
    580 ; CHECK-NEXT:    ret float [[TMP2]]
    581 ;
    582   %t1 = fmul fast float %z, %x
    583   %t2 = fmul fast float %y, %z
    584   %t3 = fsub fast float %t1, %t2
    585   ret float %t3
    586 }
    587 
    588 ; Check again using the minimal subset of FMF.
        ; Same input as fact_mul2 with 'reassoc nsz' on every instruction instead of
        ; 'fast'.
    589 define float @fact_mul2_reassoc_nsz(float %x, float %y, float %z) {
    590 ; CHECK-LABEL: @fact_mul2_reassoc_nsz(
    591 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
    592 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
    593 ; CHECK-NEXT:    ret float [[TMP2]]
    594 ;
    595   %t1 = fmul reassoc nsz float %z, %x
    596   %t2 = fmul reassoc nsz float %y, %z
    597   %t3 = fsub reassoc nsz float %t1, %t2
    598   ret float %t3
    599 }
    600 
    601 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
        ; Same input as fact_mul2 with only 'reassoc'; the expected output is the
        ; unmodified input.
    602 define float @fact_mul2_reassoc(float %x, float %y, float %z) {
    603 ; CHECK-LABEL: @fact_mul2_reassoc(
    604 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[Z:%.*]], [[X:%.*]]
    605 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Y:%.*]], [[Z]]
    606 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
    607 ; CHECK-NEXT:    ret float [[TMP3]]
    608 ;
    609   %t1 = fmul reassoc float %z, %x
    610   %t2 = fmul reassoc float %y, %z
    611   %t3 = fsub reassoc float %t1, %t2
    612   ret float %t3
    613 }
    614 
    615 ; z*x - z*y => (x-y) * z
        ; The common factor z is the left operand of both multiplies, and the
        ; subtrahend (%t2) is defined before the minuend (%t1).
    616 define float @fact_mul3(float %x, float %y, float %z) {
    617 ; CHECK-LABEL: @fact_mul3(
    618 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
    619 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
    620 ; CHECK-NEXT:    ret float [[TMP2]]
    621 ;
    622   %t2 = fmul fast float %z, %y
    623   %t1 = fmul fast float %z, %x
    624   %t3 = fsub fast float %t1, %t2
    625   ret float %t3
    626 }
    627 
    628 ; Check again using the minimal subset of FMF.
        ; Same input as fact_mul3 with 'reassoc nsz' on every instruction instead of
        ; 'fast'.
    629 define float @fact_mul3_reassoc_nsz(float %x, float %y, float %z) {
    630 ; CHECK-LABEL: @fact_mul3_reassoc_nsz(
    631 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
    632 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
    633 ; CHECK-NEXT:    ret float [[TMP2]]
    634 ;
    635   %t2 = fmul reassoc nsz float %z, %y
    636   %t1 = fmul reassoc nsz float %z, %x
    637   %t3 = fsub reassoc nsz float %t1, %t2
    638   ret float %t3
    639 }
    640 
    641 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
        ; Same input as fact_mul3 with only 'reassoc'; the expected output is the
        ; unmodified input.
    642 define float @fact_mul3_reassoc(float %x, float %y, float %z) {
    643 ; CHECK-LABEL: @fact_mul3_reassoc(
    644 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Z:%.*]], [[Y:%.*]]
    645 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[Z]], [[X:%.*]]
    646 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
    647 ; CHECK-NEXT:    ret float [[TMP3]]
    648 ;
    649   %t2 = fmul reassoc float %z, %y
    650   %t1 = fmul reassoc float %z, %x
    651   %t3 = fsub reassoc float %t1, %t2
    652   ret float %t3
    653 }
    654 
    655 ; x*z - z*y => (x-y) * z
        ; The common factor z is the right operand of the first multiply and the left
        ; operand of the second; every instruction is 'fast'.
    656 define float @fact_mul4(float %x, float %y, float %z) {
    657 ; CHECK-LABEL: @fact_mul4(
    658 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[X:%.*]], [[Y:%.*]]
    659 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul fast float [[TMP1]], [[Z:%.*]]
    660 ; CHECK-NEXT:    ret float [[TMP2]]
    661 ;
    662   %t1 = fmul fast float %x, %z
    663   %t2 = fmul fast float %z, %y
    664   %t3 = fsub fast float %t1, %t2
    665   ret float %t3
    666 }
    667 
    668 ; Check again using the minimal subset of FMF.
        ; Same input as fact_mul4 with 'reassoc nsz' on every instruction instead of
        ; 'fast'.
    669 define float @fact_mul4_reassoc_nsz(float %x, float %y, float %z) {
    670 ; CHECK-LABEL: @fact_mul4_reassoc_nsz(
    671 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[X:%.*]], [[Y:%.*]]
    672 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc nsz float [[TMP1]], [[Z:%.*]]
    673 ; CHECK-NEXT:    ret float [[TMP2]]
    674 ;
    675   %t1 = fmul reassoc nsz float %x, %z
    676   %t2 = fmul reassoc nsz float %z, %y
    677   %t3 = fsub reassoc nsz float %t1, %t2
    678   ret float %t3
    679 }
    680 
    681 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
        ; Same input as fact_mul4 with only 'reassoc'; the expected output is the
        ; unmodified input.
    682 define float @fact_mul4_reassoc(float %x, float %y, float %z) {
    683 ; CHECK-LABEL: @fact_mul4_reassoc(
    684 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul reassoc float [[X:%.*]], [[Z:%.*]]
    685 ; CHECK-NEXT:    [[TMP2:%.*]] = fmul reassoc float [[Z]], [[Y:%.*]]
    686 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
    687 ; CHECK-NEXT:    ret float [[TMP3]]
    688 ;
    689   %t1 = fmul reassoc float %x, %z
    690   %t2 = fmul reassoc float %z, %y
    691   %t3 = fsub reassoc float %t1, %t2
    692   ret float %t3
    693 }
    694 
    695 ; x/y + x/z, no xform
        ; The shared value x is the numerator of both divisions (the denominators
        ; differ); the expected output is the unmodified input even with 'fast'.
    696 define float @fact_div1(float %x, float %y, float %z) {
    697 ; CHECK-LABEL: @fact_div1(
    698 ; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
    699 ; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float [[X]], [[Z:%.*]]
    700 ; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
    701 ; CHECK-NEXT:    ret float [[T3]]
    702 ;
    703   %t1 = fdiv fast float %x, %y
    704   %t2 = fdiv fast float %x, %z
    705   %t3 = fadd fast float %t1, %t2
    706   ret float %t3
    707 }
    708 
    709 ; x/y + z/x; no xform
        ; x is the numerator of one division and the denominator of the other; the
        ; expected output is the unmodified input even with 'fast'.
    710 define float @fact_div2(float %x, float %y, float %z) {
    711 ; CHECK-LABEL: @fact_div2(
    712 ; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float [[X:%.*]], [[Y:%.*]]
    713 ; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float [[Z:%.*]], [[X]]
    714 ; CHECK-NEXT:    [[T3:%.*]] = fadd fast float [[T1]], [[T2]]
    715 ; CHECK-NEXT:    ret float [[T3]]
    716 ;
    717   %t1 = fdiv fast float %x, %y
    718   %t2 = fdiv fast float %z, %x
    719   %t3 = fadd fast float %t1, %t2
    720   ret float %t3
    721 }
    722 
    723 ; y/x + z/x => (y+z)/x
        ; Both divisions share the denominator x; every instruction is 'fast'.
    724 define float @fact_div3(float %x, float %y, float %z) {
    725 ; CHECK-LABEL: @fact_div3(
    726 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd fast float [[Y:%.*]], [[Z:%.*]]
    727 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
    728 ; CHECK-NEXT:    ret float [[TMP2]]
    729 ;
    730   %t1 = fdiv fast float %y, %x
    731   %t2 = fdiv fast float %z, %x
    732   %t3 = fadd fast float %t1, %t2
    733   ret float %t3
    734 }
    735 
    736 ; Check again using the minimal subset of FMF.
        ; Same input as fact_div3 with 'reassoc nsz' on every instruction instead of
        ; 'fast'.
    737 define float @fact_div3_reassoc_nsz(float %x, float %y, float %z) {
    738 ; CHECK-LABEL: @fact_div3_reassoc_nsz(
    739 ; CHECK-NEXT:    [[TMP1:%.*]] = fadd reassoc nsz float [[Y:%.*]], [[Z:%.*]]
    740 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
    741 ; CHECK-NEXT:    ret float [[TMP2]]
    742 ;
    743   %t1 = fdiv reassoc nsz float %y, %x
    744   %t2 = fdiv reassoc nsz float %z, %x
    745   %t3 = fadd reassoc nsz float %t1, %t2
    746   ret float %t3
    747 }
    748 
    749 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
        ; Same input as fact_div3 with only 'reassoc'; the expected output is the
        ; unmodified input.
    750 define float @fact_div3_reassoc(float %x, float %y, float %z) {
    751 ; CHECK-LABEL: @fact_div3_reassoc(
    752 ; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
    753 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
    754 ; CHECK-NEXT:    [[TMP3:%.*]] = fadd reassoc float [[TMP1]], [[TMP2]]
    755 ; CHECK-NEXT:    ret float [[TMP3]]
    756 ;
    757   %t1 = fdiv reassoc float %y, %x
    758   %t2 = fdiv reassoc float %z, %x
    759   %t3 = fadd reassoc float %t1, %t2
    760   ret float %t3
    761 }
    762 
    763 ; y/x - z/x => (y-z)/x
        ; Subtraction counterpart of @fact_div3: with 'fast' on all three
        ; instructions the expected output is one fsub of the numerators followed
        ; by one fdiv by the shared divisor %x.
    764 define float @fact_div4(float %x, float %y, float %z) {
    765 ; CHECK-LABEL: @fact_div4(
    766 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub fast float [[Y:%.*]], [[Z:%.*]]
    767 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv fast float [[TMP1]], [[X:%.*]]
    768 ; CHECK-NEXT:    ret float [[TMP2]]
    769 ;
    770   %t1 = fdiv fast float %y, %x
    771   %t2 = fdiv fast float %z, %x
    772   %t3 = fsub fast float %t1, %t2
    773   ret float %t3
    774 }
    775 
    776 ; Check again using the minimal subset of FMF.
        ; Same shape as @fact_div4, but every instruction carries only
        ; 'reassoc nsz'; the expected fsub/fdiv pair carries those same two flags.
    777 define float @fact_div4_reassoc_nsz(float %x, float %y, float %z) {
    778 ; CHECK-LABEL: @fact_div4_reassoc_nsz(
    779 ; CHECK-NEXT:    [[TMP1:%.*]] = fsub reassoc nsz float [[Y:%.*]], [[Z:%.*]]
    780 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc nsz float [[TMP1]], [[X:%.*]]
    781 ; CHECK-NEXT:    ret float [[TMP2]]
    782 ;
    783   %t1 = fdiv reassoc nsz float %y, %x
    784   %t2 = fdiv reassoc nsz float %z, %x
    785   %t3 = fsub reassoc nsz float %t1, %t2
    786   ret float %t3
    787 }
    788 
    789 ; Verify the fold is not done with only 'reassoc' ('nsz' is required).
        ; Negative test: the expected output is the input unchanged, i.e. both
        ; fdivs by %x followed by the fsub, all still flagged 'reassoc' only.
    790 define float @fact_div4_reassoc(float %x, float %y, float %z) {
    791 ; CHECK-LABEL: @fact_div4_reassoc(
    792 ; CHECK-NEXT:    [[TMP1:%.*]] = fdiv reassoc float [[Y:%.*]], [[X:%.*]]
    793 ; CHECK-NEXT:    [[TMP2:%.*]] = fdiv reassoc float [[Z:%.*]], [[X]]
    794 ; CHECK-NEXT:    [[TMP3:%.*]] = fsub reassoc float [[TMP1]], [[TMP2]]
    795 ; CHECK-NEXT:    ret float [[TMP3]]
    796 ;
    797   %t1 = fdiv reassoc float %y, %x
    798   %t2 = fdiv reassoc float %z, %x
    799   %t3 = fsub reassoc float %t1, %t2
    800   ret float %t3
    801 }
    802 
    803 ; y/x + z/x => (y+z)/x is still done for constant numerators when y+z is normal.
        ; 0x3810000000000000 is 2^-126 and 0x3800000000000000 is 2^-127; their sum,
        ; 0x3818000000000000 (1.5 * 2^-126), is not denormal as a float, so the two
        ; fdivs and the fadd are expected to fold into one fdiv of that constant.
    804 define float @fact_div5(float %x) {
    805 ; CHECK-LABEL: @fact_div5(
    806 ; CHECK-NEXT:    [[TMP1:%.*]] = fdiv fast float 0x3818000000000000, [[X:%.*]]
    807 ; CHECK-NEXT:    ret float [[TMP1]]
    808 ;
    809   %t1 = fdiv fast float 0x3810000000000000, %x
    810   %t2 = fdiv fast float 0x3800000000000000, %x
    811   %t3 = fadd fast float %t1, %t2
    812   ret float %t3
    813 }
    814 
    815 ; y/x - z/x => (y-z)/x is disabled if y-z is denormal.
        ; The numerators are 2^-126 and 2^-127; their difference (2^-127) is below
        ; the smallest normal float, so the expected output is the input unchanged
        ; even though every instruction is 'fast'.
    816 define float @fact_div6(float %x) {
    817 ; CHECK-LABEL: @fact_div6(
    818 ; CHECK-NEXT:    [[T1:%.*]] = fdiv fast float 0x3810000000000000, [[X:%.*]]
    819 ; CHECK-NEXT:    [[T2:%.*]] = fdiv fast float 0x3800000000000000, [[X]]
    820 ; CHECK-NEXT:    [[T3:%.*]] = fsub fast float [[T1]], [[T2]]
    821 ; CHECK-NEXT:    ret float [[T3]]
    822 ;
    823   %t1 = fdiv fast float 0x3810000000000000, %x
    824   %t2 = fdiv fast float 0x3800000000000000, %x
    825   %t3 = fsub fast float %t1, %t2
    826   ret float %t3
    827 }
    828 
    829 ; =========================================================================
    830 ;
    831 ;   Test-cases for square root
    832 ;
    833 ; =========================================================================
    834 
    835 ; A squared factor fed into a square root intrinsic should be hoisted out
    836 ; as a fabs() value.
        ; Simplest case: sqrt(x * x) with 'fast' on both the fmul and the intrinsic
        ; call is expected to become a single 'fast' llvm.fabs.f64 call on %x.
    837 
        ; Intrinsic used by all of the sqrt_intrinsic_* tests in this section.
    838 declare double @llvm.sqrt.f64(double)
    839 
    840 define double @sqrt_intrinsic_arg_squared(double %x) {
    841 ; CHECK-LABEL: @sqrt_intrinsic_arg_squared(
    842 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
    843 ; CHECK-NEXT:    ret double [[FABS]]
    844 ;
    845   %mul = fmul fast double %x, %x
    846   %sqrt = call fast double @llvm.sqrt.f64(double %mul)
    847   ret double %sqrt
    848 }
    849 
    850 ; Check all 6 combinations of a 3-way multiplication tree where
    851 ; one factor is repeated.
        ; Every variant multiplies %x, %x and %y in a different operand order and
        ; expects the same result: fabs(x) * sqrt(y), all flagged 'fast'.
    852 
        ; Variant 1: sqrt((y * x) * x).
    853 define double @sqrt_intrinsic_three_args1(double %x, double %y) {
    854 ; CHECK-LABEL: @sqrt_intrinsic_three_args1(
    855 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
    856 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
    857 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
    858 ; CHECK-NEXT:    ret double [[TMP1]]
    859 ;
    860   %mul = fmul fast double %y, %x
    861   %mul2 = fmul fast double %mul, %x
    862   %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
    863   ret double %sqrt
    864 }
    865 
        ; Variant 2: sqrt((x * y) * x). Expected output is fabs(x) * sqrt(y).
    866 define double @sqrt_intrinsic_three_args2(double %x, double %y) {
    867 ; CHECK-LABEL: @sqrt_intrinsic_three_args2(
    868 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
    869 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
    870 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
    871 ; CHECK-NEXT:    ret double [[TMP1]]
    872 ;
    873   %mul = fmul fast double %x, %y
    874   %mul2 = fmul fast double %mul, %x
    875   %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
    876   ret double %sqrt
    877 }
    878 
        ; Variant 3: sqrt((x * x) * y). Expected output is fabs(x) * sqrt(y).
    879 define double @sqrt_intrinsic_three_args3(double %x, double %y) {
    880 ; CHECK-LABEL: @sqrt_intrinsic_three_args3(
    881 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
    882 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
    883 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
    884 ; CHECK-NEXT:    ret double [[TMP1]]
    885 ;
    886   %mul = fmul fast double %x, %x
    887   %mul2 = fmul fast double %mul, %y
    888   %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
    889   ret double %sqrt
    890 }
    891 
        ; Variant 4: sqrt(x * (y * x)). Expected output is fabs(x) * sqrt(y).
    892 define double @sqrt_intrinsic_three_args4(double %x, double %y) {
    893 ; CHECK-LABEL: @sqrt_intrinsic_three_args4(
    894 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
    895 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
    896 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
    897 ; CHECK-NEXT:    ret double [[TMP1]]
    898 ;
    899   %mul = fmul fast double %y, %x
    900   %mul2 = fmul fast double %x, %mul
    901   %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
    902   ret double %sqrt
    903 }
    904 
        ; Variant 5: sqrt(x * (x * y)). Expected output is fabs(x) * sqrt(y).
    905 define double @sqrt_intrinsic_three_args5(double %x, double %y) {
    906 ; CHECK-LABEL: @sqrt_intrinsic_three_args5(
    907 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
    908 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
    909 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
    910 ; CHECK-NEXT:    ret double [[TMP1]]
    911 ;
    912   %mul = fmul fast double %x, %y
    913   %mul2 = fmul fast double %x, %mul
    914   %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
    915   ret double %sqrt
    916 }
    917 
        ; Variant 6: sqrt(y * (x * x)). Expected output is fabs(x) * sqrt(y).
    918 define double @sqrt_intrinsic_three_args6(double %x, double %y) {
    919 ; CHECK-LABEL: @sqrt_intrinsic_three_args6(
    920 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
    921 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[Y:%.*]])
    922 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[FABS]], [[SQRT1]]
    923 ; CHECK-NEXT:    ret double [[TMP1]]
    924 ;
    925   %mul = fmul fast double %x, %x
    926   %mul2 = fmul fast double %y, %mul
    927   %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
    928   ret double %sqrt
    929 }
    930 
    931 ; If any operation is not 'fast', we can't simplify.
        ; Here the inner x * x fmul carries no fast-math flags while the outer fmul
        ; and the sqrt call are 'fast'; the expected output keeps all three
        ; instructions exactly as written.
    932 
    933 define double @sqrt_intrinsic_not_so_fast(double %x, double %y) {
    934 ; CHECK-LABEL: @sqrt_intrinsic_not_so_fast(
    935 ; CHECK-NEXT:    [[MUL:%.*]] = fmul double [[X:%.*]], [[X]]
    936 ; CHECK-NEXT:    [[MUL2:%.*]] = fmul fast double [[MUL]], [[Y:%.*]]
    937 ; CHECK-NEXT:    [[SQRT:%.*]] = call fast double @llvm.sqrt.f64(double [[MUL2]])
    938 ; CHECK-NEXT:    ret double [[SQRT]]
    939 ;
    940   %mul = fmul double %x, %x
    941   %mul2 = fmul fast double %mul, %y
    942   %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
    943   ret double %sqrt
    944 }
    945 
        ; sqrt((x * x) * (x * x)): the repeated factor is itself x * x, and the
        ; expected output is just that 'fast' fmul with no fabs call around it
        ; (presumably because x * x cannot be negative).
    946 define double @sqrt_intrinsic_arg_4th(double %x) {
    947 ; CHECK-LABEL: @sqrt_intrinsic_arg_4th(
    948 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
    949 ; CHECK-NEXT:    ret double [[MUL]]
    950 ;
    951   %mul = fmul fast double %x, %x
    952   %mul2 = fmul fast double %mul, %mul
    953   %sqrt = call fast double @llvm.sqrt.f64(double %mul2)
    954   ret double %sqrt
    955 }
    956 
        ; sqrt(((x * x) * x) * (x * x)), i.e. the square root of x to the 5th
        ; power: the expected output hoists the repeated x * x factor and leaves
        ; (x * x) * sqrt(x).
    957 define double @sqrt_intrinsic_arg_5th(double %x) {
    958 ; CHECK-LABEL: @sqrt_intrinsic_arg_5th(
    959 ; CHECK-NEXT:    [[MUL:%.*]] = fmul fast double [[X:%.*]], [[X]]
    960 ; CHECK-NEXT:    [[SQRT1:%.*]] = call fast double @llvm.sqrt.f64(double [[X]])
    961 ; CHECK-NEXT:    [[TMP1:%.*]] = fmul fast double [[MUL]], [[SQRT1]]
    962 ; CHECK-NEXT:    ret double [[TMP1]]
    963 ;
    964   %mul = fmul fast double %x, %x
    965   %mul2 = fmul fast double %mul, %x
    966   %mul3 = fmul fast double %mul2, %mul
    967   %sqrt = call fast double @llvm.sqrt.f64(double %mul3)
    968   ret double %sqrt
    969 }
    970 
    971 ; Check that square root calls have the same behavior.
        ; These tests call the external functions @sqrtf, @sqrt and @sqrtl directly
        ; instead of the llvm.sqrt intrinsic, one test per floating-point type.
    972 
    973 declare float @sqrtf(float)
    974 declare double @sqrt(double)
    975 declare fp128 @sqrtl(fp128)
    976 
        ; float: 'fast' sqrtf(x * x) is expected to become a 'fast' llvm.fabs.f32
        ; call on %x.
    977 define float @sqrt_call_squared_f32(float %x) {
    978 ; CHECK-LABEL: @sqrt_call_squared_f32(
    979 ; CHECK-NEXT:    [[FABS:%.*]] = call fast float @llvm.fabs.f32(float [[X:%.*]])
    980 ; CHECK-NEXT:    ret float [[FABS]]
    981 ;
    982   %mul = fmul fast float %x, %x
    983   %sqrt = call fast float @sqrtf(float %mul)
    984   ret float %sqrt
    985 }
    986 
        ; double: 'fast' sqrt(x * x) is expected to become a 'fast' llvm.fabs.f64
        ; call on %x.
    987 define double @sqrt_call_squared_f64(double %x) {
    988 ; CHECK-LABEL: @sqrt_call_squared_f64(
    989 ; CHECK-NEXT:    [[FABS:%.*]] = call fast double @llvm.fabs.f64(double [[X:%.*]])
    990 ; CHECK-NEXT:    ret double [[FABS]]
    991 ;
    992   %mul = fmul fast double %x, %x
    993   %sqrt = call fast double @sqrt(double %mul)
    994   ret double %sqrt
    995 }
    996 
        ; fp128: 'fast' sqrtl(x * x) is expected to become a 'fast' llvm.fabs.f128
        ; call on %x.
    997 define fp128 @sqrt_call_squared_f128(fp128 %x) {
    998 ; CHECK-LABEL: @sqrt_call_squared_f128(
    999 ; CHECK-NEXT:    [[FABS:%.*]] = call fast fp128 @llvm.fabs.f128(fp128 [[X:%.*]])
   1000 ; CHECK-NEXT:    ret fp128 [[FABS]]
   1001 ;
   1002   %mul = fmul fast fp128 %x, %x
   1003   %sqrt = call fast fp128 @sqrtl(fp128 %mul)
   1004   ret fp128 %sqrt
   1005 }
   1006 
   1007 ; =========================================================================
   1008 ;
   1009 ;   Test-cases for fmin / fmax
   1010 ;
   1011 ; =========================================================================
   1012 
        ; External fmax/fmin functions in double, float and fp128 flavours; each one
        ; is called by one or more of the max*/min* tests in this section.
   1013 declare double @fmax(double, double)
   1014 declare double @fmin(double, double)
   1015 declare float @fmaxf(float, float)
   1016 declare float @fminf(float, float)
   1017 declare fp128 @fmaxl(fp128, fp128)
   1018 declare fp128 @fminl(fp128, fp128)
   1019 
   1020 ; No NaNs is the minimum requirement to replace these calls.
   1021 ; This should always be set when unsafe-fp-math is true, but
   1022 ; alternate the attributes for additional test coverage.
   1023 ; 'nsz' is implied by the definition of fmax or fmin itself.
   1024 
   1025 ; Shrink and remove the call.
        ; The float arguments are extended to double, passed to a 'fast' @fmax call
        ; and the result is truncated back to float. The expected output compares
        ; and selects directly on the float values ('fcmp fast ogt' + select), with
        ; no fpext, fptrunc or call left.
   1026 define float @max1(float %a, float %b) {
   1027 ; CHECK-LABEL: @max1(
   1028 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt float [[A:%.*]], [[B:%.*]]
   1029 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
   1030 ; CHECK-NEXT:    ret float [[TMP2]]
   1031 ;
   1032   %c = fpext float %a to double
   1033   %d = fpext float %b to double
   1034   %e = call fast double @fmax(double %c, double %d)
   1035   %f = fptrunc double %e to float
   1036   ret float %f
   1037 }
   1038 
        ; A call to @fmaxf flagged only 'nnan' is expected to become
        ; 'fcmp nnan nsz ogt' + select; note that 'nsz' appears on the compare even
        ; though the call did not carry it.
   1039 define float @max2(float %a, float %b) {
   1040 ; CHECK-LABEL: @max2(
   1041 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt float [[A:%.*]], [[B:%.*]]
   1042 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
   1043 ; CHECK-NEXT:    ret float [[TMP2]]
   1044 ;
   1045   %c = call nnan float @fmaxf(float %a, float %b)
   1046   ret float %c
   1047 }
   1048 
   1049 
        ; A 'fast' call to @fmax on doubles is expected to become 'fcmp fast ogt' +
        ; select.
   1050 define double @max3(double %a, double %b) {
   1051 ; CHECK-LABEL: @max3(
   1052 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast ogt double [[A:%.*]], [[B:%.*]]
   1053 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
   1054 ; CHECK-NEXT:    ret double [[TMP2]]
   1055 ;
   1056   %c = call fast double @fmax(double %a, double %b)
   1057   ret double %c
   1058 }
   1059 
        ; A call to @fmaxl on fp128 flagged only 'nnan' is expected to become
        ; 'fcmp nnan nsz ogt' + select.
   1060 define fp128 @max4(fp128 %a, fp128 %b) {
   1061 ; CHECK-LABEL: @max4(
   1062 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz ogt fp128 [[A:%.*]], [[B:%.*]]
   1063 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
   1064 ; CHECK-NEXT:    ret fp128 [[TMP2]]
   1065 ;
   1066   %c = call nnan fp128 @fmaxl(fp128 %a, fp128 %b)
   1067   ret fp128 %c
   1068 }
   1069 
   1070 ; Shrink and remove the call.
        ; The float arguments are extended to double, passed to an 'nnan' @fmin call
        ; and the result is truncated back to float. The expected output compares
        ; and selects directly on the float values ('fcmp nnan nsz olt' + select),
        ; with no fpext, fptrunc or call left.
   1071 define float @min1(float %a, float %b) {
   1072 ; CHECK-LABEL: @min1(
   1073 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt float [[A:%.*]], [[B:%.*]]
   1074 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
   1075 ; CHECK-NEXT:    ret float [[TMP2]]
   1076 ;
   1077   %c = fpext float %a to double
   1078   %d = fpext float %b to double
   1079   %e = call nnan double @fmin(double %c, double %d)
   1080   %f = fptrunc double %e to float
   1081   ret float %f
   1082 }
   1083 
        ; A 'fast' call to @fminf is expected to become 'fcmp fast olt' + select.
   1084 define float @min2(float %a, float %b) {
   1085 ; CHECK-LABEL: @min2(
   1086 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt float [[A:%.*]], [[B:%.*]]
   1087 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], float [[A]], float [[B]]
   1088 ; CHECK-NEXT:    ret float [[TMP2]]
   1089 ;
   1090   %c = call fast float @fminf(float %a, float %b)
   1091   ret float %c
   1092 }
   1093 
        ; A call to @fmin on doubles flagged only 'nnan' is expected to become
        ; 'fcmp nnan nsz olt' + select.
   1094 define double @min3(double %a, double %b) {
   1095 ; CHECK-LABEL: @min3(
   1096 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp nnan nsz olt double [[A:%.*]], [[B:%.*]]
   1097 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], double [[A]], double [[B]]
   1098 ; CHECK-NEXT:    ret double [[TMP2]]
   1099 ;
   1100   %c = call nnan double @fmin(double %a, double %b)
   1101   ret double %c
   1102 }
   1103 
        ; A 'fast' call to @fminl on fp128 is expected to become 'fcmp fast olt' +
        ; select.
   1104 define fp128 @min4(fp128 %a, fp128 %b) {
   1105 ; CHECK-LABEL: @min4(
   1106 ; CHECK-NEXT:    [[TMP1:%.*]] = fcmp fast olt fp128 [[A:%.*]], [[B:%.*]]
   1107 ; CHECK-NEXT:    [[TMP2:%.*]] = select i1 [[TMP1]], fp128 [[A]], fp128 [[B]]
   1108 ; CHECK-NEXT:    ret fp128 [[TMP2]]
   1109 ;
   1110   %c = call fast fp128 @fminl(fp128 %a, fp128 %b)
   1111   ret fp128 %c
   1112 }
   1113 
   1114 ; ((which ? 2.0 : a) + 1.0) => (which ? 3.0 : (a + 1.0))
   1115 ; This is always safe.  No FMF required.
        ; The "select" is written as a phi in block 'final' that takes 2.0 from
        ; 'entry' and %a from 'delay'; the fadd of 1.0 follows the phi. In the
        ; expected output the fadd has moved into 'delay' and the phi's constant
        ; input has become 3.0.
        ; Note that the FileCheck variable A is bound twice in the assertions
        ; below: first to the argument %a (operand of the fadd) and then again to
        ; the phi result %A.
   1116 define float @test55(i1 %which, float %a) {
   1117 ; CHECK-LABEL: @test55(
   1118 ; CHECK-NEXT:  entry:
   1119 ; CHECK-NEXT:    br i1 [[WHICH:%.*]], label [[FINAL:%.*]], label [[DELAY:%.*]]
   1120 ; CHECK:       delay:
   1121 ; CHECK-NEXT:    [[PHITMP:%.*]] = fadd float [[A:%.*]], 1.000000e+00
   1122 ; CHECK-NEXT:    br label [[FINAL]]
   1123 ; CHECK:       final:
   1124 ; CHECK-NEXT:    [[A:%.*]] = phi float [ 3.000000e+00, [[ENTRY:%.*]] ], [ [[PHITMP]], [[DELAY]] ]
   1125 ; CHECK-NEXT:    ret float [[A]]
   1126 ;
   1127 entry:
   1128   br i1 %which, label %final, label %delay
   1129 
   1130 delay:
   1131   br label %final
   1132 
   1133 final:
   1134   %A = phi float [ 2.0, %entry ], [ %a, %delay ]
   1135   %value = fadd float %A, 1.0
   1136   ret float %value
   1137 }
   1138