; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; REQUIRES: asserts
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 | FileCheck %s --check-prefix=FMFDEBUG
; RUN: llc < %s -mtriple=powerpc64le | FileCheck %s --check-prefix=FMF
; RUN: llc < %s -mtriple=powerpc64le -debug-only=isel -o /dev/null 2>&1 -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBALDEBUG
; RUN: llc < %s -mtriple=powerpc64le -enable-unsafe-fp-math -enable-no-nans-fp-math | FileCheck %s --check-prefix=GLOBAL

; Exercise FP transforms that are driven by instruction/node-level fast-math-flags.
; The isel debug output is checked as well, to confirm that the flags are carried
; over to nodes created by the combines.
; The runs that pass the global unsafe/no-NaNs options cover the pre-FMF behavior
; with regular (unflagged) instructions/nodes.

declare float @llvm.fma.f32(float, float, float)
declare float @llvm.sqrt.f32(float)

; X * Y + Z --> fma(X, Y, Z)
; Only the fadd carries 'contract' here; the fmul is unflagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract1:'
; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract1:'

define float @fmul_fadd_contract1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_contract1:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_contract1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul float %x, %y
  %sum = fadd contract float %prod, %z
  ret float %sum
}

; Flagging the intermediate fmul as well is expected to leave the result unchanged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_contract2:'
; FMFDEBUG:         fma contract {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_contract2:'

define float @fmul_fadd_contract2(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_contract2:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_contract2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul contract float %x, %y
  %sum = fadd contract float %prod, %z
  ret float %sum
}

; 'reassoc' on the fadd is treated as permission to contract into an FMA.
; The fmul is unflagged in this variant.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc1:'
; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc1:'

define float @fmul_fadd_reassoc1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_reassoc1:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_reassoc1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul float %x, %y
  %sum = fadd reassoc float %prod, %z
  ret float %sum
}

; Flagging the intermediate fmul as well is expected to leave the result unchanged.

; Both the fmul and the fadd carry 'reassoc'. The debug check matches all three
; operands of the resulting fma node, like the sibling fmul_fadd_* tests do.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_reassoc2:'
; FMFDEBUG:         fma reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_reassoc2:'

define float @fmul_fadd_reassoc2(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_reassoc2:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_reassoc2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %mul = fmul reassoc float %x, %y
  %add = fadd reassoc float %mul, %z
  ret float %add
}

; The fadd is now fully 'fast'. This implies that contraction is allowed.
; NOTE(review): the fmul below already carries 'fast', so this body is identical
; to @fmul_fadd_fast2 - confirm whether an unflagged fmul was intended here.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast1:'
; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast1:'

define float @fmul_fadd_fast1(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_fast1:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_fast1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %mul = fmul fast float %x, %y
  %add = fadd fast float %mul, %z
  ret float %add
}

; This shouldn't change anything - the intermediate fmul result is now also flagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fadd_fast2:'
; FMFDEBUG:         fma nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}, {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fadd_fast2:'

define float @fmul_fadd_fast2(float %x, float %y, float %z) {
; FMF-LABEL: fmul_fadd_fast2:
; FMF:       # %bb.0:
; FMF-NEXT:    xsmaddasp 3, 1, 2
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fadd_fast2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xsmaddasp 3, 1, 2
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %prod = fmul fast float %x, %y
  %sum = fadd fast float %prod, %z
  ret float %sum
}

; fma(X, 7.0, X * 42.0) --> X * 49.0
; Minimum FMF for this fold: only the FMA call carries 'reassoc', the fmul is unflagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; FMFDEBUG:         fmul reassoc {{t[0-9]+}},
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc1:'
; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc1:'

define float @fmul_fma_reassoc1(float %x) {
; FMF-LABEL: fmul_fma_reassoc1:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
; FMF-NEXT:    addi 3, 3, .LCPI6_0@toc@l
; FMF-NEXT:    lfsx 0, 0, 3
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_reassoc1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI6_0@toc@ha
; GLOBAL-NEXT:    addi 3, 3, .LCPI6_0@toc@l
; GLOBAL-NEXT:    lfsx 0, 0, 3
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul float %x, 42.0
  %fused = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; Flagging the intermediate fmul as well is expected to leave the result unchanged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; FMFDEBUG:         fmul reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_reassoc2:'
; GLOBALDEBUG:         fmul reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_reassoc2:'

define float @fmul_fma_reassoc2(float %x) {
; FMF-LABEL: fmul_fma_reassoc2:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
; FMF-NEXT:    addi 3, 3, .LCPI7_0@toc@l
; FMF-NEXT:    lfsx 0, 0, 3
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_reassoc2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI7_0@toc@ha
; GLOBAL-NEXT:    addi 3, 3, .LCPI7_0@toc@l
; GLOBAL-NEXT:    lfsx 0, 0, 3
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul reassoc float %x, 42.0
  %fused = call reassoc float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; With a fully 'fast' FMA call, reassociation is implied; the fmul is unflagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast1:'
; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast1:'

define float @fmul_fma_fast1(float %x) {
; FMF-LABEL: fmul_fma_fast1:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
; FMF-NEXT:    addi 3, 3, .LCPI8_0@toc@l
; FMF-NEXT:    lfsx 0, 0, 3
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_fast1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI8_0@toc@ha
; GLOBAL-NEXT:    addi 3, 3, .LCPI8_0@toc@l
; GLOBAL-NEXT:    lfsx 0, 0, 3
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul float %x, 42.0
  %fused = call fast float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; Flagging the intermediate fmul as well is expected to leave the result unchanged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fmul_fma_fast2:'
; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fmul_fma_fast2:'

define float @fmul_fma_fast2(float %x) {
; FMF-LABEL: fmul_fma_fast2:
; FMF:       # %bb.0:
; FMF-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
; FMF-NEXT:    addi 3, 3, .LCPI9_0@toc@l
; FMF-NEXT:    lfsx 0, 0, 3
; FMF-NEXT:    xsmulsp 1, 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fmul_fma_fast2:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    addis 3, 2, .LCPI9_0@toc@ha
; GLOBAL-NEXT:    addi 3, 3, .LCPI9_0@toc@l
; GLOBAL-NEXT:    lfsx 0, 0, 3
; GLOBAL-NEXT:    xsmulsp 1, 1, 0
; GLOBAL-NEXT:    blr
  %scaled = fmul fast float %x, 42.0
  %fused = call fast float @llvm.fma.f32(float %x, float 7.0, float %scaled)
  ret float %fused
}

; 'afn' permits a reduced-precision sqrt - expect an estimate plus NR iterations.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
; FMFDEBUG:         fmul afn {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_afn:'
; GLOBALDEBUG:         fmul afn {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_afn:'

define float @sqrt_afn(float %x) {
; FMF-LABEL: sqrt_afn:
; FMF:       # %bb.0:
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    fcmpu 0, 1, 0
; FMF-NEXT:    beq 0, .LBB10_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
; FMF-NEXT:    xsrsqrtesp 3, 1
; FMF-NEXT:    addi 3, 3, .LCPI10_0@toc@l
; FMF-NEXT:    lfsx 0, 0, 3
; FMF-NEXT:    xsmulsp 2, 1, 0
; FMF-NEXT:    xsmulsp 4, 3, 3
; FMF-NEXT:    xssubsp 2, 2, 1
; FMF-NEXT:    xsmulsp 2, 2, 4
; FMF-NEXT:    xssubsp 0, 0, 2
; FMF-NEXT:    xsmulsp 0, 3, 0
; FMF-NEXT:    xsmulsp 0, 0, 1
; FMF-NEXT:  .LBB10_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_afn:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    fcmpu 0, 1, 0
; GLOBAL-NEXT:    beq 0, .LBB10_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 2, 1
; GLOBAL-NEXT:    fneg 0, 1
; GLOBAL-NEXT:    addis 3, 2, .LCPI10_0@toc@ha
; GLOBAL-NEXT:    fmr 4, 1
; GLOBAL-NEXT:    addi 3, 3, .LCPI10_0@toc@l
; GLOBAL-NEXT:    lfsx 3, 0, 3
; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
; GLOBAL-NEXT:    xsmulsp 0, 2, 2
; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
; GLOBAL-NEXT:    xsmulsp 0, 2, 3
; GLOBAL-NEXT:    xsmulsp 0, 0, 1
; GLOBAL-NEXT:  .LBB10_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call afn float @llvm.sqrt.f32(float %x)
  ret float %root
}

; With a fully 'fast' call, approximation is implied.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
; FMFDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'sqrt_fast:'
; GLOBALDEBUG:         fmul nnan ninf nsz arcp contract afn reassoc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'sqrt_fast:'

define float @sqrt_fast(float %x) {
; FMF-LABEL: sqrt_fast:
; FMF:       # %bb.0:
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    fcmpu 0, 1, 0
; FMF-NEXT:    beq 0, .LBB11_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    xsrsqrtesp 2, 1
; FMF-NEXT:    fneg 0, 1
; FMF-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
; FMF-NEXT:    fmr 4, 1
; FMF-NEXT:    addi 3, 3, .LCPI11_0@toc@l
; FMF-NEXT:    lfsx 3, 0, 3
; FMF-NEXT:    xsmaddasp 4, 0, 3
; FMF-NEXT:    xsmulsp 0, 2, 2
; FMF-NEXT:    xsmaddasp 3, 4, 0
; FMF-NEXT:    xsmulsp 0, 2, 3
; FMF-NEXT:    xsmulsp 0, 0, 1
; FMF-NEXT:  .LBB11_2:
; FMF-NEXT:    fmr 1, 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: sqrt_fast:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    fcmpu 0, 1, 0
; GLOBAL-NEXT:    beq 0, .LBB11_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    xsrsqrtesp 2, 1
; GLOBAL-NEXT:    fneg 0, 1
; GLOBAL-NEXT:    addis 3, 2, .LCPI11_0@toc@ha
; GLOBAL-NEXT:    fmr 4, 1
; GLOBAL-NEXT:    addi 3, 3, .LCPI11_0@toc@l
; GLOBAL-NEXT:    lfsx 3, 0, 3
; GLOBAL-NEXT:    xsmaddasp 4, 0, 3
; GLOBAL-NEXT:    xsmulsp 0, 2, 2
; GLOBAL-NEXT:    xsmaddasp 3, 4, 0
; GLOBAL-NEXT:    xsmulsp 0, 2, 3
; GLOBAL-NEXT:    xsmulsp 0, 0, 1
; GLOBAL-NEXT:  .LBB11_2:
; GLOBAL-NEXT:    fmr 1, 0
; GLOBAL-NEXT:    blr
  %root = call fast float @llvm.sqrt.f32(float %x)
  ret float %root
}

; Fast-math-flags may also appear on fcmp.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
; FMFDEBUG:         select_cc {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fcmp_nnan:'
; GLOBALDEBUG:         select_cc {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fcmp_nnan:'

define double @fcmp_nnan(double %a, double %y, double %z) {
; FMF-LABEL: fcmp_nnan:
; FMF:       # %bb.0:
; FMF-NEXT:    xxlxor 0, 0, 0
; FMF-NEXT:    xscmpudp 0, 1, 0
; FMF-NEXT:    blt 0, .LBB12_2
; FMF-NEXT:  # %bb.1:
; FMF-NEXT:    fmr 3, 2
; FMF-NEXT:  .LBB12_2:
; FMF-NEXT:    fmr 1, 3
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fcmp_nnan:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xxlxor 0, 0, 0
; GLOBAL-NEXT:    xscmpudp 0, 1, 0
; GLOBAL-NEXT:    blt 0, .LBB12_2
; GLOBAL-NEXT:  # %bb.1:
; GLOBAL-NEXT:    fmr 3, 2
; GLOBAL-NEXT:  .LBB12_2:
; GLOBAL-NEXT:    fmr 1, 3
; GLOBAL-NEXT:    blr
  %is.neg = fcmp nnan ult double %a, 0.0
  %picked = select i1 %is.neg, double %z, double %y
  ret double %picked
}

; Fast-math-flags may also appear on calls to FP library functions.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; FMFDEBUG:         ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
; FMFDEBUG:         ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
; FMFDEBUG:         f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'log2_approx:'
; GLOBALDEBUG:         ch,glue = PPCISD::CALL_NOP t11, TargetGlobalAddress:i64<double (double)* @log2>
; GLOBALDEBUG:         ch,glue = callseq_end t15, TargetConstant:i64<32>, TargetConstant:i64<0>, t15:1
; GLOBALDEBUG:         f64,ch,glue = CopyFromReg afn t16, Register:f64 $f1, t16:1
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'log2_approx:'

declare double @log2(double)
define double @log2_approx(double %x) nounwind {
; FMF-LABEL: log2_approx:
; FMF:       # %bb.0:
; FMF-NEXT:    mflr 0
; FMF-NEXT:    std 0, 16(1)
; FMF-NEXT:    stdu 1, -32(1)
; FMF-NEXT:    bl log2
; FMF-NEXT:    nop
; FMF-NEXT:    addi 1, 1, 32
; FMF-NEXT:    ld 0, 16(1)
; FMF-NEXT:    mtlr 0
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: log2_approx:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    mflr 0
; GLOBAL-NEXT:    std 0, 16(1)
; GLOBAL-NEXT:    stdu 1, -32(1)
; GLOBAL-NEXT:    bl log2
; GLOBAL-NEXT:    nop
; GLOBAL-NEXT:    addi 1, 1, 32
; GLOBAL-NEXT:    ld 0, 16(1)
; GLOBAL-NEXT:    mtlr 0
; GLOBAL-NEXT:    blr
  %res = call afn double @log2(double %x)
  ret double %res
}

; -(X - Y) --> (Y - X)
; Only the outer negating fsub (0.0 - V) carries 'nsz'; the inner fsub is unflagged.

; FMFDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
; FMFDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
; FMFDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'

; GLOBALDEBUG-LABEL: Optimized lowered selection DAG: %bb.0 'fneg_fsub_nozeros_1:'
; GLOBALDEBUG:         fsub nsz {{t[0-9]+}}, {{t[0-9]+}}
; GLOBALDEBUG:       Type-legalized selection DAG: %bb.0 'fneg_fsub_nozeros_1:'

define float @fneg_fsub_nozeros_1(float %x, float %y, float %z) {
; FMF-LABEL: fneg_fsub_nozeros_1:
; FMF:       # %bb.0:
; FMF-NEXT:    xssubsp 1, 2, 1
; FMF-NEXT:    blr
;
; GLOBAL-LABEL: fneg_fsub_nozeros_1:
; GLOBAL:       # %bb.0:
; GLOBAL-NEXT:    xssubsp 1, 2, 1
; GLOBAL-NEXT:    blr
  %diff = fsub float %x, %y
  %negated = fsub nsz float 0.0, %diff
  ret float %negated
}
