Home | History | Annotate | Download | only in NVPTX
      1 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_20 -fp-contract=fast | FileCheck %s --check-prefix=FAST
      2 ; RUN: llc < %s -march=nvptx64 -mcpu=sm_30 | FileCheck %s --check-prefix=DEFAULT
      3 
      4 target triple = "nvptx64-unknown-cuda"
      5 
      6 ;; Make sure we are generating proper instruction sequences for fused ops
      7 ;; If fusion is allowed, we try to form fma.rn at the PTX level, and emit
      8 ;; add.f32 otherwise.  Without an explicit rounding mode on add.f32, ptxas
      9 ;; is free to fuse with a multiply if it is able.  If fusion is not allowed,
     10 ;; we do not form fma.rn at the PTX level and explicitly generate add.rn
     11 ;; for all adds to prevent ptxas from fusing the ops.
     12 
     13 ;; FAST-LABEL: @t0
     14 ;; DEFAULT-LABEL: @t0
     15 define float @t0(float %a, float %b, float %c) {
     16 ;; FAST: fma.rn.f32
     17 ;; DEFAULT: mul.rn.f32
     18 ;; DEFAULT: add.rn.f32
     19   %prod = fmul float %a, %b    ; a * b, consumed only by the add below
     20   %sum = fadd float %prod, %c  ; (a * b) + c, the mul/add pair the checks above target
     21   ret float %sum
     22 }
     23 
     24 ;; FAST-LABEL: @t1
     25 ;; DEFAULT-LABEL: @t1
     26 define float @t1(float %a, float %b) {
     27 ;; No fma can be formed from a lone fadd.  With fusion allowed a plain add.f32 is
     28 ;; expected; otherwise add.rn.f32 must be emitted so that ptxas cannot fuse it.
     29 ;; FAST: add.f32
     30 ;; DEFAULT: add.rn.f32
     31   %sum = fadd float %a, %b
     32   ret float %sum
     33 }
     34