Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -S -mtriple=amdgcn-- -amdgpu-codegenprepare %s | FileCheck %s
      2 ; RUN: opt -S -amdgpu-codegenprepare %s | FileCheck -check-prefix=NOOP %s
      3 ; The second invocation above omits -mtriple: make sure the pass doesn't crash with no triple.
      4 
      5 ; NOOP-LABEL: @noop_fdiv_fpmath(
      6 ; NOOP: %md.25ulp = fdiv float %a, %b, !fpmath !0
        ; Matched only through the NOOP check prefix, i.e. by the opt invocation
        ; that is given no -mtriple. The expectation above requires the fdiv
        ; carrying !fpmath !0 to be printed back unchanged.
        ; Uses attribute group #1 (nounwind), which is defined near the end of
        ; this file, so the function does not reference an undefined group.
      7 define amdgpu_kernel void @noop_fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
      8   %md.25ulp = fdiv float %a, %b, !fpmath !0
          ; The volatile store keeps the division result in use.
      9   store volatile float %md.25ulp, float addrspace(1)* %out
     10   ret void
     11 }
     12 
     13 ; CHECK-LABEL: @fdiv_fpmath(
     14 ; CHECK: %no.md = fdiv float %a, %b{{$}}
     15 ; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
     16 ; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
     17 ; CHECK: %md.25ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !0
     18 ; CHECK: %md.3ulp = call float @llvm.amdgcn.fdiv.fast(float %a, float %b), !fpmath !3
     19 ; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
     20 ; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
        ; Scalar float divisions of %a by %b with different !fpmath operands and
        ; fast-math flags. Per the expectations above:
        ;   - no metadata, !1 and !2: the fdiv is printed back unchanged;
        ;   - !0 and !3 without flags: the fdiv is replaced by a call to
        ;     @llvm.amdgcn.fdiv.fast that keeps the same !fpmath operand;
        ;   - fast / arcp combined with !0: the fdiv is printed back unchanged.
        ; Every expectation names the full result value, including the leading
        ; '%', so each one is anchored to exactly one instruction.
     21 define amdgpu_kernel void @fdiv_fpmath(float addrspace(1)* %out, float %a, float %b) #1 {
          ; No !fpmath attached.
     22   %no.md = fdiv float %a, %b
     23   store volatile float %no.md, float addrspace(1)* %out
     24 
          ; !fpmath !1 and !2: expected to stay a plain fdiv.
     25   %md.half.ulp = fdiv float %a, %b, !fpmath !1
     26   store volatile float %md.half.ulp, float addrspace(1)* %out
     27 
     28   %md.1ulp = fdiv float %a, %b, !fpmath !2
     29   store volatile float %md.1ulp, float addrspace(1)* %out
     30 
          ; !fpmath !0 and !3: expected to become @llvm.amdgcn.fdiv.fast calls.
     31   %md.25ulp = fdiv float %a, %b, !fpmath !0
     32   store volatile float %md.25ulp, float addrspace(1)* %out
     33 
     34   %md.3ulp = fdiv float %a, %b, !fpmath !3
     35   store volatile float %md.3ulp, float addrspace(1)* %out
     36 
          ; fast / arcp flags together with !fpmath !0: expected to stay fdiv.
     37   %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
     38   store volatile float %fast.md.25ulp, float addrspace(1)* %out
     39 
     40   %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
     41   store volatile float %arcp.md.25ulp, float addrspace(1)* %out
     42 
     43   ret void
     44 }
     45 
     46 ; CHECK-LABEL: @rcp_fdiv_fpmath(
     47 ; CHECK: %no.md = fdiv float 1.000000e+00, %x{{$}}
     48 ; CHECK: %md.25ulp = fdiv float 1.000000e+00, %x, !fpmath !0
     49 ; CHECK: %md.half.ulp = fdiv float 1.000000e+00, %x, !fpmath !1
     50 ; CHECK: %arcp.no.md = fdiv arcp float 1.000000e+00, %x{{$}}
     51 ; CHECK: %arcp.25ulp = fdiv arcp float 1.000000e+00, %x, !fpmath !0
     52 ; CHECK: %fast.no.md = fdiv fast float 1.000000e+00, %x{{$}}
     53 ; CHECK: %fast.25ulp = fdiv fast float 1.000000e+00, %x, !fpmath !0
        ; Reciprocal form: every division has the constant 1.0 as its numerator
        ; and %x as its denominator. The expectations above require all seven
        ; divisions to be printed back as fdiv, whatever combination of
        ; arcp/fast flags and !fpmath operand they carry.
     54 define amdgpu_kernel void @rcp_fdiv_fpmath(float addrspace(1)* %out, float %x) #1 {
          ; No flags: without metadata, with !0, and with !1.
     55   %no.md = fdiv float 1.0, %x
     56   store volatile float %no.md, float addrspace(1)* %out
     57 
     58   %md.25ulp = fdiv float 1.0, %x, !fpmath !0
     59   store volatile float %md.25ulp, float addrspace(1)* %out
     60 
     61   %md.half.ulp = fdiv float 1.0, %x, !fpmath !1
     62   store volatile float %md.half.ulp, float addrspace(1)* %out
     63 
          ; arcp flag: without metadata and with !0.
     64   %arcp.no.md = fdiv arcp float 1.0, %x
     65   store volatile float %arcp.no.md, float addrspace(1)* %out
     66 
     67   %arcp.25ulp = fdiv arcp float 1.0, %x, !fpmath !0
     68   store volatile float %arcp.25ulp, float addrspace(1)* %out
     69 
          ; fast flag: without metadata and with !0.
     70   %fast.no.md = fdiv fast float 1.0, %x
     71   store volatile float %fast.no.md, float addrspace(1)* %out
     72 
     73   %fast.25ulp = fdiv fast float 1.0, %x, !fpmath !0
     74   store volatile float %fast.25ulp, float addrspace(1)* %out
     75 
     76   ret void
     77 }
     78 
     79 ; CHECK-LABEL: @fdiv_fpmath_vector(
     80 ; CHECK: %no.md = fdiv <2 x float> %a, %b{{$}}
     81 ; CHECK: %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
     82 ; CHECK: %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
     83 
     84 ; CHECK: %[[A0:[0-9]+]] = extractelement <2 x float> %a, i64 0
     85 ; CHECK: %[[B0:[0-9]+]] = extractelement <2 x float> %b, i64 0
     86 ; CHECK: %[[FDIV0:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A0]], float %[[B0]]), !fpmath !0
     87 ; CHECK: %[[INS0:[0-9]+]] = insertelement <2 x float> undef, float %[[FDIV0]], i64 0
     88 ; CHECK: %[[A1:[0-9]+]] = extractelement <2 x float> %a, i64 1
     89 ; CHECK: %[[B1:[0-9]+]] = extractelement <2 x float> %b, i64 1
     90 ; CHECK: %[[FDIV1:[0-9]+]] = call float @llvm.amdgcn.fdiv.fast(float %[[A1]], float %[[B1]]), !fpmath !0
     91 ; CHECK: %md.25ulp = insertelement <2 x float> %[[INS0]], float %[[FDIV1]], i64 1
        ; <2 x float> variant of the scalar test. Per the expectations above, the
        ; divisions with no metadata, !1 and !2 are printed back unchanged. The
        ; division tagged !fpmath !0 is expected to be split per lane: each lane
        ; of %a and %b is extracted, divided with a call to
        ; @llvm.amdgcn.fdiv.fast carrying !fpmath !0, and inserted into a result
        ; vector; the final insertelement takes over the name %md.25ulp.
     92 define amdgpu_kernel void @fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %a, <2 x float> %b) #1 {
     93   %no.md = fdiv <2 x float> %a, %b
     94   store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
     95 
     96   %md.half.ulp = fdiv <2 x float> %a, %b, !fpmath !1
     97   store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
     98 
     99   %md.1ulp = fdiv <2 x float> %a, %b, !fpmath !2
    100   store volatile <2 x float> %md.1ulp, <2 x float> addrspace(1)* %out
    101 
          ; The only division in this function expected to be rewritten.
    102   %md.25ulp = fdiv <2 x float> %a, %b, !fpmath !0
    103   store volatile <2 x float> %md.25ulp, <2 x float> addrspace(1)* %out
    104 
    105   ret void
    106 }
    107 
    108 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector(
    109 ; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
    110 ; CHECK: %md.half.ulp = fdiv <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !1
    111 ; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
    112 ; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x{{$}}
    113 ; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
    114 ; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 1.000000e+00>, %x, !fpmath !0
    115 ; CHECK: store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
        ; Vector reciprocal form: the numerator is the constant vector
        ; <1.0, 1.0> and the denominator is %x. The expectations above require
        ; every division to be printed back as a vector fdiv, and the last store
        ; to still use %fast.25ulp.
    116 define amdgpu_kernel void @rcp_fdiv_fpmath_vector(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
    117   %no.md = fdiv <2 x float> <float 1.0, float 1.0>, %x
    118   store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
    119 
    120   %md.half.ulp = fdiv <2 x float> <float 1.0, float 1.0>, %x, !fpmath !1
    121   store volatile <2 x float> %md.half.ulp, <2 x float> addrspace(1)* %out
    122 
          ; arcp / fast flags without metadata.
    123   %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x
    124   store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out
    125 
    126   %fast.no.md = fdiv fast <2 x float> <float 1.0, float 1.0>, %x
    127   store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
    128 
          ; arcp / fast flags together with !fpmath !0.
    129   %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
    130   store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out
    131 
    132   %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 1.0>, %x, !fpmath !0
    133   store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
    134 
    135   ret void
    136 }
    137 
    138 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector_nonsplat(
    139 ; CHECK: %no.md = fdiv <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
    140 ; CHECK: %arcp.no.md = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
    141 ; CHECK: %fast.no.md = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x{{$}}
    142 ; CHECK: %arcp.25ulp = fdiv arcp <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
    143 ; CHECK: %fast.25ulp = fdiv fast <2 x float> <float 1.000000e+00, float 2.000000e+00>, %x, !fpmath !0
    144 ; CHECK: store volatile <2 x float> %fast.25ulp
        ; Like the splat reciprocal test, but the constant numerator is
        ; <1.0, 2.0>, so only lane 0 is a reciprocal. The expectations above
        ; require every division to be printed back as a vector fdiv.
        ; The three expectations for divisions without metadata end in an
        ; end-of-line anchor, so they fail if anything is appended after %x.
    145 define amdgpu_kernel void @rcp_fdiv_fpmath_vector_nonsplat(<2 x float> addrspace(1)* %out, <2 x float> %x) #1 {
          ; No !fpmath attached: plain, arcp and fast.
    146   %no.md = fdiv <2 x float> <float 1.0, float 2.0>, %x
    147   store volatile <2 x float> %no.md, <2 x float> addrspace(1)* %out
    148 
    149   %arcp.no.md = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x
    150   store volatile <2 x float> %arcp.no.md, <2 x float> addrspace(1)* %out
    151 
    152   %fast.no.md = fdiv fast <2 x float> <float 1.0, float 2.0>, %x
    153   store volatile <2 x float> %fast.no.md, <2 x float> addrspace(1)* %out
    154 
          ; arcp / fast flags together with !fpmath !0.
    155   %arcp.25ulp = fdiv arcp <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
    156   store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out
    157 
    158   %fast.25ulp = fdiv fast <2 x float> <float 1.0, float 2.0>, %x, !fpmath !0
    159   store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
    160 
    161   ret void
    162 }
    163 
    164 ; FIXME: Should be able to get fdiv for 1.0 component
    165 ; CHECK-LABEL: @rcp_fdiv_fpmath_vector_partial_constant(
    166 ; CHECK: %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
    167 ; CHECK: store volatile <2 x float> %arcp.25ulp
    168 
    169 ; CHECK: %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
    170 ; CHECK: store volatile <2 x float> %fast.25ulp
        ; The numerator is %x with the constant 1.0 inserted into lane 0, so it
        ; is only partially constant. The expectations above require both the
        ; arcp and the fast division (each tagged !fpmath !0) to be printed back
        ; as vector fdivs; the FIXME records that this is not the desired end
        ; state for the 1.0 lane.
    171 define amdgpu_kernel void @rcp_fdiv_fpmath_vector_partial_constant(<2 x float> addrspace(1)* %out, <2 x float> %x, <2 x float> %y) #1 {
          ; Lane 0 becomes the constant 1.0; lane 1 keeps the value from %x.
    172   %x.insert = insertelement <2 x float> %x, float 1.0, i32 0
    173 
    174   %arcp.25ulp = fdiv arcp <2 x float> %x.insert, %y, !fpmath !0
    175   store volatile <2 x float> %arcp.25ulp, <2 x float> addrspace(1)* %out
    176 
    177   %fast.25ulp = fdiv fast <2 x float> %x.insert, %y, !fpmath !0
    178   store volatile <2 x float> %fast.25ulp, <2 x float> addrspace(1)* %out
    179 
    180   ret void
    181 }
    182 
    183 ; CHECK-LABEL: @fdiv_fpmath_f32_denormals(
    184 ; CHECK: %no.md = fdiv float %a, %b{{$}}
    185 ; CHECK: %md.half.ulp = fdiv float %a, %b, !fpmath !1
    186 ; CHECK: %md.1ulp = fdiv float %a, %b, !fpmath !2
    187 ; CHECK: %md.25ulp = fdiv float %a, %b, !fpmath !0
    188 ; CHECK: %md.3ulp = fdiv float %a, %b, !fpmath !3
    189 ; CHECK: %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
    190 ; CHECK: %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
        ; Same instruction sequence as @fdiv_fpmath, but the function uses
        ; attribute group #2, which adds "target-features"="+fp32-denormals".
        ; The expectations above require every division to be printed back as
        ; fdiv, including the !0 and !3 cases that @fdiv_fpmath expects to be
        ; turned into @llvm.amdgcn.fdiv.fast calls.
    191 define amdgpu_kernel void @fdiv_fpmath_f32_denormals(float addrspace(1)* %out, float %a, float %b) #2 {
    192   %no.md = fdiv float %a, %b
    193   store volatile float %no.md, float addrspace(1)* %out
    194 
    195   %md.half.ulp = fdiv float %a, %b, !fpmath !1
    196   store volatile float %md.half.ulp, float addrspace(1)* %out
    197 
    198   %md.1ulp = fdiv float %a, %b, !fpmath !2
    199   store volatile float %md.1ulp, float addrspace(1)* %out
    200 
          ; !fpmath !0 and !3: expected to stay fdiv in this function.
    201   %md.25ulp = fdiv float %a, %b, !fpmath !0
    202   store volatile float %md.25ulp, float addrspace(1)* %out
    203 
    204   %md.3ulp = fdiv float %a, %b, !fpmath !3
    205   store volatile float %md.3ulp, float addrspace(1)* %out
    206 
    207   %fast.md.25ulp = fdiv fast float %a, %b, !fpmath !0
    208   store volatile float %fast.md.25ulp, float addrspace(1)* %out
    209 
    210   %arcp.md.25ulp = fdiv arcp float %a, %b, !fpmath !0
    211   store volatile float %arcp.md.25ulp, float addrspace(1)* %out
    212 
    213   ret void
    214 }
    215 
    216 attributes #0 = { nounwind optnone noinline } ; not referenced by any function visible in this file
    217 attributes #1 = { nounwind } ; plain nounwind group shared by most kernels in this file
    218 attributes #2 = { nounwind "target-features"="+fp32-denormals" } ; used by @fdiv_fpmath_f32_denormals
    219 
    220 ; CHECK: !0 = !{float 2.500000e+00}
    221 ; CHECK: !1 = !{float 5.000000e-01}
    222 ; CHECK: !2 = !{float 1.000000e+00}
    223 ; CHECK: !3 = !{float 3.000000e+00}
    224 
        ; Operands for the !fpmath attachments on the fdiv instructions in this
        ; file. The expectations above require the printed module to keep the
        ; same four nodes under the same numbers.
    225 !0 = !{float 2.500000e+00} ; attached to the values named *.25ulp
    226 !1 = !{float 5.000000e-01} ; attached to the values named md.half.ulp
    227 !2 = !{float 1.000000e+00} ; attached to the values named md.1ulp
    228 !3 = !{float 3.000000e+00} ; attached to the values named md.3ulp
    229