; (code-browser navigation residue, not part of the test) Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 
      3 declare float @llvm.amdgcn.rcp.f32(float) #0
      4 declare double @llvm.amdgcn.rcp.f64(double) #0
      5 
      6 declare double @llvm.sqrt.f64(double) #0
      7 declare float @llvm.sqrt.f32(float) #0
      8 
      9 ; FUNC-LABEL: {{^}}rcp_undef_f32:
     10 ; SI-NOT: v_rcp_f32
     11 define void @rcp_undef_f32(float addrspace(1)* %out) #1 {
     12   %rcp = call float @llvm.amdgcn.rcp.f32(float undef)
     13   store float %rcp, float addrspace(1)* %out, align 4
     14   ret void
     15 }
     16 
     17 ; FUNC-LABEL: {{^}}safe_no_fp32_denormals_rcp_f32:
     18 ; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
     19 ; SI-NOT: [[RESULT]]
     20 ; SI: buffer_store_dword [[RESULT]]
     21 define void @safe_no_fp32_denormals_rcp_f32(float addrspace(1)* %out, float %src) #1 {
     22   %rcp = fdiv float 1.0, %src
     23   store float %rcp, float addrspace(1)* %out, align 4
     24   ret void
     25 }
     26 
     27 ; FUNC-LABEL: {{^}}safe_f32_denormals_rcp_pat_f32:
     28 ; SI: v_rcp_f32_e32 [[RESULT:v[0-9]+]], s{{[0-9]+}}
     29 ; SI-NOT: [[RESULT]]
     30 ; SI: buffer_store_dword [[RESULT]]
     31 define void @safe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #4 {
     32   %rcp = fdiv float 1.0, %src
     33   store float %rcp, float addrspace(1)* %out, align 4
     34   ret void
     35 }
     36 
     37 ; FUNC-LABEL: {{^}}unsafe_f32_denormals_rcp_pat_f32:
     38 ; SI: v_div_scale_f32
     39 define void @unsafe_f32_denormals_rcp_pat_f32(float addrspace(1)* %out, float %src) #3 {
     40   %rcp = fdiv float 1.0, %src
     41   store float %rcp, float addrspace(1)* %out, align 4
     42   ret void
     43 }
     44 
     45 ; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f32:
     46 ; SI: v_sqrt_f32_e32
     47 ; SI: v_rcp_f32_e32
     48 define void @safe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #1 {
     49   %sqrt = call float @llvm.sqrt.f32(float %src)
     50   %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
     51   store float %rcp, float addrspace(1)* %out, align 4
     52   ret void
     53 }
     54 
     55 ; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f32:
     56 ; SI: v_rsq_f32_e32
     57 define void @unsafe_rsq_rcp_pat_f32(float addrspace(1)* %out, float %src) #2 {
     58   %sqrt = call float @llvm.sqrt.f32(float %src)
     59   %rcp = call float @llvm.amdgcn.rcp.f32(float %sqrt)
     60   store float %rcp, float addrspace(1)* %out, align 4
     61   ret void
     62 }
     63 
     64 ; FUNC-LABEL: {{^}}rcp_f64:
     65 ; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
     66 ; SI-NOT: [[RESULT]]
     67 ; SI: buffer_store_dwordx2 [[RESULT]]
     68 define void @rcp_f64(double addrspace(1)* %out, double %src) #1 {
     69   %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
     70   store double %rcp, double addrspace(1)* %out, align 8
     71   ret void
     72 }
     73 
     74 ; FUNC-LABEL: {{^}}unsafe_rcp_f64:
     75 ; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
     76 ; SI-NOT: [[RESULT]]
     77 ; SI: buffer_store_dwordx2 [[RESULT]]
     78 define void @unsafe_rcp_f64(double addrspace(1)* %out, double %src) #2 {
     79   %rcp = call double @llvm.amdgcn.rcp.f64(double %src)
     80   store double %rcp, double addrspace(1)* %out, align 8
     81   ret void
     82 }
     83 
     84 ; FUNC-LABEL: {{^}}rcp_pat_f64:
     85 ; SI: v_div_scale_f64
     86 define void @rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
     87   %rcp = fdiv double 1.0, %src
     88   store double %rcp, double addrspace(1)* %out, align 8
     89   ret void
     90 }
     91 
     92 ; FUNC-LABEL: {{^}}unsafe_rcp_pat_f64:
     93 ; SI: v_rcp_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
     94 ; SI-NOT: [[RESULT]]
     95 ; SI: buffer_store_dwordx2 [[RESULT]]
     96 define void @unsafe_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
     97   %rcp = fdiv double 1.0, %src
     98   store double %rcp, double addrspace(1)* %out, align 8
     99   ret void
    100 }
    101 
    102 ; FUNC-LABEL: {{^}}safe_rsq_rcp_pat_f64:
    103 ; SI-NOT: v_rsq_f64_e32
    104 ; SI: v_sqrt_f64
    105 ; SI: v_rcp_f64
    106 define void @safe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #1 {
    107   %sqrt = call double @llvm.sqrt.f64(double %src)
    108   %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
    109   store double %rcp, double addrspace(1)* %out, align 8
    110   ret void
    111 }
    112 
    113 ; FUNC-LABEL: {{^}}unsafe_rsq_rcp_pat_f64:
    114 ; SI: v_rsq_f64_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], s{{\[[0-9]+:[0-9]+\]}}
    115 ; SI-NOT: [[RESULT]]
    116 ; SI: buffer_store_dwordx2 [[RESULT]]
    117 define void @unsafe_rsq_rcp_pat_f64(double addrspace(1)* %out, double %src) #2 {
    118   %sqrt = call double @llvm.sqrt.f64(double %src)
    119   %rcp = call double @llvm.amdgcn.rcp.f64(double %sqrt)
    120   store double %rcp, double addrspace(1)* %out, align 8
    121   ret void
    122 }
    123 
    124 attributes #0 = { nounwind readnone }
    125 attributes #1 = { nounwind "unsafe-fp-math"="false" "target-features"="-fp32-denormals" }
    126 attributes #2 = { nounwind "unsafe-fp-math"="true" "target-features"="-fp32-denormals" }
    127 attributes #3 = { nounwind "unsafe-fp-math"="false" "target-features"="+fp32-denormals" }
    128 attributes #4 = { nounwind "unsafe-fp-math"="true" "target-features"="+fp32-denormals" }
    129