; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cypress -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen test for f32 reciprocal patterns. Each kernel below performs one
; fdiv and its check lines name the instruction the division must turn into.
; The two amdgcn runs are matched with the GCN prefix, the two r600 runs with
; the EG prefix, and all four runs share the FUNC prefix for function labels.

      6 ; FUNC-LABEL: {{^}}rcp_pat_f32:
      7 ; GCN: s_load_dword [[SRC:s[0-9]+]]
      8 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
      9 ; GCN: buffer_store_dword [[RCP]]
     10 
     11 ; EG: RECIP_IEEE
     12 define amdgpu_kernel void @rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
     13   %rcp = fdiv float 1.0, %src
     14   store float %rcp, float addrspace(1)* %out, align 4
     15   ret void
     16 }
     17 
     18 ; FUNC-LABEL: {{^}}rcp_ulp25_pat_f32:
     19 ; GCN: s_load_dword [[SRC:s[0-9]+]]
     20 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
     21 ; GCN: buffer_store_dword [[RCP]]
     22 
     23 ; EG: RECIP_IEEE
     24 define amdgpu_kernel void @rcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
     25   %rcp = fdiv float 1.0, %src, !fpmath !0
     26   store float %rcp, float addrspace(1)* %out, align 4
     27   ret void
     28 }
     29 
     30 ; FUNC-LABEL: {{^}}rcp_fast_ulp25_pat_f32:
     31 ; GCN: s_load_dword [[SRC:s[0-9]+]]
     32 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
     33 ; GCN: buffer_store_dword [[RCP]]
     34 
     35 ; EG: RECIP_IEEE
     36 define amdgpu_kernel void @rcp_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
     37   %rcp = fdiv fast float 1.0, %src, !fpmath !0
     38   store float %rcp, float addrspace(1)* %out, align 4
     39   ret void
     40 }
     41 
     42 ; FUNC-LABEL: {{^}}rcp_arcp_ulp25_pat_f32:
     43 ; GCN: s_load_dword [[SRC:s[0-9]+]]
     44 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
     45 ; GCN: buffer_store_dword [[RCP]]
     46 
     47 ; EG: RECIP_IEEE
     48 define amdgpu_kernel void @rcp_arcp_ulp25_pat_f32(float addrspace(1)* %out, float %src) #0 {
     49   %rcp = fdiv arcp float 1.0, %src, !fpmath !0
     50   store float %rcp, float addrspace(1)* %out, align 4
     51   ret void
     52 }
     53 
     54 ; FUNC-LABEL: {{^}}rcp_global_fast_ulp25_pat_f32:
     55 ; GCN: s_load_dword [[SRC:s[0-9]+]]
     56 ; GCN: v_rcp_f32_e32 [[RCP:v[0-9]+]], [[SRC]]
     57 ; GCN: buffer_store_dword [[RCP]]
     58 
     59 ; EG: RECIP_IEEE
     60 define amdgpu_kernel void @rcp_global_fast_ulp25_pat_f32(float addrspace(1)* %out, float %src) #2 {
     61   %rcp = fdiv float 1.0, %src, !fpmath !0
     62   store float %rcp, float addrspace(1)* %out, align 4
     63   ret void
     64 }
     65 
     66 ; FUNC-LABEL: {{^}}rcp_fabs_pat_f32:
     67 ; GCN: s_load_dword [[SRC:s[0-9]+]]
     68 ; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], |[[SRC]]|
     69 ; GCN: buffer_store_dword [[RCP]]
     70 
     71 ; EG: RECIP_IEEE
     72 define amdgpu_kernel void @rcp_fabs_pat_f32(float addrspace(1)* %out, float %src) #0 {
     73   %src.fabs = call float @llvm.fabs.f32(float %src)
     74   %rcp = fdiv float 1.0, %src.fabs
     75   store float %rcp, float addrspace(1)* %out, align 4
     76   ret void
     77 }
     78 
     79 ; FUNC-LABEL: {{^}}neg_rcp_pat_f32:
     80 ; GCN: s_load_dword [[SRC:s[0-9]+]]
     81 ; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -[[SRC]]
     82 ; GCN: buffer_store_dword [[RCP]]
     83 
     84 ; EG: RECIP_IEEE
     85 define amdgpu_kernel void @neg_rcp_pat_f32(float addrspace(1)* %out, float %src) #0 {
     86   %rcp = fdiv float -1.0, %src
     87   store float %rcp, float addrspace(1)* %out, align 4
     88   ret void
     89 }
     90 
     91 ; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_f32:
     92 ; GCN: s_load_dword [[SRC:s[0-9]+]]
     93 ; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
     94 ; GCN: buffer_store_dword [[RCP]]
     95 define amdgpu_kernel void @rcp_fabs_fneg_pat_f32(float addrspace(1)* %out, float %src) #0 {
     96   %src.fabs = call float @llvm.fabs.f32(float %src)
     97   %src.fabs.fneg = fsub float -0.0, %src.fabs
     98   %rcp = fdiv float 1.0, %src.fabs.fneg
     99   store float %rcp, float addrspace(1)* %out, align 4
    100   ret void
    101 }
    102 
    103 ; FUNC-LABEL: {{^}}rcp_fabs_fneg_pat_multi_use_f32:
    104 ; GCN: s_load_dword [[SRC:s[0-9]+]]
    105 ; GCN: v_rcp_f32_e64 [[RCP:v[0-9]+]], -|[[SRC]]|
    106 ; GCN: v_mul_f32_e64 [[MUL:v[0-9]+]], [[SRC]], -|[[SRC]]|
    107 ; GCN: buffer_store_dword [[RCP]]
    108 ; GCN: buffer_store_dword [[MUL]]
    109 define amdgpu_kernel void @rcp_fabs_fneg_pat_multi_use_f32(float addrspace(1)* %out, float %src) #0 {
    110   %src.fabs = call float @llvm.fabs.f32(float %src)
    111   %src.fabs.fneg = fsub float -0.0, %src.fabs
    112   %rcp = fdiv float 1.0, %src.fabs.fneg
    113   store volatile float %rcp, float addrspace(1)* %out, align 4
    114 
    115   %other = fmul float %src, %src.fabs.fneg
    116   store volatile float %other, float addrspace(1)* %out, align 4
    117   ret void
    118 }
    119 
    120 ; FUNC-LABEL: {{^}}div_arcp_2_x_pat_f32:
    121 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0.5, v{{[0-9]+}}
    122 ; GCN: buffer_store_dword [[MUL]]
    123 define amdgpu_kernel void @div_arcp_2_x_pat_f32(float addrspace(1)* %out) #0 {
    124   %x = load float, float addrspace(1)* undef
    125   %rcp = fdiv arcp float %x, 2.0
    126   store float %rcp, float addrspace(1)* %out, align 4
    127   ret void
    128 }
    129 
    130 ; FUNC-LABEL: {{^}}div_arcp_k_x_pat_f32:
    131 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0x3dcccccd, v{{[0-9]+}}
    132 ; GCN: buffer_store_dword [[MUL]]
    133 define amdgpu_kernel void @div_arcp_k_x_pat_f32(float addrspace(1)* %out) #0 {
    134   %x = load float, float addrspace(1)* undef
    135   %rcp = fdiv arcp float %x, 10.0
    136   store float %rcp, float addrspace(1)* %out, align 4
    137   ret void
    138 }
    139 
    140 ; FUNC-LABEL: {{^}}div_arcp_neg_k_x_pat_f32:
    141 ; GCN: v_mul_f32_e32 [[MUL:v[0-9]+]], 0xbdcccccd, v{{[0-9]+}}
    142 ; GCN: buffer_store_dword [[MUL]]
    143 define amdgpu_kernel void @div_arcp_neg_k_x_pat_f32(float addrspace(1)* %out) #0 {
    144   %x = load float, float addrspace(1)* undef
    145   %rcp = fdiv arcp float %x, -10.0
    146   store float %rcp, float addrspace(1)* %out, align 4
    147   ret void
    148 }
    149 
    150 declare float @llvm.fabs.f32(float) #1
    151 declare float @llvm.sqrt.f32(float) #1
    152 
    153 attributes #0 = { nounwind "unsafe-fp-math"="false" }
    154 attributes #1 = { nounwind readnone }
    155 attributes #2 = { nounwind "unsafe-fp-math"="true" }
    156 
    157 !0 = !{float 2.500000e+00}
    158