Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM %s
      2 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=kaveri -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,CISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM  %s
      3 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=tahiti -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SIFASTF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM  %s
      4 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=verde -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=ALL,SISLOWF64,NOFP32DENORM,NOFP16,NOFP16-NOFP32DENORM  %s
      5 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=hawaii -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,SLOWFP32DENORMS,NOFP16,NOFP16-FP32DENORM %s
      6 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 -mattr=+fp32-denormals < %s | FileCheck -check-prefixes=ALL,FP32DENORMS,FASTFP32DENORMS,FP16 %s
      7 
      8 ; ALL: 'fdiv_f32'
      9 ; NOFP32DENORM: estimated cost of 12 for {{.*}} fdiv float
     10 ; FP32DENORMS: estimated cost of 10 for {{.*}} fdiv float
     11 define amdgpu_kernel void @fdiv_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
     12   %num = load float, float addrspace(1)* %vaddr  ; dividend, read through %vaddr
     13   %quot = fdiv float %num, %b  ; scalar f32 divide matched by the cost checks above
     14   store float %quot, float addrspace(1)* %out  ; quotient written through %out
     15   ret void
     16 }
     17 
     18 ; ALL: 'fdiv_v2f32'
     19 ; NOFP32DENORM: estimated cost of 24 for {{.*}} fdiv <2 x float>
     20 ; FP32DENORMS: estimated cost of 20 for {{.*}} fdiv <2 x float>
     21 define amdgpu_kernel void @fdiv_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
     22   %num = load <2 x float>, <2 x float> addrspace(1)* %vaddr  ; dividend vector, read through %vaddr
     23   %quot = fdiv <2 x float> %num, %b  ; 2-element f32 divide matched by the cost checks above
     24   store <2 x float> %quot, <2 x float> addrspace(1)* %out  ; quotient written through %out
     25   ret void
     26 }
     27 
     28 ; ALL: 'fdiv_v3f32'
     29 ; NOFP32DENORM: estimated cost of 36 for {{.*}} fdiv <3 x float>
     30 ; FP32DENORMS: estimated cost of 30 for {{.*}} fdiv <3 x float>
     31 define amdgpu_kernel void @fdiv_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
     32   %num = load <3 x float>, <3 x float> addrspace(1)* %vaddr  ; dividend vector, read through %vaddr
     33   %quot = fdiv <3 x float> %num, %b  ; 3-element f32 divide matched by the cost checks above
     34   store <3 x float> %quot, <3 x float> addrspace(1)* %out  ; quotient written through %out
     35   ret void
     36 }
     37 
     38 ; ALL: 'fdiv_f64'
     39 ; CIFASTF64: estimated cost of 29 for {{.*}} fdiv double
     40 ; CISLOWF64: estimated cost of 33 for {{.*}} fdiv double
     41 ; SIFASTF64: estimated cost of 32 for {{.*}} fdiv double
     42 ; SISLOWF64: estimated cost of 36 for {{.*}} fdiv double
     43 define amdgpu_kernel void @fdiv_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
     44   %num = load double, double addrspace(1)* %vaddr  ; dividend, read through %vaddr
     45   %quot = fdiv double %num, %b  ; scalar f64 divide matched by the cost checks above
     46   store double %quot, double addrspace(1)* %out  ; quotient written through %out
     47   ret void
     48 }
     49 
     50 ; ALL: 'fdiv_v2f64'
     51 ; CIFASTF64: estimated cost of 58 for {{.*}} fdiv <2 x double>
     52 ; CISLOWF64: estimated cost of 66 for {{.*}} fdiv <2 x double>
     53 ; SIFASTF64: estimated cost of 64 for {{.*}} fdiv <2 x double>
     54 ; SISLOWF64: estimated cost of 72 for {{.*}} fdiv <2 x double>
     55 define amdgpu_kernel void @fdiv_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
     56   %num = load <2 x double>, <2 x double> addrspace(1)* %vaddr  ; dividend vector, read through %vaddr
     57   %quot = fdiv <2 x double> %num, %b  ; 2-element f64 divide matched by the cost checks above
     58   store <2 x double> %quot, <2 x double> addrspace(1)* %out  ; quotient written through %out
     59   ret void
     60 }
     61 
     62 ; ALL: 'fdiv_v3f64'
     63 ; CIFASTF64: estimated cost of 87 for {{.*}} fdiv <3 x double>
     64 ; CISLOWF64: estimated cost of 99 for {{.*}} fdiv <3 x double>
     65 ; SIFASTF64: estimated cost of 96 for {{.*}} fdiv <3 x double>
     66 ; SISLOWF64: estimated cost of 108 for {{.*}} fdiv <3 x double>
     67 define amdgpu_kernel void @fdiv_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
     68   %num = load <3 x double>, <3 x double> addrspace(1)* %vaddr  ; dividend vector, read through %vaddr
     69   %quot = fdiv <3 x double> %num, %b  ; 3-element f64 divide matched by the cost checks above
     70   store <3 x double> %quot, <3 x double> addrspace(1)* %out  ; quotient written through %out
     71   ret void
     72 }
     73 
     74 ; ALL: 'fdiv_f16'
     75 ; NOFP16-NOFP32DENORM: estimated cost of 12 for {{.*}} fdiv half
     76 ; NOFP16-FP32DENORM: estimated cost of 10 for {{.*}} fdiv half
     77 ; FP16: estimated cost of 10 for {{.*}} fdiv half
     78 define amdgpu_kernel void @fdiv_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
     79   %num = load half, half addrspace(1)* %vaddr  ; dividend, read through %vaddr
     80   %quot = fdiv half %num, %b  ; scalar f16 divide matched by the cost checks above
     81   store half %quot, half addrspace(1)* %out  ; quotient written through %out
     82   ret void
     83 }
     84 
     85 ; ALL: 'fdiv_v2f16'
     86 ; NOFP16-NOFP32DENORM: estimated cost of 24 for {{.*}} fdiv <2 x half>
     87 ; NOFP16-FP32DENORM: estimated cost of 20 for {{.*}} fdiv <2 x half>
     88 ; FP16: estimated cost of 20 for {{.*}} fdiv <2 x half>
     89 define amdgpu_kernel void @fdiv_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
     90   %num = load <2 x half>, <2 x half> addrspace(1)* %vaddr  ; dividend vector, read through %vaddr
     91   %quot = fdiv <2 x half> %num, %b  ; 2-element f16 divide matched by the cost checks above
     92   store <2 x half> %quot, <2 x half> addrspace(1)* %out  ; quotient written through %out
     93   ret void
     94 }
     95 
     96 ; ALL: 'fdiv_v4f16'
     97 ; NOFP16-NOFP32DENORM: estimated cost of 48 for {{.*}} fdiv <4 x half>
     98 ; NOFP16-FP32DENORM: estimated cost of 40 for {{.*}} fdiv <4 x half>
     99 ; FP16: estimated cost of 40 for {{.*}} fdiv <4 x half>
    100 define amdgpu_kernel void @fdiv_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
    101   %num = load <4 x half>, <4 x half> addrspace(1)* %vaddr  ; dividend vector, read through %vaddr
    102   %quot = fdiv <4 x half> %num, %b  ; 4-element f16 divide matched by the cost checks above
    103   store <4 x half> %quot, <4 x half> addrspace(1)* %out  ; quotient written through %out
    104   ret void
    105 }
    106 
    107 ; ALL: 'rcp_f32'
    108 ; NOFP32DENORM: estimated cost of 3 for {{.*}} fdiv float
    109 ; SLOWFP32DENORMS: estimated cost of 10 for {{.*}} fdiv float
    110 ; FASTFP32DENORMS: estimated cost of 10 for {{.*}} fdiv float
    111 define amdgpu_kernel void @rcp_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr) #0 {
    112   %den = load float, float addrspace(1)* %vaddr  ; divisor, read through %vaddr
    113   %recip = fdiv float 1.0, %den  ; constant 1.0 numerator: the f32 reciprocal form being costed
    114   store float %recip, float addrspace(1)* %out  ; reciprocal written through %out
    115   ret void
    116 }
    117 
    118 ; ALL: 'rcp_f16'
    119 ; NOFP16-NOFP32DENORM: estimated cost of 3 for {{.*}} fdiv half
    120 ; NOFP16-FP32DENORM: estimated cost of 10 for {{.*}} fdiv half
    121 ; FP16: estimated cost of 3 for {{.*}} fdiv half
    122 define amdgpu_kernel void @rcp_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr) #0 {
    123   %den = load half, half addrspace(1)* %vaddr  ; divisor, read through %vaddr
    124   %recip = fdiv half 1.0, %den  ; constant 1.0 numerator: the f16 reciprocal form being costed
    125   store half %recip, half addrspace(1)* %out  ; reciprocal written through %out
    126   ret void
    127 }
    128 
    129 ; ALL: 'rcp_f64'
    130 ; CIFASTF64: estimated cost of 29 for {{.*}} fdiv double
    131 ; CISLOWF64: estimated cost of 33 for {{.*}} fdiv double
    132 ; SIFASTF64: estimated cost of 32 for {{.*}} fdiv double
    133 ; SISLOWF64: estimated cost of 36 for {{.*}} fdiv double
    134 define amdgpu_kernel void @rcp_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr) #0 {
    135   %den = load double, double addrspace(1)* %vaddr  ; divisor, read through %vaddr
    136   %recip = fdiv double 1.0, %den  ; constant 1.0 numerator: the f64 reciprocal form being costed
    137   store double %recip, double addrspace(1)* %out  ; reciprocal written through %out
    138   ret void
    139 }
    140 
    141 ; ALL: 'rcp_v2f32'
    142 ; NOFP32DENORM: estimated cost of 6 for {{.*}} fdiv <2 x float>
    143 ; SLOWFP32DENORMS: estimated cost of 20 for {{.*}} fdiv <2 x float>
    144 ; FASTFP32DENORMS: estimated cost of 20 for {{.*}} fdiv <2 x float>
    145 define amdgpu_kernel void @rcp_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr) #0 {
    146   %den = load <2 x float>, <2 x float> addrspace(1)* %vaddr  ; divisor vector, read through %vaddr
    147   %recip = fdiv <2 x float> <float 1.0, float 1.0>, %den  ; all-ones numerator: the 2-element f32 reciprocal form being costed
    148   store <2 x float> %recip, <2 x float> addrspace(1)* %out  ; reciprocal written through %out
    149   ret void
    150 }
    151 
    152 ; ALL: 'rcp_v2f16'
    153 ; NOFP16-NOFP32DENORM: estimated cost of 6 for {{.*}} fdiv <2 x half>
    154 ; NOFP16-FP32DENORM: estimated cost of 20 for {{.*}} fdiv <2 x half>
    155 ; FP16: estimated cost of 6 for {{.*}} fdiv <2 x half>
    156 define amdgpu_kernel void @rcp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr) #0 {
    157   %den = load <2 x half>, <2 x half> addrspace(1)* %vaddr  ; divisor vector, read through %vaddr
    158   %recip = fdiv <2 x half> <half 1.0, half 1.0>, %den  ; all-ones numerator: the 2-element f16 reciprocal form being costed
    159   store <2 x half> %recip, <2 x half> addrspace(1)* %out  ; reciprocal written through %out
    160   ret void
    161 }
    162 
    163 attributes #0 = { nounwind }  ; attribute group referenced as #0 by the kernel definitions above
    164