; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck -check-prefix=FASTF64 -check-prefix=ALL %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefix=SLOWF64 -check-prefix=ALL %s
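
; Check the cost model's estimates for fadd on scalar and vector f32, f64,
; and f16 types. The f64 costs differ depending on whether the subtarget
; supports half-rate 64-bit operations (+/-half-rate-64-ops).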

; ALL: 'fadd_f32'
; ALL: estimated cost of 1 for {{.*}} fadd float
define void @fadd_f32(float addrspace(1)* %out, float addrspace(1)* %vaddr, float %b) #0 {
  %vec = load float, float addrspace(1)* %vaddr
  %add = fadd float %vec, %b
  store float %add, float addrspace(1)* %out
  ret void
}

; ALL: 'fadd_v2f32'
; ALL: estimated cost of 2 for {{.*}} fadd <2 x float>
define void @fadd_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %vaddr, <2 x float> %b) #0 {
  %vec = load <2 x float>, <2 x float> addrspace(1)* %vaddr
  %add = fadd <2 x float> %vec, %b
  store <2 x float> %add, <2 x float> addrspace(1)* %out
  ret void
}

; ALL: 'fadd_v3f32'
; ALL: estimated cost of 3 for {{.*}} fadd <3 x float>
define void @fadd_v3f32(<3 x float> addrspace(1)* %out, <3 x float> addrspace(1)* %vaddr, <3 x float> %b) #0 {
  %vec = load <3 x float>, <3 x float> addrspace(1)* %vaddr
  %add = fadd <3 x float> %vec, %b
  store <3 x float> %add, <3 x float> addrspace(1)* %out
  ret void
}

; ALL: 'fadd_f64'
; FASTF64: estimated cost of 2 for {{.*}} fadd double
; SLOWF64: estimated cost of 3 for {{.*}} fadd double
define void @fadd_f64(double addrspace(1)* %out, double addrspace(1)* %vaddr, double %b) #0 {
  %vec = load double, double addrspace(1)* %vaddr
  %add = fadd double %vec, %b
  store double %add, double addrspace(1)* %out
  ret void
}

; ALL: 'fadd_v2f64'
; FASTF64: estimated cost of 4 for {{.*}} fadd <2 x double>
; SLOWF64: estimated cost of 6 for {{.*}} fadd <2 x double>
define void @fadd_v2f64(<2 x double> addrspace(1)* %out, <2 x double> addrspace(1)* %vaddr, <2 x double> %b) #0 {
  %vec = load <2 x double>, <2 x double> addrspace(1)* %vaddr
  %add = fadd <2 x double> %vec, %b
  store <2 x double> %add, <2 x double> addrspace(1)* %out
  ret void
}

; ALL: 'fadd_v3f64'
; FASTF64: estimated cost of 6 for {{.*}} fadd <3 x double>
; SLOWF64: estimated cost of 9 for {{.*}} fadd <3 x double>
define void @fadd_v3f64(<3 x double> addrspace(1)* %out, <3 x double> addrspace(1)* %vaddr, <3 x double> %b) #0 {
  %vec = load <3 x double>, <3 x double> addrspace(1)* %vaddr
  %add = fadd <3 x double> %vec, %b
  store <3 x double> %add, <3 x double> addrspace(1)* %out
  ret void
}

; ALL: 'fadd_f16'
; ALL: estimated cost of 1 for {{.*}} fadd half
define void @fadd_f16(half addrspace(1)* %out, half addrspace(1)* %vaddr, half %b) #0 {
  %vec = load half, half addrspace(1)* %vaddr
  %add = fadd half %vec, %b
  store half %add, half addrspace(1)* %out
  ret void
}

; ALL: 'fadd_v2f16'
; ALL: estimated cost of 2 for {{.*}} fadd <2 x half>
define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %vaddr, <2 x half> %b) #0 {
  %vec = load <2 x half>, <2 x half> addrspace(1)* %vaddr
  %add = fadd <2 x half> %vec, %b
  store <2 x half> %add, <2 x half> addrspace(1)* %out
  ret void
}

; ALL: 'fadd_v4f16'
; ALL: estimated cost of 4 for {{.*}} fadd <4 x half>
define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %vaddr, <4 x half> %b) #0 {
  %vec = load <4 x half>, <4 x half> addrspace(1)* %vaddr
  %add = fadd <4 x half> %vec, %b
  store <4 x half> %add, <4 x half> addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }