; Cost-model estimates for integer add and sub on AMDGPU, checked with the
; half-rate-64-ops subtarget feature both enabled and disabled.
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck %s
      3 
      4 ; CHECK: 'add_i32'
      5 ; CHECK: estimated cost of 1 for {{.*}} add i32
      6 define void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
      7   %vec = load i32, i32 addrspace(1)* %vaddr
      8   %add = add i32 %vec, %b
      9   store i32 %add, i32 addrspace(1)* %out
     10   ret void
     11 }
     12 
     13 ; CHECK: 'add_v2i32'
     14 ; CHECK: estimated cost of 2 for {{.*}} add <2 x i32>
     15 define void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 {
     16   %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
     17   %add = add <2 x i32> %vec, %b
     18   store <2 x i32> %add, <2 x i32> addrspace(1)* %out
     19   ret void
     20 }
     21 
     22 ; CHECK: 'add_v3i32'
     23 ; CHECK: estimated cost of 3 for {{.*}} add <3 x i32>
     24 define void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 {
     25   %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
     26   %add = add <3 x i32> %vec, %b
     27   store <3 x i32> %add, <3 x i32> addrspace(1)* %out
     28   ret void
     29 }
     30 
     31 ; CHECK: 'add_v4i32'
     32 ; CHECK: estimated cost of 4 for {{.*}} add <4 x i32>
     33 define void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 {
     34   %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
     35   %add = add <4 x i32> %vec, %b
     36   store <4 x i32> %add, <4 x i32> addrspace(1)* %out
     37   ret void
     38 }
     39 
     40 ; CHECK: 'add_i64'
     41 ; CHECK: estimated cost of 2 for {{.*}} add i64
     42 define void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
     43   %vec = load i64, i64 addrspace(1)* %vaddr
     44   %add = add i64 %vec, %b
     45   store i64 %add, i64 addrspace(1)* %out
     46   ret void
     47 }
     48 
     49 ; CHECK: 'add_v2i64'
     50 ; CHECK: estimated cost of 4 for {{.*}} add <2 x i64>
     51 define void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 {
     52   %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
     53   %add = add <2 x i64> %vec, %b
     54   store <2 x i64> %add, <2 x i64> addrspace(1)* %out
     55   ret void
     56 }
     57 
     58 ; CHECK: 'add_v3i64'
     59 ; CHECK: estimated cost of 6 for {{.*}} add <3 x i64>
     60 define void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 {
     61   %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
     62   %add = add <3 x i64> %vec, %b
     63   store <3 x i64> %add, <3 x i64> addrspace(1)* %out
     64   ret void
     65 }
     66 
     67 ; CHECK: 'add_v4i64'
     68 ; CHECK: estimated cost of 8 for {{.*}} add <4 x i64>
     69 define void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr, <4 x i64> %b) #0 {
     70   %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
     71   %add = add <4 x i64> %vec, %b
     72   store <4 x i64> %add, <4 x i64> addrspace(1)* %out
     73   ret void
     74 }
     75 
     76 ; CHECK: 'add_v16i64'
     77 ; CHECK: estimated cost of 32 for {{.*}} add <16 x i64>
     78 define void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 {
     79   %vec = load <16 x i64>, <16 x i64> addrspace(1)* %vaddr
     80   %add = add <16 x i64> %vec, %b
     81   store <16 x i64> %add, <16 x i64> addrspace(1)* %out
     82   ret void
     83 }
     84 
     85 ; CHECK: 'add_i16'
     86 ; CHECK: estimated cost of 1 for {{.*}} add i16
     87 define void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
     88   %vec = load i16, i16 addrspace(1)* %vaddr
     89   %add = add i16 %vec, %b
     90   store i16 %add, i16 addrspace(1)* %out
     91   ret void
     92 }
     93 
     94 ; CHECK: 'add_v2i16'
     95 ; CHECK: estimated cost of 2 for {{.*}} add <2 x i16>
     96 define void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
     97   %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
     98   %add = add <2 x i16> %vec, %b
     99   store <2 x i16> %add, <2 x i16> addrspace(1)* %out
    100   ret void
    101 }
    102 
    103 ; CHECK: 'sub_i32'
    104 ; CHECK: estimated cost of 1 for {{.*}} sub i32
    105 define void @sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
    106   %vec = load i32, i32 addrspace(1)* %vaddr
    107   %sub = sub i32 %vec, %b
    108   store i32 %sub, i32 addrspace(1)* %out
    109   ret void
    110 }
    111 
    112 ; CHECK: 'sub_i64'
    113 ; CHECK: estimated cost of 2 for {{.*}} sub i64
    114 define void @sub_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
    115   %vec = load i64, i64 addrspace(1)* %vaddr
    116   %sub = sub i64 %vec, %b
    117   store i64 %sub, i64 addrspace(1)* %out
    118   ret void
    119 }
    120 ; CHECK: 'sub_i16'
    121 ; CHECK: estimated cost of 1 for {{.*}} sub i16
    122 define void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
    123   %vec = load i16, i16 addrspace(1)* %vaddr
    124   %sub = sub i16 %vec, %b
    125   store i16 %sub, i16 addrspace(1)* %out
    126   ret void
    127 }
    128 
    129 ; CHECK: 'sub_v2i16'
    130 ; CHECK: estimated cost of 2 for {{.*}} sub <2 x i16>
    131 define void @sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
    132   %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
    133   %sub = sub <2 x i16> %vec, %b
    134   store <2 x i16> %sub, <2 x i16> addrspace(1)* %out
    135   ret void
    136 }
    137 
    138 attributes #0 = { nounwind }
    139