; Cost-model test for integer add/sub on the amdgcn-unknown-amdhsa triple.
;
; The test is run twice, once with +half-rate-64-ops and once with
; -half-rate-64-ops. Both invocations use the same default check prefix, so
; the expected costs below must hold for both settings of that feature.
;
; Every function follows the same shape: load a value from %vaddr, combine it
; with the %b argument using the operation under test, and store the result
; to %out. Only the add/sub instruction's estimated cost is verified.

; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=+half-rate-64-ops < %s | FileCheck %s
; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck %s

; --- 32-bit add: expected cost equals the element count (1, 2, 3, 4). ---

; CHECK: 'add_i32'
; CHECK: estimated cost of 1 for {{.*}} add i32
define void @add_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %add = add i32 %vec, %b
  store i32 %add, i32 addrspace(1)* %out
  ret void
}

; CHECK: 'add_v2i32'
; CHECK: estimated cost of 2 for {{.*}} add <2 x i32>
define void @add_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %vaddr, <2 x i32> %b) #0 {
  %vec = load <2 x i32>, <2 x i32> addrspace(1)* %vaddr
  %add = add <2 x i32> %vec, %b
  store <2 x i32> %add, <2 x i32> addrspace(1)* %out
  ret void
}

; CHECK: 'add_v3i32'
; CHECK: estimated cost of 3 for {{.*}} add <3 x i32>
define void @add_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %vaddr, <3 x i32> %b) #0 {
  %vec = load <3 x i32>, <3 x i32> addrspace(1)* %vaddr
  %add = add <3 x i32> %vec, %b
  store <3 x i32> %add, <3 x i32> addrspace(1)* %out
  ret void
}

; CHECK: 'add_v4i32'
; CHECK: estimated cost of 4 for {{.*}} add <4 x i32>
define void @add_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %vaddr, <4 x i32> %b) #0 {
  %vec = load <4 x i32>, <4 x i32> addrspace(1)* %vaddr
  %add = add <4 x i32> %vec, %b
  store <4 x i32> %add, <4 x i32> addrspace(1)* %out
  ret void
}

; --- 64-bit add: expected cost is twice the element count (2, 4, 6, 8, 32). ---

; CHECK: 'add_i64'
; CHECK: estimated cost of 2 for {{.*}} add i64
define void @add_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %add = add i64 %vec, %b
  store i64 %add, i64 addrspace(1)* %out
  ret void
}

; CHECK: 'add_v2i64'
; CHECK: estimated cost of 4 for {{.*}} add <2 x i64>
define void @add_v2i64(<2 x i64> addrspace(1)* %out, <2 x i64> addrspace(1)* %vaddr, <2 x i64> %b) #0 {
  %vec = load <2 x i64>, <2 x i64> addrspace(1)* %vaddr
  %add = add <2 x i64> %vec, %b
  store <2 x i64> %add, <2 x i64> addrspace(1)* %out
  ret void
}

; CHECK: 'add_v3i64'
; CHECK: estimated cost of 6 for {{.*}} add <3 x i64>
define void @add_v3i64(<3 x i64> addrspace(1)* %out, <3 x i64> addrspace(1)* %vaddr, <3 x i64> %b) #0 {
  %vec = load <3 x i64>, <3 x i64> addrspace(1)* %vaddr
  %add = add <3 x i64> %vec, %b
  store <3 x i64> %add, <3 x i64> addrspace(1)* %out
  ret void
}

; CHECK: 'add_v4i64'
; CHECK: estimated cost of 8 for {{.*}} add <4 x i64>
define void @add_v4i64(<4 x i64> addrspace(1)* %out, <4 x i64> addrspace(1)* %vaddr, <4 x i64> %b) #0 {
  %vec = load <4 x i64>, <4 x i64> addrspace(1)* %vaddr
  %add = add <4 x i64> %vec, %b
  store <4 x i64> %add, <4 x i64> addrspace(1)* %out
  ret void
}

; CHECK: 'add_v16i64'
; CHECK: estimated cost of 32 for {{.*}} add <16 x i64>
define void @add_v16i64(<16 x i64> addrspace(1)* %out, <16 x i64> addrspace(1)* %vaddr, <16 x i64> %b) #0 {
  %vec = load <16 x i64>, <16 x i64> addrspace(1)* %vaddr
  %add = add <16 x i64> %vec, %b
  store <16 x i64> %add, <16 x i64> addrspace(1)* %out
  ret void
}

; --- 16-bit add: expected cost is 1 for the scalar and 2 for <2 x i16>. ---

; CHECK: 'add_i16'
; CHECK: estimated cost of 1 for {{.*}} add i16
define void @add_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
  %vec = load i16, i16 addrspace(1)* %vaddr
  %add = add i16 %vec, %b
  store i16 %add, i16 addrspace(1)* %out
  ret void
}

; CHECK: 'add_v2i16'
; CHECK: estimated cost of 2 for {{.*}} add <2 x i16>
define void @add_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %add = add <2 x i16> %vec, %b
  store <2 x i16> %add, <2 x i16> addrspace(1)* %out
  ret void
}

; --- sub: expected costs match the corresponding add cases above. ---

; CHECK: 'sub_i32'
; CHECK: estimated cost of 1 for {{.*}} sub i32
define void @sub_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %vaddr, i32 %b) #0 {
  %vec = load i32, i32 addrspace(1)* %vaddr
  %sub = sub i32 %vec, %b
  store i32 %sub, i32 addrspace(1)* %out
  ret void
}

; CHECK: 'sub_i64'
; CHECK: estimated cost of 2 for {{.*}} sub i64
define void @sub_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %vaddr, i64 %b) #0 {
  %vec = load i64, i64 addrspace(1)* %vaddr
  %sub = sub i64 %vec, %b
  store i64 %sub, i64 addrspace(1)* %out
  ret void
}

; CHECK: 'sub_i16'
; CHECK: estimated cost of 1 for {{.*}} sub i16
define void @sub_i16(i16 addrspace(1)* %out, i16 addrspace(1)* %vaddr, i16 %b) #0 {
  %vec = load i16, i16 addrspace(1)* %vaddr
  %sub = sub i16 %vec, %b
  store i16 %sub, i16 addrspace(1)* %out
  ret void
}

; CHECK: 'sub_v2i16'
; CHECK: estimated cost of 2 for {{.*}} sub <2 x i16>
define void @sub_v2i16(<2 x i16> addrspace(1)* %out, <2 x i16> addrspace(1)* %vaddr, <2 x i16> %b) #0 {
  %vec = load <2 x i16>, <2 x i16> addrspace(1)* %vaddr
  %sub = sub <2 x i16> %vec, %b
  store <2 x i16> %sub, <2 x i16> addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }