1 ; RUN: opt -mtriple=amdgcn-unknown-amdhsa -S -amdgpu-annotate-kernel-features < %s | FileCheck -check-prefix=HSA %s 2 3 declare i32 @llvm.amdgcn.workgroup.id.x() #0 4 declare i32 @llvm.amdgcn.workgroup.id.y() #0 5 declare i32 @llvm.amdgcn.workgroup.id.z() #0 6 7 declare i32 @llvm.amdgcn.workitem.id.x() #0 8 declare i32 @llvm.amdgcn.workitem.id.y() #0 9 declare i32 @llvm.amdgcn.workitem.id.z() #0 10 11 declare i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0 12 declare i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0 13 declare i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0 14 15 ; HSA: define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { 16 define amdgpu_kernel void @use_tgid_x(i32 addrspace(1)* %ptr) #1 { 17 %val = call i32 @llvm.amdgcn.workgroup.id.x() 18 store i32 %val, i32 addrspace(1)* %ptr 19 ret void 20 } 21 22 ; HSA: define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #2 { 23 define amdgpu_kernel void @use_tgid_y(i32 addrspace(1)* %ptr) #1 { 24 %val = call i32 @llvm.amdgcn.workgroup.id.y() 25 store i32 %val, i32 addrspace(1)* %ptr 26 ret void 27 } 28 29 ; HSA: define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #2 { 30 define amdgpu_kernel void @multi_use_tgid_y(i32 addrspace(1)* %ptr) #1 { 31 %val0 = call i32 @llvm.amdgcn.workgroup.id.y() 32 store volatile i32 %val0, i32 addrspace(1)* %ptr 33 %val1 = call i32 @llvm.amdgcn.workgroup.id.y() 34 store volatile i32 %val1, i32 addrspace(1)* %ptr 35 ret void 36 } 37 38 ; HSA: define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #2 { 39 define amdgpu_kernel void @use_tgid_x_y(i32 addrspace(1)* %ptr) #1 { 40 %val0 = call i32 @llvm.amdgcn.workgroup.id.x() 41 %val1 = call i32 @llvm.amdgcn.workgroup.id.y() 42 store volatile i32 %val0, i32 addrspace(1)* %ptr 43 store volatile i32 %val1, i32 addrspace(1)* %ptr 44 ret void 45 } 46 47 ; HSA: define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #3 { 48 define amdgpu_kernel void @use_tgid_z(i32 addrspace(1)* %ptr) #1 { 49 %val = call i32 @llvm.amdgcn.workgroup.id.z() 50 store i32 %val, i32 addrspace(1)* %ptr 51 ret void 52 } 53 54 ; HSA: define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #3 { 55 define amdgpu_kernel void @use_tgid_x_z(i32 addrspace(1)* %ptr) #1 { 56 %val0 = call i32 @llvm.amdgcn.workgroup.id.x() 57 %val1 = call i32 @llvm.amdgcn.workgroup.id.z() 58 store volatile i32 %val0, i32 addrspace(1)* %ptr 59 store volatile i32 %val1, i32 addrspace(1)* %ptr 60 ret void 61 } 62 63 ; HSA: define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #4 { 64 define amdgpu_kernel void @use_tgid_y_z(i32 addrspace(1)* %ptr) #1 { 65 %val0 = call i32 @llvm.amdgcn.workgroup.id.y() 66 %val1 = call i32 @llvm.amdgcn.workgroup.id.z() 67 store volatile i32 %val0, i32 addrspace(1)* %ptr 68 store volatile i32 %val1, i32 addrspace(1)* %ptr 69 ret void 70 } 71 72 ; HSA: define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #4 { 73 define amdgpu_kernel void @use_tgid_x_y_z(i32 addrspace(1)* %ptr) #1 { 74 %val0 = call i32 @llvm.amdgcn.workgroup.id.x() 75 %val1 = call i32 @llvm.amdgcn.workgroup.id.y() 76 %val2 = call i32 @llvm.amdgcn.workgroup.id.z() 77 store volatile i32 %val0, i32 addrspace(1)* %ptr 78 store volatile i32 %val1, i32 addrspace(1)* %ptr 79 store volatile i32 %val2, i32 addrspace(1)* %ptr 80 ret void 81 } 82 83 ; HSA: define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { 84 define amdgpu_kernel void @use_tidig_x(i32 addrspace(1)* %ptr) #1 { 85 %val = call i32 @llvm.amdgcn.workitem.id.x() 86 store i32 %val, i32 addrspace(1)* %ptr 87 ret void 88 } 89 90 ; HSA: define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #5 { 91 define amdgpu_kernel void @use_tidig_y(i32 addrspace(1)* %ptr) #1 { 92 %val = call i32 @llvm.amdgcn.workitem.id.y() 93 store i32 %val, i32 addrspace(1)* %ptr 94 ret void 95 } 96 97 ; HSA: define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #6 { 98 define amdgpu_kernel void @use_tidig_z(i32 addrspace(1)* %ptr) #1 { 99 %val = call i32 @llvm.amdgcn.workitem.id.z() 100 store i32 %val, i32 addrspace(1)* %ptr 101 ret void 102 } 103 104 ; HSA: define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { 105 define amdgpu_kernel void @use_tidig_x_tgid_x(i32 addrspace(1)* %ptr) #1 { 106 %val0 = call i32 @llvm.amdgcn.workitem.id.x() 107 %val1 = call i32 @llvm.amdgcn.workgroup.id.x() 108 store volatile i32 %val0, i32 addrspace(1)* %ptr 109 store volatile i32 %val1, i32 addrspace(1)* %ptr 110 ret void 111 } 112 113 ; HSA: define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #7 { 114 define amdgpu_kernel void @use_tidig_y_tgid_y(i32 addrspace(1)* %ptr) #1 { 115 %val0 = call i32 @llvm.amdgcn.workitem.id.y() 116 %val1 = call i32 @llvm.amdgcn.workgroup.id.y() 117 store volatile i32 %val0, i32 addrspace(1)* %ptr 118 store volatile i32 %val1, i32 addrspace(1)* %ptr 119 ret void 120 } 121 122 ; HSA: define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #8 { 123 define amdgpu_kernel void @use_tidig_x_y_z(i32 addrspace(1)* %ptr) #1 { 124 %val0 = call i32 @llvm.amdgcn.workitem.id.x() 125 %val1 = call i32 @llvm.amdgcn.workitem.id.y() 126 %val2 = call i32 @llvm.amdgcn.workitem.id.z() 127 store volatile i32 %val0, i32 addrspace(1)* %ptr 128 store volatile i32 %val1, i32 addrspace(1)* %ptr 129 store volatile i32 %val2, i32 addrspace(1)* %ptr 130 ret void 131 } 132 133 ; HSA: define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #9 { 134 define amdgpu_kernel void @use_all_workitems(i32 addrspace(1)* %ptr) #1 { 135 %val0 = call i32 @llvm.amdgcn.workitem.id.x() 136 %val1 = call i32 @llvm.amdgcn.workitem.id.y() 137 %val2 = call i32 @llvm.amdgcn.workitem.id.z() 138 %val3 = call i32 @llvm.amdgcn.workgroup.id.x() 139 %val4 = call i32 @llvm.amdgcn.workgroup.id.y() 140 %val5 = call i32 @llvm.amdgcn.workgroup.id.z() 141 store volatile i32 %val0, i32 addrspace(1)* %ptr 142 store volatile i32 %val1, i32 addrspace(1)* %ptr 143 store volatile i32 %val2, i32 addrspace(1)* %ptr 144 store volatile i32 %val3, i32 addrspace(1)* %ptr 145 store volatile i32 %val4, i32 addrspace(1)* %ptr 146 store volatile i32 %val5, i32 addrspace(1)* %ptr 147 ret void 148 } 149 150 ; HSA: define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #10 { 151 define amdgpu_kernel void @use_dispatch_ptr(i32 addrspace(1)* %ptr) #1 { 152 %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() 153 %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)* 154 %val = load i32, i32 addrspace(4)* %bc 155 store i32 %val, i32 addrspace(1)* %ptr 156 ret void 157 } 158 159 ; HSA: define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #11 { 160 define amdgpu_kernel void @use_queue_ptr(i32 addrspace(1)* %ptr) #1 { 161 %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.queue.ptr() 162 %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)* 163 %val = load i32, i32 addrspace(4)* %bc 164 store i32 %val, i32 addrspace(1)* %ptr 165 ret void 166 } 167 168 ; HSA: define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #12 { 169 define amdgpu_kernel void @use_kernarg_segment_ptr(i32 addrspace(1)* %ptr) #1 { 170 %dispatch.ptr = call i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() 171 %bc = bitcast i8 addrspace(4)* %dispatch.ptr to i32 addrspace(4)* 172 %val = load i32, i32 addrspace(4)* %bc 173 store i32 %val, i32 addrspace(1)* %ptr 174 ret void 175 } 176 177 ; HSA: define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #11 { 178 define amdgpu_kernel void @use_group_to_flat_addrspacecast(i32 addrspace(3)* %ptr) #1 { 179 %stof = addrspacecast i32 addrspace(3)* %ptr to i32* 180 store volatile i32 0, i32* %stof 181 ret void 182 } 183 184 ; HSA: define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #11 { 185 define amdgpu_kernel void @use_private_to_flat_addrspacecast(i32 addrspace(5)* %ptr) #1 { 186 %stof = addrspacecast i32 addrspace(5)* %ptr to i32* 187 store volatile i32 0, i32* %stof 188 ret void 189 } 190 191 ; HSA: define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 { 192 define amdgpu_kernel void @use_flat_to_group_addrspacecast(i32* %ptr) #1 { 193 %ftos = addrspacecast i32* %ptr to i32 addrspace(3)* 194 store volatile i32 0, i32 addrspace(3)* %ftos 195 ret void 196 } 197 198 ; HSA: define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 { 199 define amdgpu_kernel void @use_flat_to_private_addrspacecast(i32* %ptr) #1 { 200 %ftos = addrspacecast i32* %ptr to i32 addrspace(5)* 201 store volatile i32 0, i32 addrspace(5)* %ftos 202 ret void 203 } 204 205 ; No-op addrspacecast should not use queue ptr 206 ; HSA: define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { 207 define amdgpu_kernel void @use_global_to_flat_addrspacecast(i32 addrspace(1)* %ptr) #1 { 208 %stof = addrspacecast i32 addrspace(1)* %ptr to i32* 209 store volatile i32 0, i32* %stof 210 ret void 211 } 212 213 ; HSA: define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 { 214 define amdgpu_kernel void @use_constant_to_flat_addrspacecast(i32 addrspace(4)* %ptr) #1 { 215 %stof = addrspacecast i32 addrspace(4)* %ptr to i32* 216 %ld = load volatile i32, i32* %stof 217 ret void 218 } 219 220 ; HSA: define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 { 221 define amdgpu_kernel void @use_flat_to_global_addrspacecast(i32* %ptr) #1 { 222 %ftos = addrspacecast i32* %ptr to i32 addrspace(1)* 223 store volatile i32 0, i32 addrspace(1)* %ftos 224 ret void 225 } 226 227 ; HSA: define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 { 228 define amdgpu_kernel void @use_flat_to_constant_addrspacecast(i32* %ptr) #1 { 229 %ftos = addrspacecast i32* %ptr to i32 addrspace(4)* 230 %ld = load volatile i32, i32 addrspace(4)* %ftos 231 ret void 232 } 233 234 attributes #0 = { nounwind readnone speculatable } 235 attributes #1 = { nounwind } 236 237 ; HSA: attributes #0 = { nounwind readnone speculatable } 238 ; HSA: attributes #1 = { nounwind } 239 ; HSA: attributes #2 = { nounwind "amdgpu-work-group-id-y" } 240 ; HSA: attributes #3 = { nounwind "amdgpu-work-group-id-z" } 241 ; HSA: attributes #4 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" } 242 ; HSA: attributes #5 = { nounwind "amdgpu-work-item-id-y" } 243 ; HSA: attributes #6 = { nounwind "amdgpu-work-item-id-z" } 244 ; HSA: attributes #7 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-item-id-y" } 245 ; HSA: attributes #8 = { nounwind "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } 246 ; HSA: attributes #9 = { nounwind "amdgpu-work-group-id-y" "amdgpu-work-group-id-z" "amdgpu-work-item-id-y" "amdgpu-work-item-id-z" } 247 ; HSA: attributes #10 = { nounwind "amdgpu-dispatch-ptr" } 248 ; HSA: attributes #11 = { nounwind "amdgpu-queue-ptr" } 249 ; HSA: attributes #12 = { nounwind "amdgpu-kernarg-segment-ptr" } 250