; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s

; GCN-LABEL: {{^}}use_dispatch_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_dispatch_ptr() #1 {
  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
  call void @use_dispatch_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr() #1 {
  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
; GCN: enable_sgpr_queue_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
  call void @use_queue_ptr()
  ret void
}

; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_queue_ptr_addrspacecast() #1 {
  %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
  store volatile i32 0, i32* %asc
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
; CIVI: enable_sgpr_queue_ptr = 1

; CIVI: s_mov_b64 s[6:7], s[4:5]
; GFX9-NOT: s_mov_b64
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
  call void @use_queue_ptr_addrspacecast()
  ret void
}

; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
define void @use_kernarg_segment_ptr() #1 {
  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %value = load volatile i32, i32 addrspace(4)* %header_ptr
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
  call void @use_kernarg_segment_ptr()
  ret void
}
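; Note: each of the pointer tests above checks the same handoff: the kernel
; receives the implicit pointer in s[4:5] and copies it to s[6:7], where the
; callee reads it.
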
; GCN-LABEL: {{^}}use_dispatch_id:
; GCN: ; use s[6:7]
define void @use_dispatch_id() #1 {
  %id = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %id)
  ret void
}

; No kernarg segment so that there is a mov to check. With kernarg
; pointer enabled, it happens to end up in the right place anyway.

; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
; GCN: enable_sgpr_dispatch_id = 1

; GCN: s_mov_b64 s[6:7], s[4:5]
define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
  call void @use_dispatch_id()
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_x:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_x() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
; GCN: s_waitcnt
; GCN: s_mov_b32 s5, s32
; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4
; GCN: ; use s6
; GCN: s_setpc_b64
define void @use_stack_workgroup_id_x() #1 {
  %alloca = alloca i32, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_y:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_y() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_z:
; GCN: s_waitcnt
; GCN: ; use s6
define void @use_workgroup_id_z() #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xy:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xy() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xyz:
; GCN: ; use s6
; GCN: ; use s7
; GCN: ; use s8
define void @use_workgroup_id_xyz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  call void asm sideeffect "; use $0", "s"(i32 %val2)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_xz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_xz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}

; GCN-LABEL: {{^}}use_workgroup_id_yz:
; GCN: ; use s6
; GCN: ; use s7
define void @use_workgroup_id_yz() #1 {
  %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
  %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val0)
  call void asm sideeffect "; use $0", "s"(i32 %val1)
  ret void
}
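; The kern_indirect tests below check the kernel-side setup: the callee
; expects the workgroup IDs it uses in consecutive SGPRs starting at s6,
; while the kernel receives them after its other enabled inputs, so copies
; are emitted whenever the registers do not already line up.
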
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-NOT: s6
; GCN: s_mov_b32 s33, s7
; GCN-NOT: s6
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
  call void @use_workgroup_id_xy()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s4, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_mov_b32 s32, s33

; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
  call void @use_workgroup_id_xyz()
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_mov_b32 s32, s33
; GCN-NOT: s6
; GCN-NOT: s7

; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
  call void @use_workgroup_id_xz()
  ret void
}
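; With y and z both used, all three IDs are enabled (the checks require x to
; stay enabled even though it is unused), so the incoming IDs are x in s6,
; y in s7, z in s8, and y/z are shifted down to s6/s7 for the callee.
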
; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s9
; GCN: s_mov_b32 s6, s7
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s7, s8
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
  call void @use_workgroup_id_yz()
  ret void
}

; Argument is in the right place already
; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_x() #1 {
  call void @use_workgroup_id_x()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_y() #1 {
  call void @use_workgroup_id_y()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
; GCN-NOT: s6
define void @func_indirect_use_workgroup_id_z() #1 {
  call void @use_workgroup_id_z()
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.x()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.y()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GCN: ; use s6
define void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
  %val = call i32 @llvm.amdgcn.workgroup.id.z()
  store volatile i32 %arg0, i32 addrspace(1)* undef
  call void asm sideeffect "; use $0", "s"(i32 %val)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s7
; GCN-DAG: v_mov_b32_e32 v0, 0x22b

; GCN-NOT: s6
; GCN: s_mov_b32 s4, s33
; GCN-NOT: s6
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
  call void @other_arg_use_workgroup_id_x(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 0

; GCN-DAG: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7
; GCN-DAG: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
  call void @other_arg_use_workgroup_id_y(i32 555)
  ret void
}

; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 0
; GCN: enable_sgpr_workgroup_id_z = 1

; GCN: s_mov_b32 s33, s8
; GCN-DAG: v_mov_b32_e32 v0, 0x22b
; GCN-DAG: s_mov_b32 s4, s33
; GCN-DAG: s_mov_b32 s6, s7

; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
  call void @other_arg_use_workgroup_id_z(i32 555)
  ret void
}
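; use_every_sgpr_input consumes all of the special SGPR inputs at once:
; dispatch ptr in s[6:7], queue ptr in s[8:9], kernarg segment ptr in
; s[10:11], dispatch id in s[12:13], and workgroup IDs x/y/z in s14-s16.
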
; GCN-LABEL: {{^}}use_every_sgpr_input:
; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s10
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use s[12:13]
; GCN: ; use s14
; GCN: ; use s15
; GCN: ; use s16
define void @use_every_sgpr_input() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
; GCN: enable_sgpr_workgroup_id_x = 1
; GCN: enable_sgpr_workgroup_id_y = 1
; GCN: enable_sgpr_workgroup_id_z = 1
; GCN: enable_sgpr_workgroup_info = 0

; GCN: enable_sgpr_private_segment_buffer = 1
; GCN: enable_sgpr_dispatch_ptr = 1
; GCN: enable_sgpr_queue_ptr = 1
; GCN: enable_sgpr_kernarg_segment_ptr = 1
; GCN: enable_sgpr_dispatch_id = 1
; GCN: enable_sgpr_flat_scratch_init = 1

; GCN: s_mov_b32 s33, s17
; GCN: s_mov_b64 s[12:13], s[10:11]
; GCN: s_mov_b64 s[10:11], s[8:9]
; GCN: s_mov_b64 s[8:9], s[6:7]
; GCN: s_mov_b64 s[6:7], s[4:5]
; GCN: s_mov_b32 s4, s33
; GCN: s_mov_b32 s32, s33
; GCN: s_swappc_b64
define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}

; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
; GCN-NOT: s6
; GCN-NOT: s7
; GCN-NOT: s8
; GCN-NOT: s9
; GCN-NOT: s10
; GCN-NOT: s11
; GCN-NOT: s12
; GCN-NOT: s13
; GCN-NOT: s[6:7]
; GCN-NOT: s[8:9]
; GCN-NOT: s[10:11]
; GCN-NOT: s[12:13]
define void @func_indirect_use_every_sgpr_input() #1 {
  call void @use_every_sgpr_input()
  ret void
}
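; A function that itself receives every SGPR input still has to shuffle the
; workgroup IDs from s14-s16 down to s6-s8 when calling use_workgroup_id_xyz,
; as the s_mov_b32 checks below verify.
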
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16
; GCN: s_swappc_b64
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  call void @use_workgroup_id_xyz()
  ret void
}
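; Same as above, except the call comes first, so the incoming SGPR inputs
; must survive it: the checks below verify they are saved into preserved
; SGPRs before s_swappc_b64 and read back afterwards.
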
; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
; GCN: s_mov_b32 s5, s32
; GCN: s_add_u32 s32, s32, 0x400

; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]

; GCN-DAG: s_mov_b32 s6, s14
; GCN-DAG: s_mov_b32 s7, s15
; GCN-DAG: s_mov_b32 s8, s16

; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]

; GCN: s_swappc_b64

; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_X]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_X]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Y]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Y]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Z]]
; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Z]]
; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN: ; use
; GCN: ; use [[SAVE_X]]
; GCN: ; use [[SAVE_Y]]
; GCN: ; use [[SAVE_Z]]
define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
  %alloca = alloca i32, align 4, addrspace(5)
  call void @use_workgroup_id_xyz()

  store volatile i32 0, i32 addrspace(5)* %alloca

  %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
  %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
  %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc

  %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
  %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
  %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc

  %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
  %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
  %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc

  %val3 = call i64 @llvm.amdgcn.dispatch.id()
  call void asm sideeffect "; use $0", "s"(i64 %val3)

  %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
  call void asm sideeffect "; use $0", "s"(i32 %val4)

  %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
  call void asm sideeffect "; use $0", "s"(i32 %val5)

  %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
  call void asm sideeffect "; use $0", "s"(i32 %val6)

  ret void
}

declare i32 @llvm.amdgcn.workgroup.id.x() #0
declare i32 @llvm.amdgcn.workgroup.id.y() #0
declare i32 @llvm.amdgcn.workgroup.id.z() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
declare i64 @llvm.amdgcn.dispatch.id() #0
declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0

attributes #0 = { nounwind readnone speculatable }
attributes #1 = { nounwind noinline }
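; #0 matches the intrinsic declarations; #1 is noinline so the helper calls
; stay outlined and the SGPR argument shuffling is actually emitted.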