; Codegen test: passing special SGPR inputs (dispatch ptr, queue ptr, kernarg
; segment ptr, dispatch id, workgroup IDs) from kernels/functions to callees.
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,CIVI %s
; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -enable-var-scope -check-prefixes=GCN,GFX9 %s

      4 ; GCN-LABEL: {{^}}use_dispatch_ptr:
      5 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
      6 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
      7 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
      8 define void @use_dispatch_ptr() #1 {
      9   %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
     10   %header_ptr = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
     11   %value = load volatile i32, i32 addrspace(4)* %header_ptr
     12   ret void
     13 }
     14 
     15 ; GCN-LABEL: {{^}}kern_indirect_use_dispatch_ptr:
     16 ; GCN: enable_sgpr_dispatch_ptr = 1
     17 ; GCN: s_mov_b64 s[6:7], s[4:5]
     18 define amdgpu_kernel void @kern_indirect_use_dispatch_ptr(i32) #1 {
     19   call void @use_dispatch_ptr()
     20   ret void
     21 }
     22 
     23 ; GCN-LABEL: {{^}}use_queue_ptr:
     24 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
     25 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
     26 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
     27 define void @use_queue_ptr() #1 {
     28   %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
     29   %header_ptr = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
     30   %value = load volatile i32, i32 addrspace(4)* %header_ptr
     31   ret void
     32 }
     33 
     34 ; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr:
     35 ; GCN: enable_sgpr_queue_ptr = 1
     36 ; GCN: s_mov_b64 s[6:7], s[4:5]
     37 ; GCN: s_swappc_b64
     38 define amdgpu_kernel void @kern_indirect_use_queue_ptr(i32) #1 {
     39   call void @use_queue_ptr()
     40   ret void
     41 }
     42 
     43 ; GCN-LABEL: {{^}}use_queue_ptr_addrspacecast:
     44 ; CIVI: flat_load_dword v[[HI:[0-9]+]], v[0:1]
     45 ; GFX9: s_getreg_b32 [[APERTURE_LOAD:s[0-9]+]]
     46 ; CIVI: v_mov_b32_e32 v[[LO:[0-9]+]], 16
     47 ; GFX9: v_mov_b32_e32 v[[HI:[0-9]+]], [[APERTURE_LOAD]]
     48 ; GFX9: {{flat|global}}_store_dword v{{\[[0-9]+}}:[[HI]]{{\]}}
     49 ; CIVI: {{flat|global}}_store_dword v{{\[}}[[LO]]:[[HI]]{{\]}}
     50 define void @use_queue_ptr_addrspacecast() #1 {
     51   %asc = addrspacecast i32 addrspace(3)* inttoptr (i32 16 to i32 addrspace(3)*) to i32*
     52   store volatile i32 0, i32* %asc
     53   ret void
     54 }
     55 
     56 ; GCN-LABEL: {{^}}kern_indirect_use_queue_ptr_addrspacecast:
     57 ; CIVI: enable_sgpr_queue_ptr = 1
     58 
     59 ; CIVI: s_mov_b64 s[6:7], s[4:5]
     60 ; GFX9-NOT: s_mov_b64
     61 ; GCN: s_swappc_b64
     62 define amdgpu_kernel void @kern_indirect_use_queue_ptr_addrspacecast(i32) #1 {
     63   call void @use_queue_ptr_addrspacecast()
     64   ret void
     65 }
     66 
     67 ; GCN-LABEL: {{^}}use_kernarg_segment_ptr:
     68 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
     69 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
     70 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
     71 define void @use_kernarg_segment_ptr() #1 {
     72   %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
     73   %header_ptr = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
     74   %value = load volatile i32, i32 addrspace(4)* %header_ptr
     75   ret void
     76 }
     77 
     78 ; GCN-LABEL: {{^}}kern_indirect_use_kernarg_segment_ptr:
     79 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
     80 ; GCN: s_mov_b64 s[6:7], s[4:5]
     81 ; GCN: s_swappc_b64
     82 define amdgpu_kernel void @kern_indirect_use_kernarg_segment_ptr(i32) #1 {
     83   call void @use_kernarg_segment_ptr()
     84   ret void
     85 }
     86 
     87 ; GCN-LABEL: {{^}}use_dispatch_id:
     88 ; GCN: ; use s[6:7]
     89 define void @use_dispatch_id() #1 {
     90   %id = call i64 @llvm.amdgcn.dispatch.id()
     91   call void asm sideeffect "; use $0", "s"(i64 %id)
     92   ret void
     93 }
     94 
     95 ; No kernarg segment so that there is a mov to check. With kernarg
     96 ; pointer enabled, it happens to end up in the right place anyway.
     97 
     98 ; GCN-LABEL: {{^}}kern_indirect_use_dispatch_id:
     99 ; GCN: enable_sgpr_dispatch_id = 1
    100 
    101 ; GCN: s_mov_b64 s[6:7], s[4:5]
    102 define amdgpu_kernel void @kern_indirect_use_dispatch_id() #1 {
    103   call void @use_dispatch_id()
    104   ret void
    105 }
    106 
    107 ; GCN-LABEL: {{^}}use_workgroup_id_x:
    108 ; GCN: s_waitcnt
    109 ; GCN: ; use s6
    110 define void @use_workgroup_id_x() #1 {
    111   %val = call i32 @llvm.amdgcn.workgroup.id.x()
    112   call void asm sideeffect "; use $0", "s"(i32 %val)
    113   ret void
    114 }
    115 
    116 ; GCN-LABEL: {{^}}use_stack_workgroup_id_x:
    117 ; GCN: s_waitcnt
    118 ; GCN: s_mov_b32 s5, s32
    119 ; GCN: buffer_store_dword v0, off, s[0:3], s5 offset:4
    120 ; GCN: ; use s6
    121 ; GCN: s_setpc_b64
    122 define void @use_stack_workgroup_id_x() #1 {
    123   %alloca = alloca i32, addrspace(5)
    124   store volatile i32 0, i32 addrspace(5)* %alloca
    125   %val = call i32 @llvm.amdgcn.workgroup.id.x()
    126   call void asm sideeffect "; use $0", "s"(i32 %val)
    127   ret void
    128 }
    129 
    130 ; GCN-LABEL: {{^}}use_workgroup_id_y:
    131 ; GCN: s_waitcnt
    132 ; GCN: ; use s6
    133 define void @use_workgroup_id_y() #1 {
    134   %val = call i32 @llvm.amdgcn.workgroup.id.y()
    135   call void asm sideeffect "; use $0", "s"(i32 %val)
    136   ret void
    137 }
    138 
    139 ; GCN-LABEL: {{^}}use_workgroup_id_z:
    140 ; GCN: s_waitcnt
    141 ; GCN: ; use s6
    142 define void @use_workgroup_id_z() #1 {
    143   %val = call i32 @llvm.amdgcn.workgroup.id.z()
    144   call void asm sideeffect "; use $0", "s"(i32 %val)
    145   ret void
    146 }
    147 
    148 ; GCN-LABEL: {{^}}use_workgroup_id_xy:
    149 ; GCN: ; use s6
    150 ; GCN: ; use s7
    151 define void @use_workgroup_id_xy() #1 {
    152   %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
    153   %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
    154   call void asm sideeffect "; use $0", "s"(i32 %val0)
    155   call void asm sideeffect "; use $0", "s"(i32 %val1)
    156   ret void
    157 }
    158 
    159 ; GCN-LABEL: {{^}}use_workgroup_id_xyz:
    160 ; GCN: ; use s6
    161 ; GCN: ; use s7
    162 ; GCN: ; use s8
    163 define void @use_workgroup_id_xyz() #1 {
    164   %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
    165   %val1 = call i32 @llvm.amdgcn.workgroup.id.y()
    166   %val2 = call i32 @llvm.amdgcn.workgroup.id.z()
    167   call void asm sideeffect "; use $0", "s"(i32 %val0)
    168   call void asm sideeffect "; use $0", "s"(i32 %val1)
    169   call void asm sideeffect "; use $0", "s"(i32 %val2)
    170   ret void
    171 }
    172 
    173 ; GCN-LABEL: {{^}}use_workgroup_id_xz:
    174 ; GCN: ; use s6
    175 ; GCN: ; use s7
    176 define void @use_workgroup_id_xz() #1 {
    177   %val0 = call i32 @llvm.amdgcn.workgroup.id.x()
    178   %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
    179   call void asm sideeffect "; use $0", "s"(i32 %val0)
    180   call void asm sideeffect "; use $0", "s"(i32 %val1)
    181   ret void
    182 }
    183 
    184 ; GCN-LABEL: {{^}}use_workgroup_id_yz:
    185 ; GCN: ; use s6
    186 ; GCN: ; use s7
    187 define void @use_workgroup_id_yz() #1 {
    188   %val0 = call i32 @llvm.amdgcn.workgroup.id.y()
    189   %val1 = call i32 @llvm.amdgcn.workgroup.id.z()
    190   call void asm sideeffect "; use $0", "s"(i32 %val0)
    191   call void asm sideeffect "; use $0", "s"(i32 %val1)
    192   ret void
    193 }
    194 
    195 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_x:
    196 ; GCN: enable_sgpr_workgroup_id_x = 1
    197 ; GCN: enable_sgpr_workgroup_id_y = 0
    198 ; GCN: enable_sgpr_workgroup_id_z = 0
    199 
    200 ; GCN-NOT: s6
    201 ; GCN: s_mov_b32 s33, s7
    202 ; GCN-NOT: s6
    203 ; GCN: s_mov_b32 s4, s33
    204 ; GCN-NOT: s6
    205 ; GCN: s_mov_b32 s32, s33
    206 ; GCN: s_swappc_b64
    207 define amdgpu_kernel void @kern_indirect_use_workgroup_id_x() #1 {
    208   call void @use_workgroup_id_x()
    209   ret void
    210 }
    211 
    212 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_y:
    213 ; GCN: enable_sgpr_workgroup_id_x = 1
    214 ; GCN: enable_sgpr_workgroup_id_y = 1
    215 ; GCN: enable_sgpr_workgroup_id_z = 0
    216 
    217 ; GCN: s_mov_b32 s33, s8
    218 ; GCN-DAG: s_mov_b32 s4, s33
    219 ; GCN-DAG: s_mov_b32 s6, s7
    220 ; GCN: s_mov_b32 s32, s33
    221 ; GCN: s_swappc_b64
    222 define amdgpu_kernel void @kern_indirect_use_workgroup_id_y() #1 {
    223   call void @use_workgroup_id_y()
    224   ret void
    225 }
    226 
    227 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_z:
    228 ; GCN: enable_sgpr_workgroup_id_x = 1
    229 ; GCN: enable_sgpr_workgroup_id_y = 0
    230 ; GCN: enable_sgpr_workgroup_id_z = 1
    231 
    232 ; GCN: s_mov_b32 s33, s8
    233 ; GCN-DAG: s_mov_b32 s4, s33
    234 ; GCN-DAG: s_mov_b32 s6, s7
    235 ; GCN: s_swappc_b64
    236 define amdgpu_kernel void @kern_indirect_use_workgroup_id_z() #1 {
    237   call void @use_workgroup_id_z()
    238   ret void
    239 }
    240 
    241 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xy:
    242 ; GCN: enable_sgpr_workgroup_id_x = 1
    243 ; GCN: enable_sgpr_workgroup_id_y = 1
    244 ; GCN: enable_sgpr_workgroup_id_z = 0
    245 
    246 ; GCN: s_mov_b32 s33, s8
    247 ; GCN-NOT: s6
    248 ; GCN-NOT: s7
    249 ; GCN: s_mov_b32 s4, s33
    250 ; GCN-NOT: s6
    251 ; GCN-NOT: s7
    252 ; GCN: s_mov_b32 s32, s33
    253 ; GCN-NOT: s6
    254 ; GCN-NOT: s7
    255 ; GCN: s_swappc_b64
    256 define amdgpu_kernel void @kern_indirect_use_workgroup_id_xy() #1 {
    257   call void @use_workgroup_id_xy()
    258   ret void
    259 }
    260 
    261 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xyz:
    262 ; GCN: enable_sgpr_workgroup_id_x = 1
    263 ; GCN: enable_sgpr_workgroup_id_y = 1
    264 ; GCN: enable_sgpr_workgroup_id_z = 1
    265 
    266 ; GCN: s_mov_b32 s33, s9
    267 
    268 ; GCN-NOT: s6
    269 ; GCN-NOT: s7
    270 ; GCN-NOT: s8
    271 
    272 ; GCN: s_mov_b32 s4, s33
    273 
    274 ; GCN-NOT: s6
    275 ; GCN-NOT: s7
    276 ; GCN-NOT: s8
    277 
    278 ; GCN: s_mov_b32 s32, s33
    279 
    280 ; GCN-NOT: s6
    281 ; GCN-NOT: s7
    282 ; GCN-NOT: s8
    283 
    284 ; GCN: s_swappc_b64
    285 define amdgpu_kernel void @kern_indirect_use_workgroup_id_xyz() #1 {
    286   call void @use_workgroup_id_xyz()
    287   ret void
    288 }
    289 
    290 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_xz:
    291 ; GCN: enable_sgpr_workgroup_id_x = 1
    292 ; GCN: enable_sgpr_workgroup_id_y = 0
    293 ; GCN: enable_sgpr_workgroup_id_z = 1
    294 
    295 ; GCN: s_mov_b32 s33, s8
    296 ; GCN-NOT: s6
    297 ; GCN-NOT: s7
    298 
    299 ; GCN: s_mov_b32 s4, s33
    300 ; GCN-NOT: s6
    301 ; GCN-NOT: s7
    302 
    303 ; GCN: s_mov_b32 s32, s33
    304 ; GCN-NOT: s6
    305 ; GCN-NOT: s7
    306 
    307 ; GCN: s_swappc_b64
    308 define amdgpu_kernel void @kern_indirect_use_workgroup_id_xz() #1 {
    309   call void @use_workgroup_id_xz()
    310   ret void
    311 }
    312 
    313 ; GCN-LABEL: {{^}}kern_indirect_use_workgroup_id_yz:
    314 ; GCN: enable_sgpr_workgroup_id_x = 1
    315 ; GCN: enable_sgpr_workgroup_id_y = 1
    316 ; GCN: enable_sgpr_workgroup_id_z = 1
    317 
    318 ; GCN: s_mov_b32 s33, s9
    319 ; GCN: s_mov_b32 s6, s7
    320 ; GCN: s_mov_b32 s4, s33
    321 ; GCN: s_mov_b32 s7, s8
    322 ; GCN: s_mov_b32 s32, s33
    323 ; GCN: s_swappc_b64
    324 define amdgpu_kernel void @kern_indirect_use_workgroup_id_yz() #1 {
    325   call void @use_workgroup_id_yz()
    326   ret void
    327 }
    328 
    329 ; Argument is in right place already
    330 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_x:
    331 ; GCN-NOT: s6
    332 define void @func_indirect_use_workgroup_id_x() #1 {
    333   call void @use_workgroup_id_x()
    334   ret void
    335 }
    336 
    337 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_y:
    338 ; GCN-NOT: s6
    339 define void @func_indirect_use_workgroup_id_y() #1 {
    340   call void @use_workgroup_id_y()
    341   ret void
    342 }
    343 
    344 ; GCN-LABEL: {{^}}func_indirect_use_workgroup_id_z:
    345 ; GCN-NOT: s6
    346 define void @func_indirect_use_workgroup_id_z() #1 {
    347   call void @use_workgroup_id_z()
    348   ret void
    349 }
    350 
    351 ; GCN-LABEL: {{^}}other_arg_use_workgroup_id_x:
    352 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
    353 ; GCN: ; use s6
    354 define void @other_arg_use_workgroup_id_x(i32 %arg0) #1 {
    355   %val = call i32 @llvm.amdgcn.workgroup.id.x()
    356   store volatile i32 %arg0, i32 addrspace(1)* undef
    357   call void asm sideeffect "; use $0", "s"(i32 %val)
    358   ret void
    359 }
    360 
    361 ; GCN-LABEL: {{^}}other_arg_use_workgroup_id_y:
    362 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
    363 ; GCN: ; use s6
    364 define void @other_arg_use_workgroup_id_y(i32 %arg0) #1 {
    365   %val = call i32 @llvm.amdgcn.workgroup.id.y()
    366   store volatile i32 %arg0, i32 addrspace(1)* undef
    367   call void asm sideeffect "; use $0", "s"(i32 %val)
    368   ret void
    369 }
    370 
    371 ; GCN-LABEL: {{^}}other_arg_use_workgroup_id_z:
    372 ; GCN: {{flat|global}}_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
    373 ; GCN: ; use s6
    374 define void @other_arg_use_workgroup_id_z(i32 %arg0) #1 {
    375   %val = call i32 @llvm.amdgcn.workgroup.id.z()
    376   store volatile i32 %arg0, i32 addrspace(1)* undef
    377   call void asm sideeffect "; use $0", "s"(i32 %val)
    378   ret void
    379 }
    380 
    381 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_x:
    382 ; GCN: enable_sgpr_workgroup_id_x = 1
    383 ; GCN: enable_sgpr_workgroup_id_y = 0
    384 ; GCN: enable_sgpr_workgroup_id_z = 0
    385 
    386 ; GCN-DAG: s_mov_b32 s33, s7
    387 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
    388 
    389 ; GCN-NOT: s6
    390 ; GCN: s_mov_b32 s4, s33
    391 ; GCN-NOT: s6
    392 ; GCN-DAG: s_mov_b32 s32, s33
    393 ; GCN: s_swappc_b64
    394 define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_x() #1 {
    395   call void @other_arg_use_workgroup_id_x(i32 555)
    396   ret void
    397 }
    398 
    399 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_y:
    400 ; GCN: enable_sgpr_workgroup_id_x = 1
    401 ; GCN: enable_sgpr_workgroup_id_y = 1
    402 ; GCN: enable_sgpr_workgroup_id_z = 0
    403 
    404 ; GCN-DAG: s_mov_b32 s33, s8
    405 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
    406 ; GCN-DAG: s_mov_b32 s4, s33
    407 ; GCN-DAG: s_mov_b32 s6, s7
    408 ; GCN-DAG: s_mov_b32 s32, s33
    409 ; GCN: s_swappc_b64
    410 define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_y() #1 {
    411   call void @other_arg_use_workgroup_id_y(i32 555)
    412   ret void
    413 }
    414 
    415 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workgroup_id_z:
    416 ; GCN: enable_sgpr_workgroup_id_x = 1
    417 ; GCN: enable_sgpr_workgroup_id_y = 0
    418 ; GCN: enable_sgpr_workgroup_id_z = 1
    419 
    420 ; GCN: s_mov_b32 s33, s8
    421 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
    422 ; GCN-DAG: s_mov_b32 s4, s33
    423 ; GCN-DAG: s_mov_b32 s6, s7
    424 
    425 ; GCN: s_mov_b32 s32, s33
    426 ; GCN: s_swappc_b64
    427 define amdgpu_kernel void @kern_indirect_other_arg_use_workgroup_id_z() #1 {
    428   call void @other_arg_use_workgroup_id_z(i32 555)
    429   ret void
    430 }
    431 
    432 ; GCN-LABEL: {{^}}use_every_sgpr_input:
    433 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
    434 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s6
    435 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s7
    436 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    437 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s8
    438 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s9
    439 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    440 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s10
    441 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s11
    442 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    443 ; GCN: ; use s[12:13]
    444 ; GCN: ; use s14
    445 ; GCN: ; use s15
    446 ; GCN: ; use s16
    447 define void @use_every_sgpr_input() #1 {
    448   %alloca = alloca i32, align 4, addrspace(5)
    449   store volatile i32 0, i32 addrspace(5)* %alloca
    450 
    451   %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
    452   %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
    453   %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
    454 
    455   %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
    456   %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
    457   %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
    458 
    459   %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
    460   %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
    461   %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
    462 
    463   %val3 = call i64 @llvm.amdgcn.dispatch.id()
    464   call void asm sideeffect "; use $0", "s"(i64 %val3)
    465 
    466   %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
    467   call void asm sideeffect "; use $0", "s"(i32 %val4)
    468 
    469   %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
    470   call void asm sideeffect "; use $0", "s"(i32 %val5)
    471 
    472   %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
    473   call void asm sideeffect "; use $0", "s"(i32 %val6)
    474 
    475   ret void
    476 }
    477 
    478 ; GCN-LABEL: {{^}}kern_indirect_use_every_sgpr_input:
    479 ; GCN: enable_sgpr_workgroup_id_x = 1
    480 ; GCN: enable_sgpr_workgroup_id_y = 1
    481 ; GCN: enable_sgpr_workgroup_id_z = 1
    482 ; GCN: enable_sgpr_workgroup_info = 0
    483 
    484 ; GCN: enable_sgpr_private_segment_buffer = 1
    485 ; GCN: enable_sgpr_dispatch_ptr = 1
    486 ; GCN: enable_sgpr_queue_ptr = 1
    487 ; GCN: enable_sgpr_kernarg_segment_ptr = 1
    488 ; GCN: enable_sgpr_dispatch_id = 1
    489 ; GCN: enable_sgpr_flat_scratch_init = 1
    490 
    491 ; GCN: s_mov_b32 s33, s17
    492 ; GCN: s_mov_b64 s[12:13], s[10:11]
    493 ; GCN: s_mov_b64 s[10:11], s[8:9]
    494 ; GCN: s_mov_b64 s[8:9], s[6:7]
    495 ; GCN: s_mov_b64 s[6:7], s[4:5]
    496 ; GCN: s_mov_b32 s4, s33
    497 ; GCN: s_mov_b32 s32, s33
    498 ; GCN: s_swappc_b64
    499 define amdgpu_kernel void @kern_indirect_use_every_sgpr_input() #1 {
    500   call void @use_every_sgpr_input()
    501   ret void
    502 }
    503 
    504 ; GCN-LABEL: {{^}}func_indirect_use_every_sgpr_input:
    505 ; GCN-NOT: s6
    506 ; GCN-NOT: s7
    507 ; GCN-NOT: s8
    508 ; GCN-NOT: s9
    509 ; GCN-NOT: s10
    510 ; GCN-NOT: s11
    511 ; GCN-NOT: s12
    512 ; GCN-NOT: s13
    513 ; GCN-NOT: s[6:7]
    514 ; GCN-NOT: s[8:9]
    515 ; GCN-NOT: s[10:11]
    516 ; GCN-NOT: s[12:13]
    517 define void @func_indirect_use_every_sgpr_input() #1 {
    518   call void @use_every_sgpr_input()
    519   ret void
    520 }
    521 
    522 ; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz:
    523 ; GCN-DAG: s_mov_b32 s6, s14
    524 ; GCN-DAG: s_mov_b32 s7, s15
    525 ; GCN-DAG: s_mov_b32 s8, s16
    526 ; GCN: s_swappc_b64
    527 define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz() #1 {
    528   %alloca = alloca i32, align 4, addrspace(5)
    529   store volatile i32 0, i32 addrspace(5)* %alloca
    530 
    531   %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
    532   %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
    533   %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
    534 
    535   %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
    536   %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
    537   %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
    538 
    539   %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
    540   %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
    541   %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
    542 
    543   %val3 = call i64 @llvm.amdgcn.dispatch.id()
    544   call void asm sideeffect "; use $0", "s"(i64 %val3)
    545 
    546   %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
    547   call void asm sideeffect "; use $0", "s"(i32 %val4)
    548 
    549   %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
    550   call void asm sideeffect "; use $0", "s"(i32 %val5)
    551 
    552   %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
    553   call void asm sideeffect "; use $0", "s"(i32 %val6)
    554 
    555   call void @use_workgroup_id_xyz()
    556   ret void
    557 }
    558 
    559 ; GCN-LABEL: {{^}}func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill:
    560 ; GCN: s_mov_b32 s5, s32
    561 ; GCN: s_add_u32 s32, s32, 0x400
    562 
    563 ; GCN-DAG: s_mov_b32 [[SAVE_X:s[0-57-9][0-9]*]], s14
    564 ; GCN-DAG: s_mov_b32 [[SAVE_Y:s[0-68-9][0-9]*]], s15
    565 ; GCN-DAG: s_mov_b32 [[SAVE_Z:s[0-79][0-9]*]], s16
    566 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[6:7]
    567 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[8:9]
    568 ; GCN-DAG: s_mov_b64 {{s\[[0-9]+:[0-9]+\]}}, s[10:11]
    569 
    570 ; GCN-DAG: s_mov_b32 s6, s14
    571 ; GCN-DAG: s_mov_b32 s7, s15
    572 ; GCN-DAG: s_mov_b32 s8, s16
    573 
    574 ; GCN-DAG: s_mov_b64 s{{\[}}[[LO_X:[0-9]+]]{{\:}}[[HI_X:[0-9]+]]{{\]}}, s[6:7]
    575 ; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Y:[0-9]+]]{{\:}}[[HI_Y:[0-9]+]]{{\]}}, s[8:9]
    576 ; GCN-DAG: s_mov_b64 s{{\[}}[[LO_Z:[0-9]+]]{{\:}}[[HI_Z:[0-9]+]]{{\]}}, s[10:11]
    577 
    578 ; GCN: s_swappc_b64
    579 
    580 ; GCN: buffer_store_dword v{{[0-9]+}}, off, s[0:3], s5 offset:4
    581 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_X]]
    582 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_X]]
    583 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    584 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Y]]
    585 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Y]]
    586 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    587 ; GCN: v_mov_b32_e32 v[[LO:[0-9]+]], s[[LO_Z]]
    588 ; GCN: v_mov_b32_e32 v[[HI:[0-9]+]], s[[HI_Z]]
    589 ; GCN: {{flat|global}}_load_dword v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    590 ; GCN: ; use
    591 ; GCN: ; use [[SAVE_X]]
    592 ; GCN: ; use [[SAVE_Y]]
    593 ; GCN: ; use [[SAVE_Z]]
    594 define void @func_use_every_sgpr_input_call_use_workgroup_id_xyz_spill() #1 {
    595   %alloca = alloca i32, align 4, addrspace(5)
    596   call void @use_workgroup_id_xyz()
    597 
    598   store volatile i32 0, i32 addrspace(5)* %alloca
    599 
    600   %dispatch_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
    601   %dispatch_ptr.bc = bitcast i8 addrspace(4)* %dispatch_ptr to i32 addrspace(4)*
    602   %val0 = load volatile i32, i32 addrspace(4)* %dispatch_ptr.bc
    603 
    604   %queue_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
    605   %queue_ptr.bc = bitcast i8 addrspace(4)* %queue_ptr to i32 addrspace(4)*
    606   %val1 = load volatile i32, i32 addrspace(4)* %queue_ptr.bc
    607 
    608   %kernarg_segment_ptr = call noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
    609   %kernarg_segment_ptr.bc = bitcast i8 addrspace(4)* %kernarg_segment_ptr to i32 addrspace(4)*
    610   %val2 = load volatile i32, i32 addrspace(4)* %kernarg_segment_ptr.bc
    611 
    612   %val3 = call i64 @llvm.amdgcn.dispatch.id()
    613   call void asm sideeffect "; use $0", "s"(i64 %val3)
    614 
    615   %val4 = call i32 @llvm.amdgcn.workgroup.id.x()
    616   call void asm sideeffect "; use $0", "s"(i32 %val4)
    617 
    618   %val5 = call i32 @llvm.amdgcn.workgroup.id.y()
    619   call void asm sideeffect "; use $0", "s"(i32 %val5)
    620 
    621   %val6 = call i32 @llvm.amdgcn.workgroup.id.z()
    622   call void asm sideeffect "; use $0", "s"(i32 %val6)
    623 
    624   ret void
    625 }
    626 
    627 declare i32 @llvm.amdgcn.workgroup.id.x() #0
    628 declare i32 @llvm.amdgcn.workgroup.id.y() #0
    629 declare i32 @llvm.amdgcn.workgroup.id.z() #0
    630 declare noalias i8 addrspace(4)* @llvm.amdgcn.queue.ptr() #0
    631 declare noalias i8 addrspace(4)* @llvm.amdgcn.kernarg.segment.ptr() #0
    632 declare i64 @llvm.amdgcn.dispatch.id() #0
    633 declare noalias i8 addrspace(4)* @llvm.amdgcn.dispatch.ptr() #0
    634 
    635 attributes #0 = { nounwind readnone speculatable }
    636 attributes #1 = { nounwind noinline }