Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=kaveri -enable-ipra=0 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GCN %s
      2 
      3 ; GCN-LABEL: {{^}}use_workitem_id_x:
      4 ; GCN: s_waitcnt
      5 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
      6 ; GCN-NEXT: s_waitcnt
      7 ; GCN-NEXT: s_setpc_b64
        ; Callee that reads workitem ID X and writes it with a volatile store to an
        ; undef global (addrspace(1)) pointer. The checks above expect the ID to be
        ; stored directly from v0; the address register pair may have any indices.
      8 define void @use_workitem_id_x() #1 {
      9   %val = call i32 @llvm.amdgcn.workitem.id.x()
     10   store volatile i32 %val, i32 addrspace(1)* undef
     11   ret void
     12 }
     13 
     14 ; GCN-LABEL: {{^}}use_workitem_id_y:
     15 ; GCN: s_waitcnt
     16 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
     17 ; GCN-NEXT: s_waitcnt
     18 ; GCN-NEXT: s_setpc_b64
        ; Callee that reads only workitem ID Y and writes it with a volatile store to
        ; an undef global pointer. The checks above expect the ID to be stored from
        ; v0; the address register pair may have any indices.
     19 define void @use_workitem_id_y() #1 {
     20   %val = call i32 @llvm.amdgcn.workitem.id.y()
     21   store volatile i32 %val, i32 addrspace(1)* undef
     22   ret void
     23 }
     24 
     25 ; GCN-LABEL: {{^}}use_workitem_id_z:
     26 ; GCN: s_waitcnt
     27 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
     28 ; GCN-NEXT: s_waitcnt
     29 ; GCN-NEXT: s_setpc_b64
        ; Callee that reads only workitem ID Z and writes it with a volatile store to
        ; an undef global pointer. The checks above expect the ID to be stored from
        ; v0; the address register pair may have any indices.
     30 define void @use_workitem_id_z() #1 {
     31   %val = call i32 @llvm.amdgcn.workitem.id.z()
     32   store volatile i32 %val, i32 addrspace(1)* undef
     33   ret void
     34 }
     35 
     36 ; GCN-LABEL: {{^}}use_workitem_id_xy:
     37 ; GCN: s_waitcnt
     38 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
     39 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
     40 ; GCN-NEXT: s_waitcnt
     41 ; GCN-NEXT: s_setpc_b64
        ; Callee that reads workitem IDs X and Y and stores X first, then Y, with
        ; volatile stores to an undef global pointer. The checks above expect X to be
        ; stored from v0 and Y from v1, back to back.
     42 define void @use_workitem_id_xy() #1 {
     43   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
     44   %val1 = call i32 @llvm.amdgcn.workitem.id.y()
     45   store volatile i32 %val0, i32 addrspace(1)* undef
     46   store volatile i32 %val1, i32 addrspace(1)* undef
     47   ret void
     48 }
     49 
     50 ; GCN-LABEL: {{^}}use_workitem_id_xyz:
     51 ; GCN: s_waitcnt
     52 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
     53 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
     54 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v2
     55 ; GCN-NEXT: s_waitcnt
     56 ; GCN-NEXT: s_setpc_b64
        ; Callee that reads all three workitem IDs and stores them in X, Y, Z order
        ; with volatile stores to an undef global pointer. The checks above expect
        ; the three values to be stored from v0, v1 and v2 respectively.
     57 define void @use_workitem_id_xyz() #1 {
     58   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
     59   %val1 = call i32 @llvm.amdgcn.workitem.id.y()
     60   %val2 = call i32 @llvm.amdgcn.workitem.id.z()
     61   store volatile i32 %val0, i32 addrspace(1)* undef
     62   store volatile i32 %val1, i32 addrspace(1)* undef
     63   store volatile i32 %val2, i32 addrspace(1)* undef
     64   ret void
     65 }
     66 
     67 ; GCN-LABEL: {{^}}use_workitem_id_xz:
     68 ; GCN: s_waitcnt
     69 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
     70 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
     71 ; GCN-NEXT: s_waitcnt
     72 ; GCN-NEXT: s_setpc_b64
        ; Callee that reads workitem IDs X and Z (Y is unused) and stores X first,
        ; then Z. The checks above expect X to be stored from v0 and Z from v1.
     73 define void @use_workitem_id_xz() #1 {
     74   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
     75   %val1 = call i32 @llvm.amdgcn.workitem.id.z()
     76   store volatile i32 %val0, i32 addrspace(1)* undef
     77   store volatile i32 %val1, i32 addrspace(1)* undef
     78   ret void
     79 }
     80 
     81 ; GCN-LABEL: {{^}}use_workitem_id_yz:
     82 ; GCN: s_waitcnt
     83 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
     84 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
     85 ; GCN-NEXT: s_waitcnt
     86 ; GCN-NEXT: s_setpc_b64
        ; Callee that reads workitem IDs Y and Z (X is unused) and stores Y first,
        ; then Z. The checks above expect Y to be stored from v0 and Z from v1.
     87 define void @use_workitem_id_yz() #1 {
     88   %val0 = call i32 @llvm.amdgcn.workitem.id.y()
     89   %val1 = call i32 @llvm.amdgcn.workitem.id.z()
     90   store volatile i32 %val0, i32 addrspace(1)* undef
     91   store volatile i32 %val1, i32 addrspace(1)* undef
     92   ret void
     93 }
     94 
     95 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_x:
     96 ; GCN: enable_vgpr_workitem_id = 0
     97 
     98 ; GCN-NOT: v0
     99 ; GCN: s_swappc_b64
    100 ; GCN-NOT: v0
        ; Kernel whose only work is a call to use_workitem_id_x. The checks above
        ; expect enable_vgpr_workitem_id to be 0 and forbid any reference to v0
        ; before or after the call instruction, i.e. no copy is needed to pass ID X.
    101 define amdgpu_kernel void @kern_indirect_use_workitem_id_x() #1 {
    102   call void @use_workitem_id_x()
    103   ret void
    104 }
    105 
    106 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_y:
    107 ; GCN: enable_vgpr_workitem_id = 1
    108 
    109 ; GCN-NOT: v0
    110 ; GCN-NOT: v1
    111 ; GCN: v_mov_b32_e32 v0, v1
    112 ; GCN-NOT: v0
    113 ; GCN-NOT: v1
    114 ; GCN: s_swappc_b64
        ; Kernel whose only work is a call to use_workitem_id_y. The checks above
        ; expect enable_vgpr_workitem_id to be 1 and exactly one copy of v1 into v0
        ; before the call, with no other reference to v0 or v1 around that copy.
    115 define amdgpu_kernel void @kern_indirect_use_workitem_id_y() #1 {
    116   call void @use_workitem_id_y()
    117   ret void
    118 }
    119 
    120 ; GCN-LABEL: {{^}}kern_indirect_use_workitem_id_z:
    121 ; GCN: enable_vgpr_workitem_id = 2
    122 
    123 ; GCN-NOT: v0
    124 ; GCN-NOT: v2
    125 ; GCN: v_mov_b32_e32 v0, v2
    126 ; GCN-NOT: v0
    127 ; GCN-NOT: v2
    128 ; GCN: s_swappc_b64
        ; Kernel whose only work is a call to use_workitem_id_z. The checks above
        ; expect enable_vgpr_workitem_id to be 2 and exactly one copy of v2 into v0
        ; before the call, with no other reference to v0 or v2 around that copy.
    129 define amdgpu_kernel void @kern_indirect_use_workitem_id_z() #1 {
    130   call void @use_workitem_id_z()
    131   ret void
    132 }
    133 
    134 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_x:
    135 ; GCN-NOT: v0
    136 ; GCN: s_swappc_b64
    137 ; GCN-NOT: v0
        ; Non-kernel function that forwards to use_workitem_id_x. The checks above
        ; forbid any reference to v0 before or after the call instruction.
    138 define void @func_indirect_use_workitem_id_x() #1 {
    139   call void @use_workitem_id_x()
    140   ret void
    141 }
    142 
    143 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_y:
    144 ; GCN-NOT: v0
    145 ; GCN: s_swappc_b64
    146 ; GCN-NOT: v0
        ; Non-kernel function that forwards to use_workitem_id_y. The checks above
        ; forbid any reference to v0 before or after the call instruction.
    147 define void @func_indirect_use_workitem_id_y() #1 {
    148   call void @use_workitem_id_y()
    149   ret void
    150 }
    151 
    152 ; GCN-LABEL: {{^}}func_indirect_use_workitem_id_z:
    153 ; GCN-NOT: v0
    154 ; GCN: s_swappc_b64
    155 ; GCN-NOT: v0
        ; Non-kernel function that forwards to use_workitem_id_z. The checks above
        ; forbid any reference to v0 before or after the call instruction.
    156 define void @func_indirect_use_workitem_id_z() #1 {
    157   call void @use_workitem_id_z()
    158   ret void
    159 }
    160 
    161 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_x:
    162 ; GCN: s_waitcnt
    163 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
    164 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
        ; Callee with one explicit i32 argument that also reads workitem ID X. It
        ; stores %arg0 first, then the ID. The checks above expect the first store to
        ; use v0 and the second to use v1.
    165 define void @other_arg_use_workitem_id_x(i32 %arg0) #1 {
    166   %val = call i32 @llvm.amdgcn.workitem.id.x()
    167   store volatile i32 %arg0, i32 addrspace(1)* undef
    168   store volatile i32 %val, i32 addrspace(1)* undef
    169   ret void
    170 }
    171 
    172 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_y:
    173 ; GCN: s_waitcnt
    174 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
    175 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
        ; Callee with one explicit i32 argument that also reads workitem ID Y. It
        ; stores %arg0 first, then the ID. The checks above expect the first store to
        ; use v0 and the second to use v1.
    176 define void @other_arg_use_workitem_id_y(i32 %arg0) #1 {
    177   %val = call i32 @llvm.amdgcn.workitem.id.y()
    178   store volatile i32 %arg0, i32 addrspace(1)* undef
    179   store volatile i32 %val, i32 addrspace(1)* undef
    180   ret void
    181 }
    182 
    183 ; GCN-LABEL: {{^}}other_arg_use_workitem_id_z:
    184 ; GCN: s_waitcnt
    185 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
    186 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
        ; Callee with one explicit i32 argument that also reads workitem ID Z. It
        ; stores %arg0 first, then the ID. The checks above expect the first store to
        ; use v0 and the second to use v1.
    187 define void @other_arg_use_workitem_id_z(i32 %arg0) #1 {
    188   %val = call i32 @llvm.amdgcn.workitem.id.z()
    189   store volatile i32 %arg0, i32 addrspace(1)* undef
    190   store volatile i32 %val, i32 addrspace(1)* undef
    191   ret void
    192 }
    193 
    194 
    195 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_x:
    196 ; GCN: enable_vgpr_workitem_id = 0
    197 
    198 ; GCN: v_mov_b32_e32 v1, v0
    199 ; GCN: v_mov_b32_e32 v0, 0x22b
    200 ; GCN: s_swappc_b64
        ; Kernel that calls other_arg_use_workitem_id_x with the constant 555
        ; (0x22b). The checks above expect v0 to be copied into v1 first and v0 to
        ; then be loaded with the constant before the call.
    201 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_x() #1 {
    202   call void @other_arg_use_workitem_id_x(i32 555)
    203   ret void
    204 }
    205 
    206 
    207 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_y:
    208 ; GCN: enable_vgpr_workitem_id = 1
    209 
    210 ; GCN-NOT: v1
    211 ; GCN: v_mov_b32_e32 v0, 0x22b
    212 ; GCN-NOT: v1
    213 ; GCN: s_swappc_b64
    214 ; GCN-NOT: v0
        ; Kernel that calls other_arg_use_workitem_id_y with the constant 555
        ; (0x22b). The checks above expect only v0 to be written (with the constant)
        ; and forbid any reference to v1 before the call, so v1 is passed untouched.
    215 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_y() #1 {
    216   call void @other_arg_use_workitem_id_y(i32 555)
    217   ret void
    218 }
    219 
    220 ; GCN-LABEL: {{^}}kern_indirect_other_arg_use_workitem_id_z:
    221 ; GCN: enable_vgpr_workitem_id = 2
    222 
    223 ; GCN-DAG: v_mov_b32_e32 v0, 0x22b
    224 ; GCN-DAG: v_mov_b32_e32 v1, v2
    225 ; GCN: s_swappc_b64
    226 ; GCN-NOT: v0
        ; Kernel that calls other_arg_use_workitem_id_z with the constant 555
        ; (0x22b). The checks above expect, in either order, v0 loaded with the
        ; constant and v2 copied into v1 before the call.
    227 define amdgpu_kernel void @kern_indirect_other_arg_use_workitem_id_z() #1 {
    228   call void @other_arg_use_workitem_id_z(i32 555)
    229   ret void
    230 }
    231 
    232 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x:
    233 ; GCN: s_mov_b32 s5, s32
    234 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
    235 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
    236 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
    237 
    238 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
    239 ; GCN-NEXT: s_waitcnt
    240 ; GCN-NEXT: s_setpc_b64
        ; Callee taking 32 explicit i32 arguments that also reads workitem ID X.
        ; It stores the ID first and then every argument in order, all as volatile
        ; stores to an undef global pointer. The checks above expect v32 to be
        ; spilled at s5 offset:8, the ID to be loaded from s5 offset:4 into v32 and
        ; stored, and v32 to be reloaded right before the return.
    241 define void @too_many_args_use_workitem_id_x(
    242   i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    243   i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    244   i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    245   i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
    246   %val = call i32 @llvm.amdgcn.workitem.id.x()
    247   store volatile i32 %val, i32 addrspace(1)* undef
    248 
    249   store volatile i32 %arg0, i32 addrspace(1)* undef
    250   store volatile i32 %arg1, i32 addrspace(1)* undef
    251   store volatile i32 %arg2, i32 addrspace(1)* undef
    252   store volatile i32 %arg3, i32 addrspace(1)* undef
    253   store volatile i32 %arg4, i32 addrspace(1)* undef
    254   store volatile i32 %arg5, i32 addrspace(1)* undef
    255   store volatile i32 %arg6, i32 addrspace(1)* undef
    256   store volatile i32 %arg7, i32 addrspace(1)* undef
    257 
    258   store volatile i32 %arg8, i32 addrspace(1)* undef
    259   store volatile i32 %arg9, i32 addrspace(1)* undef
    260   store volatile i32 %arg10, i32 addrspace(1)* undef
    261   store volatile i32 %arg11, i32 addrspace(1)* undef
    262   store volatile i32 %arg12, i32 addrspace(1)* undef
    263   store volatile i32 %arg13, i32 addrspace(1)* undef
    264   store volatile i32 %arg14, i32 addrspace(1)* undef
    265   store volatile i32 %arg15, i32 addrspace(1)* undef
    266 
    267   store volatile i32 %arg16, i32 addrspace(1)* undef
    268   store volatile i32 %arg17, i32 addrspace(1)* undef
    269   store volatile i32 %arg18, i32 addrspace(1)* undef
    270   store volatile i32 %arg19, i32 addrspace(1)* undef
    271   store volatile i32 %arg20, i32 addrspace(1)* undef
    272   store volatile i32 %arg21, i32 addrspace(1)* undef
    273   store volatile i32 %arg22, i32 addrspace(1)* undef
    274   store volatile i32 %arg23, i32 addrspace(1)* undef
    275 
    276   store volatile i32 %arg24, i32 addrspace(1)* undef
    277   store volatile i32 %arg25, i32 addrspace(1)* undef
    278   store volatile i32 %arg26, i32 addrspace(1)* undef
    279   store volatile i32 %arg27, i32 addrspace(1)* undef
    280   store volatile i32 %arg28, i32 addrspace(1)* undef
    281   store volatile i32 %arg29, i32 addrspace(1)* undef
    282   store volatile i32 %arg30, i32 addrspace(1)* undef
    283   store volatile i32 %arg31, i32 addrspace(1)* undef
    284 
    285   ret void
    286 }
    287 
    288 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x:
    289 ; GCN: enable_vgpr_workitem_id = 0
    290 
    291 ; GCN: s_mov_b32 s33, s7
    292 ; GCN: s_mov_b32 s32, s33
    293 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:8
    294 ; GCN: s_mov_b32 s4, s33
    295 ; GCN: s_swappc_b64
        ; Kernel that calls too_many_args_use_workitem_id_x with 32 constant
        ; arguments (10, 20, ..., 320). The checks above expect v0 to be written to
        ; the stack at s32 offset:8 before the call.
    296 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x() #1 {
    297   call void @too_many_args_use_workitem_id_x(
    298     i32 10, i32 20, i32 30, i32 40,
    299     i32 50, i32 60, i32 70, i32 80,
    300     i32 90, i32 100, i32 110, i32 120,
    301     i32 130, i32 140, i32 150, i32 160,
    302     i32 170, i32 180, i32 190, i32 200,
    303     i32 210, i32 220, i32 230, i32 240,
    304     i32 250, i32 260, i32 270, i32 280,
    305     i32 290, i32 300, i32 310, i32 320)
    306   ret void
    307 }
    308 
    309 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x:
    310 ; GCN: s_mov_b32 s5, s32
    311 ; GCN: buffer_store_dword v1, off, s[0:3], s32 offset:8
    312 ; GCN: s_swappc_b64
        ; Non-kernel function with one i32 argument of its own. It stores %arg0 and
        ; then calls too_many_args_use_workitem_id_x with 32 constant arguments.
        ; The checks above expect v1 to be written to the stack at s32 offset:8
        ; before the call.
    313 define void @func_call_too_many_args_use_workitem_id_x(i32 %arg0) #1 {
    314   store volatile i32 %arg0, i32 addrspace(1)* undef
    315   call void @too_many_args_use_workitem_id_x(
    316     i32 10, i32 20, i32 30, i32 40,
    317     i32 50, i32 60, i32 70, i32 80,
    318     i32 90, i32 100, i32 110, i32 120,
    319     i32 130, i32 140, i32 150, i32 160,
    320     i32 170, i32 180, i32 190, i32 200,
    321     i32 210, i32 220, i32 230, i32 240,
    322     i32 250, i32 260, i32 270, i32 280,
    323     i32 290, i32 300, i32 310, i32 320)
    324   ret void
    325 }
    326 
    327 ; Requires loading and storing to stack slot.
    328 ; GCN-LABEL: {{^}}too_many_args_call_too_many_args_use_workitem_id_x:
    329 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Spill
    330 ; GCN: s_add_u32 s32, s32, 0x400{{$}}
    331 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
    332 
    333 ; GCN: buffer_store_dword v32, off, s[0:3], s32 offset:8{{$}}
    334 
    335 ; GCN: s_swappc_b64
    336 
    337 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8 ; 4-byte Folded Reload
    338 ; GCN: s_sub_u32 s32, s32, 0x400{{$}}
    339 ; GCN: s_setpc_b64
        ; Function taking 32 i32 arguments that forwards all of them, unchanged and
        ; in order, to too_many_args_use_workitem_id_x. The checks above expect v32
        ; to be spilled, s32 to be advanced by 0x400, a value to be loaded from
        ; s5 offset:4 and re-stored at s32 offset:8 before the call, and the spill
        ; and s32 adjustment to be undone before returning.
    340 define void @too_many_args_call_too_many_args_use_workitem_id_x(
    341   i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    342   i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    343   i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    344   i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
    345   call void @too_many_args_use_workitem_id_x(
    346     i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    347     i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    348     i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    349     i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31)
    350   ret void
    351 }
    352 
    353 ; stack layout:
    354 ; frame[0] = emergency stack slot
    355 ; frame[1] = byval arg32
    356 ; frame[2] = stack passed workitem ID x
    357 ; frame[3] = VGPR spill slot
    358 
    359 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_byval:
    360 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Spill
    361 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
    362 ; GCN-NEXT: s_waitcnt
    363 ; GCN-NEXT: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
    364 ; GCN: buffer_load_dword v0, off, s[0:3], s5 offset:4
    365 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12 ; 4-byte Folded Reload
    366 ; GCN: s_setpc_b64
        ; Callee taking 32 i32 arguments plus a byval private (addrspace(5)) pointer
        ; that also reads workitem ID X. It stores the ID, then every i32 argument in
        ; order, and finally does a volatile load through %arg32. The checks above
        ; expect the ID to come from s5 offset:8, the byval value from s5 offset:4,
        ; and v32 to be spilled and reloaded at s5 offset:12.
    367 define void @too_many_args_use_workitem_id_x_byval(
    368   i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    369   i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    370   i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    371   i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31, i32 addrspace(5)* byval %arg32) #1 {
    372   %val = call i32 @llvm.amdgcn.workitem.id.x()
    373   store volatile i32 %val, i32 addrspace(1)* undef
    374 
    375   store volatile i32 %arg0, i32 addrspace(1)* undef
    376   store volatile i32 %arg1, i32 addrspace(1)* undef
    377   store volatile i32 %arg2, i32 addrspace(1)* undef
    378   store volatile i32 %arg3, i32 addrspace(1)* undef
    379   store volatile i32 %arg4, i32 addrspace(1)* undef
    380   store volatile i32 %arg5, i32 addrspace(1)* undef
    381   store volatile i32 %arg6, i32 addrspace(1)* undef
    382   store volatile i32 %arg7, i32 addrspace(1)* undef
    383 
    384   store volatile i32 %arg8, i32 addrspace(1)* undef
    385   store volatile i32 %arg9, i32 addrspace(1)* undef
    386   store volatile i32 %arg10, i32 addrspace(1)* undef
    387   store volatile i32 %arg11, i32 addrspace(1)* undef
    388   store volatile i32 %arg12, i32 addrspace(1)* undef
    389   store volatile i32 %arg13, i32 addrspace(1)* undef
    390   store volatile i32 %arg14, i32 addrspace(1)* undef
    391   store volatile i32 %arg15, i32 addrspace(1)* undef
    392 
    393   store volatile i32 %arg16, i32 addrspace(1)* undef
    394   store volatile i32 %arg17, i32 addrspace(1)* undef
    395   store volatile i32 %arg18, i32 addrspace(1)* undef
    396   store volatile i32 %arg19, i32 addrspace(1)* undef
    397   store volatile i32 %arg20, i32 addrspace(1)* undef
    398   store volatile i32 %arg21, i32 addrspace(1)* undef
    399   store volatile i32 %arg22, i32 addrspace(1)* undef
    400   store volatile i32 %arg23, i32 addrspace(1)* undef
    401 
    402   store volatile i32 %arg24, i32 addrspace(1)* undef
    403   store volatile i32 %arg25, i32 addrspace(1)* undef
    404   store volatile i32 %arg26, i32 addrspace(1)* undef
    405   store volatile i32 %arg27, i32 addrspace(1)* undef
    406   store volatile i32 %arg28, i32 addrspace(1)* undef
    407   store volatile i32 %arg29, i32 addrspace(1)* undef
    408   store volatile i32 %arg30, i32 addrspace(1)* undef
    409   store volatile i32 %arg31, i32 addrspace(1)* undef
    410   %private = load volatile i32, i32 addrspace(5)* %arg32
    411   ret void
    412 }
    413 
    414 ; frame[0] = emergency stack slot
    415 ; frame[1] = presumably %alloca (the checks below store 0x3e7 at s33 offset:4)
    416 
    417 ; sp[0] = callee emergency stack slot reservation
    418 ; sp[1] = byval
    419 ; sp[2] = ?? (no check below covers s32 offset:8)
    420 ; sp[3] = stack passed workitem ID x
    421 
    422 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_byval:
    423 ; GCN: enable_vgpr_workitem_id = 0
    424 
    425 ; GCN: s_mov_b32 s33, s7
    426 ; GCN: s_add_u32 s32, s33, 0x400{{$}}
    427 
    428 ; GCN-NOT: s32
    429 ; GCN-DAG: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
    430 ; GCN: buffer_store_dword [[K]], off, s[0:3], s33 offset:4
    431 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12
    432 
    433 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s33 offset:4
    434 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
    435 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
    436 ; GCN: s_swappc_b64
        ; Kernel that allocates a private i32, stores 999 (0x3e7) into it, and passes
        ; it as the byval argument after 32 constant i32 arguments. The checks above
        ; expect the constant to be stored at s33 offset:4, v0 to be stored at
        ; s32 offset:12, and the value to be reloaded from s33 offset:4 and copied
        ; to s32 offset:4 before the call.
    437 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_byval() #1 {
    438   %alloca = alloca i32, align 4, addrspace(5)
    439   store volatile i32 999, i32 addrspace(5)* %alloca
    440   call void @too_many_args_use_workitem_id_x_byval(
    441     i32 10, i32 20, i32 30, i32 40,
    442     i32 50, i32 60, i32 70, i32 80,
    443     i32 90, i32 100, i32 110, i32 120,
    444     i32 130, i32 140, i32 150, i32 160,
    445     i32 170, i32 180, i32 190, i32 200,
    446     i32 210, i32 220, i32 230, i32 240,
    447     i32 250, i32 260, i32 270, i32 280,
    448     i32 290, i32 300, i32 310, i32 320,
    449     i32 addrspace(5)* %alloca)
    450   ret void
    451 }
    452 
    453 ; GCN-LABEL: {{^}}func_call_too_many_args_use_workitem_id_x_byval:
    454 ; GCN: v_mov_b32_e32 [[K:v[0-9]+]], 0x3e7{{$}}
    455 ; GCN: buffer_store_dword [[K]], off, s[0:3], s5 offset:4
    456 ; GCN: buffer_store_dword v0, off, s[0:3], s32 offset:12
    457 
    458 ; GCN: buffer_load_dword [[RELOAD_BYVAL:v[0-9]+]], off, s[0:3], s5 offset:4
    459 ; GCN: buffer_store_dword [[RELOAD_BYVAL]], off, s[0:3], s32 offset:4{{$}}
    460 ; GCN: v_mov_b32_e32 [[RELOAD_BYVAL]],
    461 ; GCN: s_swappc_b64
        ; Non-kernel variant of the byval caller: it allocates a private i32, stores
        ; 999 (0x3e7) into it, and passes it as the byval argument after 32 constant
        ; i32 arguments. The checks above mirror the kernel version but address the
        ; local slot through s5 instead of s33.
    462 define void @func_call_too_many_args_use_workitem_id_x_byval() #1 {
    463   %alloca = alloca i32, align 4, addrspace(5)
    464   store volatile i32 999, i32 addrspace(5)* %alloca
    465   call void @too_many_args_use_workitem_id_x_byval(
    466     i32 10, i32 20, i32 30, i32 40,
    467     i32 50, i32 60, i32 70, i32 80,
    468     i32 90, i32 100, i32 110, i32 120,
    469     i32 130, i32 140, i32 150, i32 160,
    470     i32 170, i32 180, i32 190, i32 200,
    471     i32 210, i32 220, i32 230, i32 240,
    472     i32 250, i32 260, i32 270, i32 280,
    473     i32 290, i32 300, i32 310, i32 320,
    474     i32 addrspace(5)* %alloca)
    475   ret void
    476 }
    477 
    478 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_xyz:
    479 ; GCN: s_mov_b32 s5, s32
    480 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Spill
    481 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4{{$}}
    482 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
    483 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8{{$}}
    484 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
    485 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:12{{$}}
    486 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v32
    487 
    488 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:16 ; 4-byte Folded Reload
    489 ; GCN-NEXT: s_waitcnt
    490 ; GCN-NEXT: s_setpc_b64
        ; Callee taking 32 explicit i32 arguments that also reads workitem IDs X, Y
        ; and Z. Each ID is stored right after it is read, followed by every argument
        ; in order. The checks above expect v32 to be spilled at s5 offset:16, the
        ; three IDs to be loaded into v32 from s5 offsets 4, 8 and 12 and stored in
        ; turn, and v32 to be reloaded right before the return.
    491 define void @too_many_args_use_workitem_id_xyz(
    492   i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    493   i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    494   i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    495   i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30, i32 %arg31) #1 {
    496   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
    497   store volatile i32 %val0, i32 addrspace(1)* undef
    498   %val1 = call i32 @llvm.amdgcn.workitem.id.y()
    499   store volatile i32 %val1, i32 addrspace(1)* undef
    500   %val2 = call i32 @llvm.amdgcn.workitem.id.z()
    501   store volatile i32 %val2, i32 addrspace(1)* undef
    502 
    503   store volatile i32 %arg0, i32 addrspace(1)* undef
    504   store volatile i32 %arg1, i32 addrspace(1)* undef
    505   store volatile i32 %arg2, i32 addrspace(1)* undef
    506   store volatile i32 %arg3, i32 addrspace(1)* undef
    507   store volatile i32 %arg4, i32 addrspace(1)* undef
    508   store volatile i32 %arg5, i32 addrspace(1)* undef
    509   store volatile i32 %arg6, i32 addrspace(1)* undef
    510   store volatile i32 %arg7, i32 addrspace(1)* undef
    511 
    512   store volatile i32 %arg8, i32 addrspace(1)* undef
    513   store volatile i32 %arg9, i32 addrspace(1)* undef
    514   store volatile i32 %arg10, i32 addrspace(1)* undef
    515   store volatile i32 %arg11, i32 addrspace(1)* undef
    516   store volatile i32 %arg12, i32 addrspace(1)* undef
    517   store volatile i32 %arg13, i32 addrspace(1)* undef
    518   store volatile i32 %arg14, i32 addrspace(1)* undef
    519   store volatile i32 %arg15, i32 addrspace(1)* undef
    520 
    521   store volatile i32 %arg16, i32 addrspace(1)* undef
    522   store volatile i32 %arg17, i32 addrspace(1)* undef
    523   store volatile i32 %arg18, i32 addrspace(1)* undef
    524   store volatile i32 %arg19, i32 addrspace(1)* undef
    525   store volatile i32 %arg20, i32 addrspace(1)* undef
    526   store volatile i32 %arg21, i32 addrspace(1)* undef
    527   store volatile i32 %arg22, i32 addrspace(1)* undef
    528   store volatile i32 %arg23, i32 addrspace(1)* undef
    529 
    530   store volatile i32 %arg24, i32 addrspace(1)* undef
    531   store volatile i32 %arg25, i32 addrspace(1)* undef
    532   store volatile i32 %arg26, i32 addrspace(1)* undef
    533   store volatile i32 %arg27, i32 addrspace(1)* undef
    534   store volatile i32 %arg28, i32 addrspace(1)* undef
    535   store volatile i32 %arg29, i32 addrspace(1)* undef
    536   store volatile i32 %arg30, i32 addrspace(1)* undef
    537   store volatile i32 %arg31, i32 addrspace(1)* undef
    538 
    539   ret void
    540 }
    541 
    542 ; frame[0] = kernel emergency stack slot
    543 ; frame[1] = callee emergency stack slot
    544 ; frame[2] = ID X
    545 ; frame[3] = ID Y
    546 ; frame[4] = ID Z
    547 
    548 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_xyz:
    549 ; GCN: enable_vgpr_workitem_id = 2
    550 
    551 ; GCN: s_mov_b32 s33, s7
    552 ; GCN: s_mov_b32 s32, s33
    553 
    554 ; GCN-DAG: buffer_store_dword v0, off, s[0:3], s32 offset:8
    555 ; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:12
    556 ; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:16
    557 ; GCN: s_swappc_b64
        ; Kernel that calls too_many_args_use_workitem_id_xyz with 32 constant
        ; arguments. The checks above expect enable_vgpr_workitem_id to be 2 and
        ; v0, v1 and v2 to be stored, in any order, at s32 offsets 8, 12 and 16
        ; before the call.
    558 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_xyz() #1 {
    559   call void @too_many_args_use_workitem_id_xyz(
    560     i32 10, i32 20, i32 30, i32 40,
    561     i32 50, i32 60, i32 70, i32 80,
    562     i32 90, i32 100, i32 110, i32 120,
    563     i32 130, i32 140, i32 150, i32 160,
    564     i32 170, i32 180, i32 190, i32 200,
    565     i32 210, i32 220, i32 230, i32 240,
    566     i32 250, i32 260, i32 270, i32 280,
    567     i32 290, i32 300, i32 310, i32 320)
    568   ret void
    569 }
    570 
    571 ; workitem ID X in register, yz on stack
    572 ; v31 = workitem ID X
    573 ; frame[0] = emergency slot
    574 ; frame[1] = workitem Y
    575 ; frame[2] = workitem Z
    576 
    577 ; GCN-LABEL: {{^}}too_many_args_use_workitem_id_x_stack_yz:
    578 ; GCN: s_mov_b32 s5, s32
    579 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
    580 ; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:4{{$}}
    581 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
    582 ; GCN: buffer_load_dword v31, off, s[0:3], s5 offset:8{{$}}
    583 ; GCN: flat_store_dword v{{\[[0-9]+:[0-9]+\]}}, v31
    584 
    585 ; GCN: s_waitcnt
    586 ; GCN-NEXT: s_setpc_b64
    587 ; GCN: ScratchSize: 12
        ; Callee taking 31 explicit i32 arguments (%arg0 through %arg30) that also
        ; reads workitem IDs X, Y and Z. Each ID is stored right after it is read,
        ; followed by every argument in order. The checks above expect X to be stored
        ; straight from v31, Y and Z to be loaded into v31 from s5 offsets 4 and 8
        ; before being stored, and a reported scratch size of 12.
    588 define void @too_many_args_use_workitem_id_x_stack_yz(
    589   i32 %arg0, i32 %arg1, i32 %arg2, i32 %arg3, i32 %arg4, i32 %arg5, i32 %arg6, i32 %arg7,
    590   i32 %arg8, i32 %arg9, i32 %arg10, i32 %arg11, i32 %arg12, i32 %arg13, i32 %arg14, i32 %arg15,
    591   i32 %arg16, i32 %arg17, i32 %arg18, i32 %arg19, i32 %arg20, i32 %arg21, i32 %arg22, i32 %arg23,
    592   i32 %arg24, i32 %arg25, i32 %arg26, i32 %arg27, i32 %arg28, i32 %arg29, i32 %arg30) #1 {
    593   %val0 = call i32 @llvm.amdgcn.workitem.id.x()
    594   store volatile i32 %val0, i32 addrspace(1)* undef
    595   %val1 = call i32 @llvm.amdgcn.workitem.id.y()
    596   store volatile i32 %val1, i32 addrspace(1)* undef
    597   %val2 = call i32 @llvm.amdgcn.workitem.id.z()
    598   store volatile i32 %val2, i32 addrspace(1)* undef
    599 
    600   store volatile i32 %arg0, i32 addrspace(1)* undef
    601   store volatile i32 %arg1, i32 addrspace(1)* undef
    602   store volatile i32 %arg2, i32 addrspace(1)* undef
    603   store volatile i32 %arg3, i32 addrspace(1)* undef
    604   store volatile i32 %arg4, i32 addrspace(1)* undef
    605   store volatile i32 %arg5, i32 addrspace(1)* undef
    606   store volatile i32 %arg6, i32 addrspace(1)* undef
    607   store volatile i32 %arg7, i32 addrspace(1)* undef
    608 
    609   store volatile i32 %arg8, i32 addrspace(1)* undef
    610   store volatile i32 %arg9, i32 addrspace(1)* undef
    611   store volatile i32 %arg10, i32 addrspace(1)* undef
    612   store volatile i32 %arg11, i32 addrspace(1)* undef
    613   store volatile i32 %arg12, i32 addrspace(1)* undef
    614   store volatile i32 %arg13, i32 addrspace(1)* undef
    615   store volatile i32 %arg14, i32 addrspace(1)* undef
    616   store volatile i32 %arg15, i32 addrspace(1)* undef
    617 
    618   store volatile i32 %arg16, i32 addrspace(1)* undef
    619   store volatile i32 %arg17, i32 addrspace(1)* undef
    620   store volatile i32 %arg18, i32 addrspace(1)* undef
    621   store volatile i32 %arg19, i32 addrspace(1)* undef
    622   store volatile i32 %arg20, i32 addrspace(1)* undef
    623   store volatile i32 %arg21, i32 addrspace(1)* undef
    624   store volatile i32 %arg22, i32 addrspace(1)* undef
    625   store volatile i32 %arg23, i32 addrspace(1)* undef
    626 
    627   store volatile i32 %arg24, i32 addrspace(1)* undef
    628   store volatile i32 %arg25, i32 addrspace(1)* undef
    629   store volatile i32 %arg26, i32 addrspace(1)* undef
    630   store volatile i32 %arg27, i32 addrspace(1)* undef
    631   store volatile i32 %arg28, i32 addrspace(1)* undef
    632   store volatile i32 %arg29, i32 addrspace(1)* undef
    633   store volatile i32 %arg30, i32 addrspace(1)* undef
    634 
    635   ret void
    636 }
    637 
    638 ; frame[0] = kernel emergency stack slot
    639 ; frame[1] = callee emergency stack slot
    640 ; frame[2] = ID Y
    641 ; frame[3] = ID Z
    642 
    643 ; GCN-LABEL: {{^}}kern_call_too_many_args_use_workitem_id_x_stack_yz:
    644 ; GCN: enable_vgpr_workitem_id = 2
    645 
    646 ; GCN: s_mov_b32 s33, s7
    647 ; GCN: s_mov_b32 s32, s33
    648 
    649 ; GCN-DAG: v_mov_b32_e32 v31, v0
    650 ; GCN-DAG: buffer_store_dword v1, off, s[0:3], s32 offset:8
    651 ; GCN-DAG: buffer_store_dword v2, off, s[0:3], s32 offset:12
    652 ; GCN: s_swappc_b64
        ; Kernel that calls too_many_args_use_workitem_id_x_stack_yz with 31
        ; constant arguments (10, 20, ..., 310). The checks above expect, in any
        ; order, v0 copied into v31 and v1 and v2 stored at s32 offsets 8 and 12
        ; before the call.
    653 define amdgpu_kernel void @kern_call_too_many_args_use_workitem_id_x_stack_yz() #1 {
    654   call void @too_many_args_use_workitem_id_x_stack_yz(
    655     i32 10, i32 20, i32 30, i32 40,
    656     i32 50, i32 60, i32 70, i32 80,
    657     i32 90, i32 100, i32 110, i32 120,
    658     i32 130, i32 140, i32 150, i32 160,
    659     i32 170, i32 180, i32 190, i32 200,
    660     i32 210, i32 220, i32 230, i32 240,
    661     i32 250, i32 260, i32 270, i32 280,
    662     i32 290, i32 300, i32 310)
    663   ret void
    664 }
    665 
    666 declare i32 @llvm.amdgcn.workitem.id.x() #0 ; workitem ID intrinsic, X dimension
    667 declare i32 @llvm.amdgcn.workitem.id.y() #0 ; workitem ID intrinsic, Y dimension
    668 declare i32 @llvm.amdgcn.workitem.id.z() #0 ; workitem ID intrinsic, Z dimension
    669 
    670 attributes #0 = { nounwind readnone speculatable } ; attached to the three intrinsic declarations above
    671 attributes #1 = { nounwind noinline } ; attached to every function defined in the visible part of this test
    672