; Checks the frame setup and teardown code that llc emits for AMDGPU functions
; that return with s_setpc_b64. The file is compiled twice and each run is
; matched with FileCheck:
;   -mcpu=hawaii  enables the check prefixes GCN and CI
;   -mcpu=gfx900  enables the check prefixes GCN and GFX9
; The checks visible in this file only use the shared GCN prefix, so both
; targets are expected to produce the same matched sequences.

; RUN: llc -march=amdgcn -mcpu=hawaii -verify-machineinstrs < %s | FileCheck  -enable-var-scope -check-prefix=GCN -check-prefix=CI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck  -enable-var-scope -check-prefix=GCN -check-prefix=GFX9 %s

      4 ; GCN-LABEL: {{^}}callee_no_stack:
      5 ; GCN: ; %bb.0:
      6 ; GCN-NEXT: s_waitcnt
      7 ; GCN-NEXT: s_setpc_b64
      8 define void @callee_no_stack() #0 {
      9   ret void
     10 }
     11 
     12 ; GCN-LABEL: {{^}}callee_no_stack_no_fp_elim:
     13 ; GCN: ; %bb.0:
     14 ; GCN-NEXT: s_waitcnt
     15 ; GCN-NEXT: s_setpc_b64
     16 define void @callee_no_stack_no_fp_elim() #1 {
     17   ret void
     18 }
     19 
     20 ; Requires frame pointer for access to local regular object.
     21 
     22 ; GCN-LABEL: {{^}}callee_with_stack:
     23 ; GCN: ; %bb.0:
     24 ; GCN-NEXT: s_waitcnt
     25 ; GCN-NEXT: s_mov_b32 s5, s32
     26 ; GCN-NEXT: v_mov_b32_e32 v0, 0{{$}}
     27 ; GCN-NEXT: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
     28 ; GCN-NEXT: s_waitcnt
     29 ; GCN-NEXT: s_setpc_b64
     30 define void @callee_with_stack() #0 {
     31   %alloca = alloca i32, addrspace(5)
     32   store volatile i32 0, i32 addrspace(5)* %alloca
     33   ret void
     34 }
     35 
     36 ; GCN-LABEL: {{^}}callee_with_stack_and_call:
     37 ; GCN: ; %bb.0:
     38 ; GCN-NEXT: s_waitcnt
     39 ; GCN: s_mov_b32 s5, s32
     40 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:8
     41 
     42 ; GCN-DAG: v_writelane_b32 v32, s33,
     43 ; GCN-DAG: v_writelane_b32 v32, s34,
     44 ; GCN-DAG: v_writelane_b32 v32, s35,
     45 ; GCN-DAG: s_add_u32 s32, s32, 0x400{{$}}
     46 ; GCN-DAG: v_mov_b32_e32 v0, 0{{$}}
     47 ; GCN-DAG: buffer_store_dword v0, off, s[0:3], s5 offset:4{{$}}
     48 ; GCN-DAG: s_mov_b32 s33, s5
     49 
     50 
     51 ; GCN: s_swappc_b64
     52 ; GCN: s_mov_b32 s5, s33
     53 ; GCN-DAG: v_readlane_b32 s35,
     54 ; GCN-DAG: v_readlane_b32 s34,
     55 ; GCN-DAG: v_readlane_b32 s33,
     56 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:8
     57 ; GCN: s_waitcnt
     58 ; GCN-NEXT: s_setpc_b64
     59 define void @callee_with_stack_and_call() #0 {
     60   %alloca = alloca i32, addrspace(5)
     61   store volatile i32 0, i32 addrspace(5)* %alloca
     62   call void @external_void_func_void()
     63   ret void
     64 }
     65 
     66 ; Should be able to copy incoming stack pointer directly to inner
     67 ; call's stack pointer argument.
     68 
     69 ; There is stack usage only because of the need to evict a VGPR for
     70 ; spilling CSR SGPRs.
     71 
     72 ; GCN-LABEL: {{^}}callee_no_stack_with_call:
     73 ; GCN: s_waitcnt
     74 ; GCN: s_mov_b32 s5, s32
     75 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4
     76 ; GCN-DAG: v_writelane_b32 v32, s33, 0
     77 ; GCN-DAG: v_writelane_b32 v32, s34, 1
     78 ; GCN: s_mov_b32 s33, s5
     79 ; GCN: s_swappc_b64
     80 ; GCN: s_mov_b32 s5, s33
     81 
     82 ; GCN-DAG: v_readlane_b32 s34, v32, 1
     83 ; GCN-DAG: v_readlane_b32 s33, v32, 0
     84 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4
     85 ; GCN: s_sub_u32 s32, s32, 0x400
     86 
     87 ; GCN: s_setpc_b64
     88 define void @callee_no_stack_with_call() #0 {
     89   call void @external_void_func_void()
     90   ret void
     91 }
     92 
     93 declare void @external_void_func_void() #0
     94 
     95 ; Make sure if a CSR vgpr is used for SGPR spilling, it is saved and restored
     96 ; GCN-LABEL: {{^}}callee_func_sgpr_spill_no_calls:
     97 ; GCN: buffer_store_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Spill
     98 ; GCN: v_writelane_b32 v32
     99 ; GCN: ;;#ASMSTART
    100 ; GCN: v_readlane_b32 s{{[0-9]+}}, v32
    101 ; GCN: buffer_load_dword v32, off, s[0:3], s5 offset:4 ; 4-byte Folded Reload
    102 ; GCN-NEXT: s_waitcnt
    103 ; GCN-NEXT: s_setpc_b64
    104 define void @callee_func_sgpr_spill_no_calls(i32 %in) #0 {
    105   call void asm sideeffect "", "~{v0},~{v1},~{v2},~{v3},~{v4},~{v5},~{v6},~{v7}"() #0
    106   call void asm sideeffect "", "~{v8},~{v9},~{v10},~{v11},~{v12},~{v13},~{v14},~{v15}"() #0
    107   call void asm sideeffect "", "~{v16},~{v17},~{v18},~{v19},~{v20},~{v21},~{v22},~{v23}"() #0
    108   call void asm sideeffect "", "~{v24},~{v25},~{v26},~{v27},~{v28},~{v29},~{v30},~{v31}"() #0
    109 
    110   %wide.sgpr0 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
    111   %wide.sgpr1 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
    112   %wide.sgpr2 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
    113   %wide.sgpr5 = call <16 x i32> asm sideeffect "; def $0", "=s" () #0
    114   %wide.sgpr3 = call <8 x i32> asm sideeffect "; def $0", "=s" () #0
    115   %wide.sgpr4 = call <2 x i32> asm sideeffect "; def $0", "=s" () #0
    116 
    117   call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr0) #0
    118   call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr1) #0
    119   call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr2) #0
    120   call void asm sideeffect "; use $0", "s"(<8 x i32> %wide.sgpr3) #0
    121   call void asm sideeffect "; use $0", "s"(<2 x i32> %wide.sgpr4) #0
    122   call void asm sideeffect "; use $0", "s"(<16 x i32> %wide.sgpr5) #0
    123   ret void
    124 }
    125 
    126 attributes #0 = { nounwind }
    127 attributes #1 = { nounwind "no-frame-pointer-elim"="true" }
    128