Home | History | Annotate | Download | only in AMDGPU
      1 # RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-insert-waitcnts  %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s
      2 # RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts  %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s
        # Both targets share the same expectations. The CHECK prefix must be
        # enabled explicitly: with only GFX89 selected, every "# CHECK..." line
        # in this file is silently ignored by FileCheck.
      3 
      4 --- |
          ; Stub IR kernels, one per MIR function in this file. Their bodies are
          ; empty; the machine code under test is given in the MIR documents.
          ; The pointer arguments of @flat_zero_waitcnt are what the MIR memory
          ; operands refer to as %ir.global4 / %ir.global16 (addrspace(1)
          ; pointers) and %ir.flat4 / %ir.flat16 (pointers with no explicit
          ; address space).
      5   define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
      6                                  <4 x i32> addrspace(1)* %global16,
      7                                  i32* %flat4,
      8                                  <4 x i32>* %flat16) {
      9     ret void
     10   }
     11 
     12   define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
     13     ret void
     14   }
     15 
     16   define amdgpu_kernel void @single_branch_successor_not_next_block() {
     17     ret void
     18   }
     19 
     20 ...
     21 ---
     22 
     23 # CHECK-LABEL: name: flat_zero_waitcnt
     24 
     25 # CHECK-LABEL: bb.0:
     26 # CHECK: FLAT_LOAD_DWORD
     27 # CHECK: FLAT_LOAD_DWORDX4
     28 # Global loads return in order, so we should only wait for the first load:
     29 # s_waitcnt vmcnt(1) lgkmcnt(1), which is encoded as 369 (0x171).
     30 # CHECK-NEXT: S_WAITCNT 369
     31 
     32 # CHECK-LABEL: bb.1:
     33 # CHECK: FLAT_LOAD_DWORD
     34 # GFX89: S_WAITCNT 112
     35 # CHECK: FLAT_LOAD_DWORDX4
     36 
     37 # CHECK-LABEL: bb.2:
     38 # CHECK: FLAT_LOAD_DWORD
     39 # GFX89: S_WAITCNT 112
     40 # CHECK: FLAT_LOAD_DWORDX4
     41 
     42 name: flat_zero_waitcnt
        # Each of the three blocks issues a FLAT_LOAD_DWORD into $vgpr0 and a
        # FLAT_LOAD_DWORDX4 into $vgpr3..$vgpr6, then overwrites $vgpr0 with a
        # V_MOV_B32. The blocks differ only in the memory operands on the loads:
        #   bb.0: both loads reference the addrspace(1) values %ir.global4 and
        #         %ir.global16.
        #   bb.1: the first load has no memory operand at all; the second
        #         references %ir.global16.
        #   bb.2: both loads reference %ir.flat4 and %ir.flat16, which have no
        #         explicit address space.
        # NOTE(review): presumably the missing / non-global memory operands are
        # what force the full "S_WAITCNT 112" wait expected in bb.1 and bb.2,
        # versus the partial wait expected in bb.0 - confirm against the pass.
     43 
     44 body: |
     45   bb.0:
     46     successors: %bb.1
     47     $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
     48     $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
     49     $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
     50     S_BRANCH %bb.1
     51 
     52   bb.1:
     53     successors: %bb.2
     54     $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
     55     $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
     56     $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
     57     S_BRANCH %bb.2
     58 
     59   bb.2:
     60     $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
     61     $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
     62     $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
     63     S_ENDPGM
     64 ...
     65 ---
     66 # There is only a single fallthrough successor block, so there's no
     67 # need to wait immediately.
     68 
     69 # CHECK-LABEL: name: single_fallthrough_successor_no_end_block_wait
     70 # CHECK:   $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2
     71 # CHECK-NOT: S_WAITCNT
     72 
     73 # CHECK: bb.1:
     74 # CHECK-NEXT: V_LSHLREV_B64
     75 # CHECK-NEXT: S_WAITCNT 112
     76 # CHECK-NEXT: FLAT_STORE_DWORD
     77 name: single_fallthrough_successor_no_end_block_wait
        # bb.0 issues a flat load into $vgpr0 and ends without a terminator, so
        # control falls through to bb.1, its only listed successor. bb.1 first
        # computes the store address into $vgpr3_vgpr4 and then stores $vgpr0,
        # which is the first use of the loaded value.
     78 
     79 body: |
     80   bb.0:
     81     successors: %bb.1
     82     $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
     83 
     84   bb.1:
     85     $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
     86     FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
     87     S_ENDPGM
     88 ...
     89 ---
     90 # The block has a single predecessor with a single successor, but it
     91 # is not the next block so it's non-obvious that the wait is not needed.
     92 
     93 
     94 # CHECK-LABEL: name: single_branch_successor_not_next_block
     95 
     96 # CHECK: bb.1:
     97 # CHECK-NEXT: FLAT_STORE_DWORD
     98 # CHECK-NEXT: S_ENDPGM
     99 
    100 # CHECK: bb.2:
    101 # CHECK-NEXT: V_LSHLREV_B64
    102 # CHECK-NEXT: S_WAITCNT 112
    103 # CHECK-NEXT: FLAT_STORE_DWORD
    104 name: single_branch_successor_not_next_block
        # bb.0 issues a flat load into $vgpr0 and branches explicitly to bb.2,
        # skipping bb.1, which sits between them in layout order. bb.1 is not
        # listed as a successor of any block here; its store uses
        # $vgpr8_vgpr9 and $vgpr10, none of which the load in bb.0 writes.
        # bb.2 computes a store address and then stores the loaded $vgpr0.
    105 
    106 body: |
    107   bb.0:
    108     successors: %bb.2
    109     $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
    110    S_BRANCH %bb.2
    111 
    112   bb.1:
    113     FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, implicit $exec, implicit $flat_scr
    114     S_ENDPGM
    115 
    116   bb.2:
    117      $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
    118     FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    119     S_ENDPGM
    120 ...
    121