# RUN: llc -march=amdgcn -mcpu=gfx803 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s
# RUN: llc -march=amdgcn -mcpu=gfx900 -run-pass si-insert-waitcnts %s -o - | FileCheck -check-prefixes=CHECK,GFX89 %s

# Exercises the si-insert-waitcnts pass on gfx803 and gfx900 and verifies
# where S_WAITCNT instructions are (and are not) placed around FLAT memory
# operations. Both the common prefix and the gfx8/gfx9 prefix must be enabled
# on the FileCheck command line: a directive whose prefix is not enabled is
# silently ignored, which would leave most of this file unverified.

--- |
  define amdgpu_kernel void @flat_zero_waitcnt(i32 addrspace(1)* %global4,
                                               <4 x i32> addrspace(1)* %global16,
                                               i32* %flat4,
                                               <4 x i32>* %flat16) {
    ret void
  }

  define amdgpu_kernel void @single_fallthrough_successor_no_end_block_wait() {
    ret void
  }

  define amdgpu_kernel void @single_branch_successor_not_next_block() {
    ret void
  }

...
---

# CHECK-LABEL: name: flat_zero_waitcnt

# CHECK-LABEL: bb.0:
# CHECK: FLAT_LOAD_DWORD
# CHECK: FLAT_LOAD_DWORDX4
# Global loads will return in order so we should:
# s_waitcnt vmcnt(1) lgkmcnt(1)
# CHECK-NEXT: S_WAITCNT 369

# CHECK-LABEL: bb.1:
# CHECK: FLAT_LOAD_DWORD
# GFX89: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4

# CHECK-LABEL: bb.2:
# CHECK: FLAT_LOAD_DWORD
# GFX89: S_WAITCNT 112
# CHECK: FLAT_LOAD_DWORDX4

name: flat_zero_waitcnt

body: |
  bb.0:
    successors: %bb.1
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.global4)
    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.global16)
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 4 from %ir.flat4)
    $vgpr3_vgpr4_vgpr5_vgpr6 = FLAT_LOAD_DWORDX4 $vgpr7_vgpr8, 0, 0, 0, implicit $exec, implicit $flat_scr :: (load 16 from %ir.flat16)
    $vgpr0 = V_MOV_B32_e32 $vgpr1, implicit $exec
    S_ENDPGM
...
---
# There is only a single fallthrough successor block, so there's no
# need to wait immediately.

# CHECK-LABEL: name: single_fallthrough_successor_no_end_block_wait
# CHECK: $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2
# CHECK-NOT: S_WAITCNT

# CHECK: bb.1:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_fallthrough_successor_no_end_block_wait

body: |
  bb.0:
    successors: %bb.1
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr

  bb.1:
    $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...
---
# The block has a single predecessor with a single successor, but it
# is not the next block so it's non-obvious that the wait is not needed.


# CHECK-LABEL: name: single_branch_successor_not_next_block

# CHECK: bb.1
# CHECK-NEXT: FLAT_STORE_DWORD
# CHECK-NEXT: S_ENDPGM

# CHECK: bb.2:
# CHECK-NEXT: V_LSHLREV_B64
# CHECK-NEXT: S_WAITCNT 112
# CHECK-NEXT: FLAT_STORE_DWORD
name: single_branch_successor_not_next_block

body: |
  bb.0:
    successors: %bb.2
    $vgpr0 = FLAT_LOAD_DWORD $vgpr1_vgpr2, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_BRANCH %bb.2

  bb.1:
    FLAT_STORE_DWORD $vgpr8_vgpr9, $vgpr10, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM

  bb.2:
    $vgpr3_vgpr4 = V_LSHLREV_B64 4, $vgpr7_vgpr8, implicit $exec
    FLAT_STORE_DWORD $vgpr3_vgpr4, $vgpr0, 0, 0, 0, implicit $exec, implicit $flat_scr
    S_ENDPGM
...