; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=fiji -verify-machineinstrs < %s | FileCheck %s

; The kernels in this group have empty bodies and differ only in the value of
; the "amdgpu-waves-per-eu" attribute, which is written "min" or "min,max".

; Minimum and maximum are both 1: exactly one wave per execution unit.
; CHECK-LABEL: {{^}}empty_exactly_1:
; CHECK: SGPRBlocks: 12
; CHECK: VGPRBlocks: 32
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 129
define amdgpu_kernel void @empty_exactly_1() #0 {
  ret void
}
attributes #0 = { "amdgpu-waves-per-eu"="1,1" }

; Minimum and maximum are both 5: exactly five waves per execution unit.
; CHECK-LABEL: {{^}}empty_exactly_5:
; CHECK: SGPRBlocks: 12
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 41
define amdgpu_kernel void @empty_exactly_5() #1 {
  ret void
}
attributes #1 = { "amdgpu-waves-per-eu"="5,5" }

; Minimum and maximum are both 10: exactly ten waves per execution unit.
; CHECK-LABEL: {{^}}empty_exactly_10:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_exactly_10() #2 {
  ret void
}
attributes #2 = { "amdgpu-waves-per-eu"="10,10" }

; Only a minimum of 1 is given: at least one wave per execution unit.
; CHECK-LABEL: {{^}}empty_at_least_1:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_at_least_1() #3 {
  ret void
}
attributes #3 = { "amdgpu-waves-per-eu"="1" }

; Only a minimum of 5 is given: at least five waves per execution unit.
; CHECK-LABEL: {{^}}empty_at_least_5:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_at_least_5() #4 {
  ret void
}
attributes #4 = { "amdgpu-waves-per-eu"="5" }

; At least 10 waves per execution unit.
; CHECK-LABEL: {{^}}empty_at_least_10:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_at_least_10() #5 {
  ret void
}
attributes #5 = { "amdgpu-waves-per-eu"="10" }

; No separate kernel for "at most 1 wave per execution unit": that case is the
; same as @empty_exactly_1.

; Range 1..5: at most five waves per execution unit.
; CHECK-LABEL: {{^}}empty_at_most_5:
; CHECK: SGPRBlocks: 12
; CHECK: VGPRBlocks: 10
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 41
define amdgpu_kernel void @empty_at_most_5() #6 {
  ret void
}
attributes #6 = { "amdgpu-waves-per-eu"="1,5" }

; Range 1..10: at most ten waves per execution unit.
; CHECK-LABEL: {{^}}empty_at_most_10:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_at_most_10() #7 {
  ret void
}
attributes #7 = { "amdgpu-waves-per-eu"="1,10" }

; No separate kernel for "between 1 and 5 waves per execution unit": that case
; is the same as @empty_at_most_5.

; Range 5..10: between five and ten waves per execution unit.
; CHECK-LABEL: {{^}}empty_between_5_and_10:
; CHECK: SGPRBlocks: 0
; CHECK: VGPRBlocks: 0
; CHECK: NumSGPRsForWavesPerEU: 1
; CHECK: NumVGPRsForWavesPerEU: 1
define amdgpu_kernel void @empty_between_5_and_10() #8 {
  ret void
}
attributes #8 = { "amdgpu-waves-per-eu"="5,10" }

; Zero-initialized float global in address space 1.
@var = addrspace(1) global float 0.0

; Exactly 10 waves per execution unit.
; CHECK-LABEL: {{^}}exactly_10:
; CHECK: SGPRBlocks: 1
; CHECK: VGPRBlocks: 5
; CHECK: NumSGPRsForWavesPerEU: 12
; CHECK: NumVGPRsForWavesPerEU: 24
define amdgpu_kernel void @exactly_10() #9 {
  ; 31 volatile loads from @var; every value is defined before the first store.
  ; NOTE(review): presumably this keeps many values live at once so the
  ; register budget matters - confirm against the intent of the test.
  %v0 = load volatile float, float addrspace(1)* @var
  %v1 = load volatile float, float addrspace(1)* @var
  %v2 = load volatile float, float addrspace(1)* @var
  %v3 = load volatile float, float addrspace(1)* @var
  %v4 = load volatile float, float addrspace(1)* @var
  %v5 = load volatile float, float addrspace(1)* @var
  %v6 = load volatile float, float addrspace(1)* @var
  %v7 = load volatile float, float addrspace(1)* @var
  %v8 = load volatile float, float addrspace(1)* @var
  %v9 = load volatile float, float addrspace(1)* @var
  %v10 = load volatile float, float addrspace(1)* @var
  %v11 = load volatile float, float addrspace(1)* @var
  %v12 = load volatile float, float addrspace(1)* @var
  %v13 = load volatile float, float addrspace(1)* @var
  %v14 = load volatile float, float addrspace(1)* @var
  %v15 = load volatile float, float addrspace(1)* @var
  %v16 = load volatile float, float addrspace(1)* @var
  %v17 = load volatile float, float addrspace(1)* @var
  %v18 = load volatile float, float addrspace(1)* @var
  %v19 = load volatile float, float addrspace(1)* @var
  %v20 = load volatile float, float addrspace(1)* @var
  %v21 = load volatile float, float addrspace(1)* @var
  %v22 = load volatile float, float addrspace(1)* @var
  %v23 = load volatile float, float addrspace(1)* @var
  %v24 = load volatile float, float addrspace(1)* @var
  %v25 = load volatile float, float addrspace(1)* @var
  %v26 = load volatile float, float addrspace(1)* @var
  %v27 = load volatile float, float addrspace(1)* @var
  %v28 = load volatile float, float addrspace(1)* @var
  %v29 = load volatile float, float addrspace(1)* @var
  %v30 = load volatile float, float addrspace(1)* @var

  ; Store the loaded values back to @var in the order they were loaded.
  store volatile float %v0, float addrspace(1)* @var
  store volatile float %v1, float addrspace(1)* @var
  store volatile float %v2, float addrspace(1)* @var
  store volatile float %v3, float addrspace(1)* @var
  store volatile float %v4, float addrspace(1)* @var
  store volatile float %v5, float addrspace(1)* @var
  store volatile float %v6, float addrspace(1)* @var
  store volatile float %v7, float addrspace(1)* @var
  store volatile float %v8, float addrspace(1)* @var
  store volatile float %v9, float addrspace(1)* @var
  store volatile float %v10, float addrspace(1)* @var
  store volatile float %v11, float addrspace(1)* @var
  store volatile float %v12, float addrspace(1)* @var
  store volatile float %v13, float addrspace(1)* @var
  store volatile float %v14, float addrspace(1)* @var
  store volatile float %v15, float addrspace(1)* @var
  store volatile float %v16, float addrspace(1)* @var
  store volatile float %v17, float addrspace(1)* @var
  store volatile float %v18, float addrspace(1)* @var
  store volatile float %v19, float addrspace(1)* @var
  store volatile float %v20, float addrspace(1)* @var
  store volatile float %v21, float addrspace(1)* @var
  store volatile float %v22, float addrspace(1)* @var
  store volatile float %v23, float addrspace(1)* @var
  store volatile float %v24, float addrspace(1)* @var
  store volatile float %v25, float addrspace(1)* @var
  store volatile float %v26, float addrspace(1)* @var
  store volatile float %v27, float addrspace(1)* @var
  store volatile float %v28, float addrspace(1)* @var
  store volatile float %v29, float addrspace(1)* @var
  store volatile float %v30, float addrspace(1)* @var

  ret void
}
attributes #9 = { "amdgpu-waves-per-eu"="10,10" }

; Exactly 256 workitems and exactly 2 waves.
; Empty kernel that carries two attributes: a flat work-group size with minimum
; and maximum both 256, and a waves-per-EU range with minimum and maximum both 2.
; CHECK-LABEL: {{^}}empty_workitems_exactly_256_waves_exactly_2:
; CHECK: SGPRBlocks: 12
; CHECK: VGPRBlocks: 21
; CHECK: NumSGPRsForWavesPerEU: 102
; CHECK: NumVGPRsForWavesPerEU: 85
define amdgpu_kernel void @empty_workitems_exactly_256_waves_exactly_2() #10 {
  ret void
}
attributes #10 = { "amdgpu-flat-work-group-size"="256,256" "amdgpu-waves-per-eu"="2,2" }