; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=HSA -check-prefix=CI-HSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=HSA -check-prefix=VI-HSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen test for the r600 work-item intrinsics (ngroups, global size, tgid
; and tidig, each in x/y/z). Every function below stores the result of one
; intrinsic call to %out; the check lines pin where that value is read from
; for each of the targets exercised by the run lines above.

; FUNC-LABEL: {{^}}ngroups_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].X

; HSA: .amd_kernel_code_t

; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: enable_sgpr_dispatch_id = 0
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: enable_sgpr_private_segment_size = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0

; HSA: .end_amd_kernel_code_t


; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]

define void @ngroups_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ngroups_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @ngroups_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}ngroups_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @ngroups_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.ngroups.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[0].W

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_x (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].X

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_y (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NOHSA: buffer_store_dword [[VVAL]]
define void @global_size_z (i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.global.size.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; The tgid values are stored in sgprs offset by the number of user
; sgprs.

; FUNC-LABEL: {{^}}tgid_x:
; HSA: .amd_kernel_code_t
; HSA: compute_pgm_rsrc2_user_sgpr = 6
; HSA: compute_pgm_rsrc2_tgid_x_en = 1
; HSA: compute_pgm_rsrc2_tgid_y_en = 0
; HSA: compute_pgm_rsrc2_tgid_z_en = 0
; HSA: compute_pgm_rsrc2_tg_size_en = 0
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0
; HSA: .end_amd_kernel_code_t

; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}}
; GCN: buffer_store_dword [[VVAL]]

; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; The amdhsa register check in this function must use the plain HSA prefix,
; because no run line enables a combined GCN + HSA prefix. Without a live
; definition here, the store check on amdhsa runs would match against the
; VVAL captured in tgid_x. Both register operands are anchored at end of
; line so that s3 / s7 cannot match a longer register name.

; FUNC-LABEL: {{^}}tgid_y:
; HSA: compute_pgm_rsrc2_user_sgpr = 6
; HSA: compute_pgm_rsrc2_tgid_x_en = 1
; HSA: compute_pgm_rsrc2_tgid_y_en = 1
; HSA: compute_pgm_rsrc2_tgid_z_en = 0
; HSA: compute_pgm_rsrc2_tg_size_en = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0
; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
; GCN: buffer_store_dword [[VVAL]]

; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}tgid_z:
; HSA: compute_pgm_rsrc2_user_sgpr = 6
; HSA: compute_pgm_rsrc2_tgid_x_en = 1
; HSA: compute_pgm_rsrc2_tgid_y_en = 0
; HSA: compute_pgm_rsrc2_tgid_z_en = 1
; HSA: compute_pgm_rsrc2_tg_size_en = 0
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; HSA: enable_sgpr_private_segment_buffer = 1
; HSA: enable_sgpr_dispatch_ptr = 0
; HSA: enable_sgpr_queue_ptr = 0
; HSA: enable_sgpr_kernarg_segment_ptr = 1
; HSA: enable_sgpr_dispatch_id = 0
; HSA: enable_sgpr_flat_scratch_init = 0
; HSA: enable_sgpr_private_segment_size = 0
; HSA: enable_sgpr_grid_workgroup_count_x = 0
; HSA: enable_sgpr_grid_workgroup_count_y = 0
; HSA: enable_sgpr_grid_workgroup_count_z = 0

; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
; GCN: buffer_store_dword [[VVAL]]

; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
define void @tgid_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tgid.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; For the three tidig functions the config-section checks are written ahead
; of the function label they are grouped with.
; NOTE(review): presumably the config section is printed before the function
; body and 47180 (0xB84C) is the COMPUTE_PGM_RSRC2 register - confirm against
; the AMDGPU asm printer.

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 132{{$}}

; FUNC-LABEL: {{^}}tidig_x:
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
; GCN: buffer_store_dword v0
define void @tidig_x(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.x() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 2180{{$}}

; FUNC-LABEL: {{^}}tidig_y:

; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1
; GCN: buffer_store_dword v1
define void @tidig_y(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.y() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; GCN-NOHSA: .section .AMDGPU.config
; GCN-NOHSA: .long 47180
; GCN-NOHSA-NEXT: .long 4228{{$}}

; FUNC-LABEL: {{^}}tidig_z:
; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2
; GCN: buffer_store_dword v2
define void @tidig_z(i32 addrspace(1)* %out) {
entry:
  %0 = call i32 @llvm.r600.read.tidig.z() #0
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

declare i32 @llvm.r600.read.ngroups.x() #0
declare i32 @llvm.r600.read.ngroups.y() #0
declare i32 @llvm.r600.read.ngroups.z() #0

declare i32 @llvm.r600.read.global.size.x() #0
declare i32 @llvm.r600.read.global.size.y() #0
declare i32 @llvm.r600.read.global.size.z() #0

declare i32 @llvm.r600.read.tgid.x() #0
declare i32 @llvm.r600.read.tgid.y() #0
declare i32 @llvm.r600.read.tgid.z() #0

declare i32 @llvm.r600.read.tidig.x() #0
declare i32 @llvm.r600.read.tidig.y() #0
declare i32 @llvm.r600.read.tidig.z() #0

; Declared but not called by any function in this file.
declare i32 @llvm.AMDGPU.read.workdim() #0

attributes #0 = { readnone }