Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI  -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      3 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=HSA -check-prefix=CI-HSA -check-prefix=FUNC %s
      4 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=carrizo -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=GCN -check-prefix=HSA -check-prefix=VI-HSA -check-prefix=FUNC %s
      5 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      6 
      7 
      8 ; FUNC-LABEL: {{^}}ngroups_x:
      9 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
     10 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].X
     11 
     12 ; HSA: .amd_kernel_code_t
     13 
     14 ; HSA: enable_sgpr_private_segment_buffer = 1
     15 ; HSA: enable_sgpr_dispatch_ptr = 0
     16 ; HSA: enable_sgpr_queue_ptr = 0
     17 ; HSA: enable_sgpr_kernarg_segment_ptr = 1
     18 ; HSA: enable_sgpr_dispatch_id = 0
     19 ; HSA: enable_sgpr_flat_scratch_init = 0
     20 ; HSA: enable_sgpr_private_segment_size = 0
     21 ; HSA: enable_sgpr_grid_workgroup_count_x = 0
     22 ; HSA: enable_sgpr_grid_workgroup_count_y = 0
     23 ; HSA: enable_sgpr_grid_workgroup_count_z = 0
     24 
     25 ; HSA: .end_amd_kernel_code_t
     26 
     27 
     28 ; GCN-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0
     29 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     30 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
     31 
     32 define void @ngroups_x(i32 addrspace(1)* %out) {          ; stores the ngroups.x intrinsic result to %out
     33 entry:
     34   %ngroups.x = call i32 @llvm.r600.read.ngroups.x() #0   ; call carries attribute group #0
     35   store i32 %ngroups.x, i32 addrspace(1)* %out           ; the only memory write in this function
     36   ret void
     37 }
     38 
     39 ; FUNC-LABEL: {{^}}ngroups_y:
     40 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
     41 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y
     42 
     43 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1
     44 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
     45 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     46 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
     47 define void @ngroups_y(i32 addrspace(1)* %out) {          ; stores the ngroups.y intrinsic result to %out
     48 entry:
     49   %ngroups.y = call i32 @llvm.r600.read.ngroups.y() #0   ; call carries attribute group #0
     50   store i32 %ngroups.y, i32 addrspace(1)* %out           ; the only memory write in this function
     51   ret void
     52 }
     53 
     54 ; FUNC-LABEL: {{^}}ngroups_z:
     55 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
     56 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z
     57 
     58 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2
     59 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
     60 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     61 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
     62 define void @ngroups_z(i32 addrspace(1)* %out) {          ; stores the ngroups.z intrinsic result to %out
     63 entry:
     64   %ngroups.z = call i32 @llvm.r600.read.ngroups.z() #0   ; call carries attribute group #0
     65   store i32 %ngroups.z, i32 addrspace(1)* %out           ; the only memory write in this function
     66   ret void
     67 }
     68 
     69 ; FUNC-LABEL: {{^}}global_size_x:
     70 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
     71 ; EG: MOV {{\*? *}}[[VAL]], KC0[0].W
     72 
     73 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3
     74 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc
     75 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     76 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
     77 define void @global_size_x(i32 addrspace(1)* %out) {              ; stores the global.size.x intrinsic result to %out
     78 entry:
     79   %global.size.x = call i32 @llvm.r600.read.global.size.x() #0   ; call carries attribute group #0
     80   store i32 %global.size.x, i32 addrspace(1)* %out               ; the only memory write in this function
     81   ret void
     82 }
     83 
     84 ; FUNC-LABEL: {{^}}global_size_y:
     85 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
     86 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].X
     87 
     88 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4
     89 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10
     90 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     91 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
     92 define void @global_size_y(i32 addrspace(1)* %out) {              ; stores the global.size.y intrinsic result to %out
     93 entry:
     94   %global.size.y = call i32 @llvm.r600.read.global.size.y() #0   ; call carries attribute group #0
     95   store i32 %global.size.y, i32 addrspace(1)* %out               ; the only memory write in this function
     96   ret void
     97 }
     98 
     99 ; FUNC-LABEL: {{^}}global_size_z:
    100 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
    101 ; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y
    102 
    103 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5
    104 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14
    105 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
    106 ; GCN-NOHSA: buffer_store_dword [[VVAL]]
    107 define void @global_size_z(i32 addrspace(1)* %out) {              ; stores the global.size.z intrinsic result to %out
    108 entry:
    109   %global.size.z = call i32 @llvm.r600.read.global.size.z() #0   ; call carries attribute group #0
    110   store i32 %global.size.z, i32 addrspace(1)* %out               ; the only memory write in this function
    111   ret void
    112 }
    113 
    114 ; The tgid values are stored in sgprs offset by the number of user
    115 ; sgprs.
    116 
    117 ; FUNC-LABEL: {{^}}tgid_x:
    118 ; HSA: .amd_kernel_code_t
    119 ; HSA: compute_pgm_rsrc2_user_sgpr = 6
    120 ; HSA: compute_pgm_rsrc2_tgid_x_en = 1
    121 ; HSA: compute_pgm_rsrc2_tgid_y_en = 0
    122 ; HSA: compute_pgm_rsrc2_tgid_z_en = 0
    123 ; HSA: compute_pgm_rsrc2_tg_size_en = 0
    124 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
    125 ; HSA: enable_sgpr_grid_workgroup_count_x = 0
    126 ; HSA: enable_sgpr_grid_workgroup_count_y = 0
    127 ; HSA: enable_sgpr_grid_workgroup_count_z = 0
    128 ; HSA: .end_amd_kernel_code_t
    129 
    130 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s2{{$}}
    131 ; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s6{{$}}
    132 ; GCN: buffer_store_dword [[VVAL]]
    133 
    134 ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
    135 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
    136 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
    137 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
    138 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
    139 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
    140 define void @tgid_x(i32 addrspace(1)* %out) {       ; stores the tgid.x intrinsic result to %out
    141 entry:
    142   %tgid.x = call i32 @llvm.r600.read.tgid.x() #0   ; call carries attribute group #0
    143   store i32 %tgid.x, i32 addrspace(1)* %out        ; the only memory write in this function
    144   ret void
    145 }
    146 
    147 ; FUNC-LABEL: {{^}}tgid_y:
    148 ; HSA: compute_pgm_rsrc2_user_sgpr = 6
    149 ; HSA: compute_pgm_rsrc2_tgid_x_en = 1
    150 ; HSA: compute_pgm_rsrc2_tgid_y_en = 1
    151 ; HSA: compute_pgm_rsrc2_tgid_z_en = 0
    152 ; HSA: compute_pgm_rsrc2_tg_size_en = 0
    153 ; HSA: enable_sgpr_grid_workgroup_count_x = 0
    154 ; HSA: enable_sgpr_grid_workgroup_count_y = 0
    155 ; HSA: enable_sgpr_grid_workgroup_count_z = 0
    156 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
    157 ; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
    158 ; GCN: buffer_store_dword [[VVAL]]
    159 
    160 ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
    161 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
    162 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
    163 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 1
    164 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 0
    165 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
    166 define void @tgid_y(i32 addrspace(1)* %out) {       ; stores the tgid.y intrinsic result to %out
    167 entry:
    168   %tgid.y = call i32 @llvm.r600.read.tgid.y() #0   ; x and y ids are both enabled, so the checks expect y one sgpr past the user sgpr count
    169   store i32 %tgid.y, i32 addrspace(1)* %out        ; the only memory write in this function
    170   ret void
    171 }
    172 
    173 ; FUNC-LABEL: {{^}}tgid_z:
    174 ; HSA: compute_pgm_rsrc2_user_sgpr = 6
    175 ; HSA: compute_pgm_rsrc2_tgid_x_en = 1
    176 ; HSA: compute_pgm_rsrc2_tgid_y_en = 0
    177 ; HSA: compute_pgm_rsrc2_tgid_z_en = 1
    178 ; HSA: compute_pgm_rsrc2_tg_size_en = 0
    179 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
    180 ; HSA: enable_sgpr_private_segment_buffer = 1
    181 ; HSA: enable_sgpr_dispatch_ptr = 0
    182 ; HSA: enable_sgpr_queue_ptr = 0
    183 ; HSA: enable_sgpr_kernarg_segment_ptr = 1
    184 ; HSA: enable_sgpr_dispatch_id = 0
    185 ; HSA: enable_sgpr_flat_scratch_init = 0
    186 ; HSA: enable_sgpr_private_segment_size = 0
    187 ; HSA: enable_sgpr_grid_workgroup_count_x = 0
    188 ; HSA: enable_sgpr_grid_workgroup_count_y = 0
    189 ; HSA: enable_sgpr_grid_workgroup_count_z = 0
    190 
    191 ; GCN-NOHSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s3{{$}}
    192 ; HSA: v_mov_b32_e32 [[VVAL:v[0-9]+]], s7{{$}}
    193 ; GCN: buffer_store_dword [[VVAL]]
    194 
    195 ; HSA: COMPUTE_PGM_RSRC2:USER_SGPR: 6
    196 ; GCN-NOHSA: COMPUTE_PGM_RSRC2:USER_SGPR: 2
    197 ; GCN: COMPUTE_PGM_RSRC2:TGID_X_EN: 1
    198 ; GCN: COMPUTE_PGM_RSRC2:TGID_Y_EN: 0
    199 ; GCN: COMPUTE_PGM_RSRC2:TGID_Z_EN: 1
    200 ; GCN: COMPUTE_PGM_RSRC2:TIDIG_COMP_CNT: 0
    201 define void @tgid_z(i32 addrspace(1)* %out) {       ; stores the tgid.z intrinsic result to %out
    202 entry:
    203   %tgid.z = call i32 @llvm.r600.read.tgid.z() #0   ; call carries attribute group #0
    204   store i32 %tgid.z, i32 addrspace(1)* %out        ; the only memory write in this function
    205   ret void
    206 }
    207 
    208 ; GCN-NOHSA: .section .AMDGPU.config
    209 ; GCN-NOHSA: .long 47180
    210 ; GCN-NOHSA-NEXT: .long 132{{$}}
    211 
    212 ; FUNC-LABEL: {{^}}tidig_x:
    213 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 0
    214 ; GCN: buffer_store_dword v0
    215 define void @tidig_x(i32 addrspace(1)* %out) {      ; stores the tidig.x intrinsic result to %out
    216 entry:
    217   %tid.x = call i32 @llvm.r600.read.tidig.x() #0   ; call carries attribute group #0
    218   store i32 %tid.x, i32 addrspace(1)* %out         ; the only memory write in this function
    219   ret void
    220 }
    221 
    222 ; GCN-NOHSA: .section .AMDGPU.config
    223 ; GCN-NOHSA: .long 47180
    224 ; GCN-NOHSA-NEXT: .long 2180{{$}}
    225 
    226 ; FUNC-LABEL: {{^}}tidig_y:
    227 
    228 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 1
    229 ; GCN: buffer_store_dword v1
    230 define void @tidig_y(i32 addrspace(1)* %out) {      ; stores the tidig.y intrinsic result to %out
    231 entry:
    232   %tid.y = call i32 @llvm.r600.read.tidig.y() #0   ; call carries attribute group #0
    233   store i32 %tid.y, i32 addrspace(1)* %out         ; the only memory write in this function
    234   ret void
    235 }
    236 
    237 ; GCN-NOHSA: .section .AMDGPU.config
    238 ; GCN-NOHSA: .long 47180
    239 ; GCN-NOHSA-NEXT: .long 4228{{$}}
    240 
    241 ; FUNC-LABEL: {{^}}tidig_z:
    242 ; HSA: compute_pgm_rsrc2_tidig_comp_cnt = 2
    243 ; GCN: buffer_store_dword v2
    244 define void @tidig_z(i32 addrspace(1)* %out) {      ; stores the tidig.z intrinsic result to %out
    245 entry:
    246   %tid.z = call i32 @llvm.r600.read.tidig.z() #0   ; call carries attribute group #0
    247   store i32 %tid.z, i32 addrspace(1)* %out         ; the only memory write in this function
    248   ret void
    249 }
    250 
    251 declare i32 @llvm.r600.read.ngroups.x() #0       ; the three ngroups intrinsics are called by @ngroups_x, @ngroups_y, @ngroups_z
    252 declare i32 @llvm.r600.read.ngroups.y() #0
    253 declare i32 @llvm.r600.read.ngroups.z() #0
    254 
    255 declare i32 @llvm.r600.read.global.size.x() #0   ; the three global.size intrinsics are called by @global_size_x, @global_size_y, @global_size_z
    256 declare i32 @llvm.r600.read.global.size.y() #0
    257 declare i32 @llvm.r600.read.global.size.z() #0
    258 
    259 declare i32 @llvm.r600.read.tgid.x() #0          ; the three tgid intrinsics are called by @tgid_x, @tgid_y, @tgid_z
    260 declare i32 @llvm.r600.read.tgid.y() #0
    261 declare i32 @llvm.r600.read.tgid.z() #0
    262 
    263 declare i32 @llvm.r600.read.tidig.x() #0         ; the three tidig intrinsics are called by @tidig_x, @tidig_y, @tidig_z
    264 declare i32 @llvm.r600.read.tidig.y() #0
    265 declare i32 @llvm.r600.read.tidig.z() #0
    266 
    267 declare i32 @llvm.AMDGPU.read.workdim() #0       ; NOTE(review) no function visible in this file calls this intrinsic - confirm it is still needed
    268 
    269 attributes #0 = { readnone }                     ; attribute group referenced by every declaration and call site in this file
    270