; AMDGPU codegen checks for the llvm.r600.read.local.size.{x,y,z} intrinsics.
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI  -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 
      6 ; FUNC-LABEL: {{^}}local_size_x:
      7 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
      8 ; EG: MOV * [[VAL]], KC0[1].Z
      9 
     10 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
     11 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
     12 ; CI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x1
     13 ; VI-HSA: s_load_dword [[XY:s[0-9]+]], s[4:5], 0x4
     14 
     15 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     16 ; GCN: buffer_store_dword [[VVAL]]
     17 define void @local_size_x(i32 addrspace(1)* %out) {
     18 entry:
     19   %0 = call i32 @llvm.r600.read.local.size.x() #0
     20   store i32 %0, i32 addrspace(1)* %out
     21   ret void
     22 }
     23 
     24 ; FUNC-LABEL: {{^}}local_size_y:
     25 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
     26 ; EG: MOV * [[VAL]], KC0[1].W
     27 
     28 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
     29 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
     30 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     31 ; GCN: buffer_store_dword [[VVAL]]
     32 define void @local_size_y(i32 addrspace(1)* %out) {
     33 entry:
     34   %0 = call i32 @llvm.r600.read.local.size.y() #0
     35   store i32 %0, i32 addrspace(1)* %out
     36   ret void
     37 }
     38 
     39 ; FUNC-LABEL: {{^}}local_size_z:
     40 ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
     41 ; EG: MOV * [[VAL]], KC0[2].X
     42 
     43 ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
     44 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
     45 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
     46 ; GCN: buffer_store_dword [[VVAL]]
     47 define void @local_size_z(i32 addrspace(1)* %out) {
     48 entry:
     49   %0 = call i32 @llvm.r600.read.local.size.z() #0
     50   store i32 %0, i32 addrspace(1)* %out
     51   ret void
     52 }
     53 
     54 ; FUNC-LABEL: {{^}}local_size_xy:
     55 ; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
     56 ; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
     57 ; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
     58 ; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
     59 ; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
     60 ; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VY]]
     61 ; GCN: buffer_store_dword [[VAL]]
     62 define void @local_size_xy(i32 addrspace(1)* %out) {
     63 entry:
     64   %x = call i32 @llvm.r600.read.local.size.x() #0
     65   %y = call i32 @llvm.r600.read.local.size.y() #0
     66   %val = mul i32 %x, %y
     67   store i32 %val, i32 addrspace(1)* %out
     68   ret void
     69 }
     70 
     71 ; FUNC-LABEL: {{^}}local_size_xz:
     72 
     73 ; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
     74 ; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
     75 ; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
     76 ; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
     77 ; HSA-DAG: s_and_b32 [[X:s[0-9]+]], [[XY]], 0xffff
     78 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
     79 ; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VZ]]
     80 ; GCN: buffer_store_dword [[VAL]]
     81 define void @local_size_xz(i32 addrspace(1)* %out) {
     82 entry:
     83   %x = call i32 @llvm.r600.read.local.size.x() #0
     84   %z = call i32 @llvm.r600.read.local.size.z() #0
     85   %val = mul i32 %x, %z
     86   store i32 %val, i32 addrspace(1)* %out
     87   ret void
     88 }
     89 
     90 ; FUNC-LABEL: {{^}}local_size_yz:
     91 ; HSA: enable_sgpr_private_segment_buffer = 1
     92 ; HSA: enable_sgpr_dispatch_ptr = 1
     93 
     94 ; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
     95 ; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
     96 ; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
     97 ; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
     98 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
     99 ; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[Y]], [[VZ]]
    100 ; GCN: buffer_store_dword [[VAL]]
    101 define void @local_size_yz(i32 addrspace(1)* %out) {
    102 entry:
    103   %y = call i32 @llvm.r600.read.local.size.y() #0
    104   %z = call i32 @llvm.r600.read.local.size.z() #0
    105   %val = mul i32 %y, %z
    106   store i32 %val, i32 addrspace(1)* %out
    107   ret void
    108 }
    109 
    110 ; FUNC-LABEL: {{^}}local_size_xyz:
    111 ; HSA: enable_sgpr_private_segment_buffer = 1
    112 ; HSA: enable_sgpr_dispatch_ptr = 1
    113 
    114 ; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
    115 ; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
    116 ; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
    117 ; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
    118 ; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
    119 ; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
    120 ; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
    121 ; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
    122 ; GCN: v_mad_u32_u24 [[VAL:v[0-9]+]], [[X]], [[VY]], [[VZ]]
    123 ; GCN: buffer_store_dword [[VAL]]
    124 define void @local_size_xyz(i32 addrspace(1)* %out) {
    125 entry:
    126   %x = call i32 @llvm.r600.read.local.size.x() #0
    127   %y = call i32 @llvm.r600.read.local.size.y() #0
    128   %z = call i32 @llvm.r600.read.local.size.z() #0
    129   %xy = mul i32 %x, %y
    130   %xyz = add i32 %xy, %z
    131   store i32 %xyz, i32 addrspace(1)* %out
    132   ret void
    133 }
    134 
    135 ; FUNC-LABEL: {{^}}local_size_x_known_bits:
    136 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
    137 ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
    138 ; GCN-NOT: 0xffff
    139 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
    140 ; GCN-NEXT: buffer_store_dword [[VVAL]]
    141 define void @local_size_x_known_bits(i32 addrspace(1)* %out) {
    142 entry:
    143   %size = call i32 @llvm.r600.read.local.size.x() #0
    144   %shl = shl i32 %size, 16
    145   %shr = lshr i32 %shl, 16
    146   store i32 %shr, i32 addrspace(1)* %out
    147   ret void
    148 }
    149 
    150 ; FUNC-LABEL: {{^}}local_size_y_known_bits:
    151 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
    152 ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
    153 ; GCN-NOT: 0xffff
    154 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
    155 ; GCN-NEXT: buffer_store_dword [[VVAL]]
    156 define void @local_size_y_known_bits(i32 addrspace(1)* %out) {
    157 entry:
    158   %size = call i32 @llvm.r600.read.local.size.y() #0
    159   %shl = shl i32 %size, 16
    160   %shr = lshr i32 %shl, 16
    161   store i32 %shr, i32 addrspace(1)* %out
    162   ret void
    163 }
    164 
    165 ; FUNC-LABEL: {{^}}local_size_z_known_bits:
    166 ; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
    167 ; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
    168 ; GCN-NOT: 0xffff
    169 ; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
    170 ; GCN-NEXT: buffer_store_dword [[VVAL]]
    171 define void @local_size_z_known_bits(i32 addrspace(1)* %out) {
    172 entry:
    173   %size = call i32 @llvm.r600.read.local.size.z() #0
    174   %shl = shl i32 %size, 16
    175   %shr = lshr i32 %shl, 16
    176   store i32 %shr, i32 addrspace(1)* %out
    177   ret void
    178 }
    179 
    180 declare i32 @llvm.r600.read.local.size.x() #0
    181 declare i32 @llvm.r600.read.local.size.y() #0
    182 declare i32 @llvm.r600.read.local.size.z() #0
    183 
    184 attributes #0 = { nounwind readnone }
    185