; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=GCN -check-prefix=SI-NOHSA -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=VI -check-prefix=VI-NOHSA -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Codegen checks for the llvm.r600.read.local.size.{x,y,z} intrinsics on the
; three configurations driven by the run lines above (amdgcn with -mcpu=SI,
; amdgcn with -mcpu=tonga, and r600 with -mcpu=redwood).
;
; Every check prefix used below is enabled by at least one of those run lines.
; Directives written against a prefix that no run line enables are silently
; skipped by FileCheck, so none are kept in this file.


; Stores the x component of the local size. The amdgcn checks expect a single
; scalar dword load from s[0:1] that is copied to a VGPR and stored.
; FUNC-LABEL: {{^}}local_size_x:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV * [[VAL]], KC0[1].Z

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18

; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @local_size_x(i32 addrspace(1)* %out) {
entry:
  %size = call i32 @llvm.r600.read.local.size.x() #0
  store i32 %size, i32 addrspace(1)* %out
  ret void
}

; Same as local_size_x, for the y component.
; FUNC-LABEL: {{^}}local_size_y:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV * [[VAL]], KC0[1].W

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @local_size_y(i32 addrspace(1)* %out) {
entry:
  %size = call i32 @llvm.r600.read.local.size.y() #0
  store i32 %size, i32 addrspace(1)* %out
  ret void
}

; Same as local_size_x, for the z component.
; FUNC-LABEL: {{^}}local_size_z:
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]]
; EG: MOV * [[VAL]], KC0[2].X

; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN: buffer_store_dword [[VVAL]]
define void @local_size_z(i32 addrspace(1)* %out) {
entry:
  %size = call i32 @llvm.r600.read.local.size.z() #0
  store i32 %size, i32 addrspace(1)* %out
  ret void
}

; Stores x * y. The amdgcn checks expect both components to be loaded and the
; product to be formed with v_mul_u32_u24_e32.
; FUNC-LABEL: {{^}}local_size_xy:
; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VY]]
; GCN: buffer_store_dword [[VAL]]
define void @local_size_xy(i32 addrspace(1)* %out) {
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %val = mul i32 %x, %y
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Stores x * z; same expectations as local_size_xy with z in place of y.
; FUNC-LABEL: {{^}}local_size_xz:

; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[X]], [[VZ]]
; GCN: buffer_store_dword [[VAL]]
define void @local_size_xz(i32 addrspace(1)* %out) {
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %val = mul i32 %x, %z
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Stores y * z; same expectations as local_size_xy with y and z.
; FUNC-LABEL: {{^}}local_size_yz:

; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_mul_u32_u24_e32 [[VAL:v[0-9]+]], [[Y]], [[VZ]]
; GCN: buffer_store_dword [[VAL]]
define void @local_size_yz(i32 addrspace(1)* %out) {
entry:
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %val = mul i32 %y, %z
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Stores x * y + z. The amdgcn checks expect the multiply and add to be
; combined into a single v_mad_u32_u24.
; FUNC-LABEL: {{^}}local_size_xyz:

; SI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x6
; SI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x7
; SI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x8
; VI-NOHSA-DAG: s_load_dword [[X:s[0-9]+]], s[0:1], 0x18
; VI-NOHSA-DAG: s_load_dword [[Y:s[0-9]+]], s[0:1], 0x1c
; VI-NOHSA-DAG: s_load_dword [[Z:s[0-9]+]], s[0:1], 0x20
; GCN-DAG: v_mov_b32_e32 [[VY:v[0-9]+]], [[Y]]
; GCN-DAG: v_mov_b32_e32 [[VZ:v[0-9]+]], [[Z]]
; GCN: v_mad_u32_u24 [[VAL:v[0-9]+]], [[X]], [[VY]], [[VZ]]
; GCN: buffer_store_dword [[VAL]]
define void @local_size_xyz(i32 addrspace(1)* %out) {
entry:
  %x = call i32 @llvm.r600.read.local.size.x() #0
  %y = call i32 @llvm.r600.read.local.size.y() #0
  %z = call i32 @llvm.r600.read.local.size.z() #0
  %xy = mul i32 %x, %y
  %xyz = add i32 %xy, %z
  store i32 %xyz, i32 addrspace(1)* %out
  ret void
}

; The shl/lshr pair below keeps only the low 16 bits of the x component. The
; amdgcn checks require that no 0xffff mask is emitted and that the loaded
; value is stored directly (presumably the backend treats the upper 16 bits of
; the local size as known zero; confirm against the target lowering).
; FUNC-LABEL: {{^}}local_size_x_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
define void @local_size_x_known_bits(i32 addrspace(1)* %out) {
entry:
  %size = call i32 @llvm.r600.read.local.size.x() #0
  %shl = shl i32 %size, 16
  %shr = lshr i32 %shl, 16
  store i32 %shr, i32 addrspace(1)* %out
  ret void
}

; Same as local_size_x_known_bits, for the y component.
; FUNC-LABEL: {{^}}local_size_y_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
define void @local_size_y_known_bits(i32 addrspace(1)* %out) {
entry:
  %size = call i32 @llvm.r600.read.local.size.y() #0
  %shl = shl i32 %size, 16
  %shr = lshr i32 %shl, 16
  store i32 %shr, i32 addrspace(1)* %out
  ret void
}

; Same as local_size_x_known_bits, for the z component.
; FUNC-LABEL: {{^}}local_size_z_known_bits:
; SI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8
; VI: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20
; GCN-NOT: 0xffff
; GCN: v_mov_b32_e32 [[VVAL:v[0-9]+]], [[VAL]]
; GCN-NEXT: buffer_store_dword [[VVAL]]
define void @local_size_z_known_bits(i32 addrspace(1)* %out) {
entry:
  %size = call i32 @llvm.r600.read.local.size.z() #0
  %shl = shl i32 %size, 16
  %shr = lshr i32 %shl, 16
  store i32 %shr, i32 addrspace(1)* %out
  ret void
}

declare i32 @llvm.r600.read.local.size.x() #0
declare i32 @llvm.r600.read.local.size.y() #0
declare i32 @llvm.r600.read.local.size.z() #0

attributes #0 = { nounwind readnone }