; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; The gfx900 invocation above enables the GFX9 prefix so that the negative
; m0 checks written under that prefix throughout this file are evaluated.
; The default and tonga invocations share the SICIVI prefix, which expects
; m0 to be initialised to -1 before the loads.

; Testing for ds_read/write_128
; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s

; Loads one i32 through the addrspace(3) pointer %in and stores it through
; the addrspace(3) pointer %out.
; FUNC-LABEL: {{^}}local_load_i32:
; GCN-NOT: s_wqm_b64
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0
; GCN: ds_read_b32

; EG: LDS_READ_RET
define amdgpu_kernel void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(3)* %in
  store i32 %ld, i32 addrspace(3)* %out
  ret void
}

; Same copy pattern for <2 x i32>, expected to use a single 64-bit read.
; FUNC-LABEL: {{^}}local_load_v2i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN: ds_read_b64
define amdgpu_kernel void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(3)* %out
  ret void
}

; Same copy pattern for <3 x i32>, expected to use one 64-bit read and one
; 32-bit read, in either order.
; FUNC-LABEL: {{^}}local_load_v3i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read_b64
; GCN-DAG: ds_read_b32
define amdgpu_kernel void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(3)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(3)* %out
  ret void
}

; Same copy pattern for <4 x i32>, expected to use one ds_read2_b64 whose
; only printed offset is offset1 = 1.
; FUNC-LABEL: {{^}}local_load_v4i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}

define amdgpu_kernel void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(3)* %out
  ret void
}

; Same copy pattern for <8 x i32>, expected to use two ds_read2_b64
; instructions covering offsets 0 through 3.
; FUNC-LABEL: {{^}}local_load_v8i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
define amdgpu_kernel void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(3)* %out
  ret void
}

; Same copy pattern for <16 x i32>. Both the four paired reads and the four
; paired writes are checked, in any order.
; FUNC-LABEL: {{^}}local_load_v16i32:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
define amdgpu_kernel void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(3)* %out
  ret void
}

; The extending-load kernels below load i32 data, widen it to i64 with zext
; or sext, and store the widened value. Only the m0 handling is checked.

; FUNC-LABEL: {{^}}local_zextload_i32_to_i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %ld = load i32, i32 addrspace(3)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_i32_to_i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
  %ld = load i32, i32 addrspace(3)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
  ret void
}

; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load.
; FUNC-LABEL: {{^}}local_v4i32_to_128:
; Copies a <4 x i32> with both the load and the store marked align 16.
; Under the SI prefix no 128-bit DS read or write may appear, while the
; CIVI prefix requires both. The EG prefix expects four LDS_READ_RET.

; SI-NOT: ds_read_b128
; SI-NOT: ds_write_b128

; CIVI: ds_read_b128
; CIVI: ds_write_b128

; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
; EG: LDS_READ_RET
define amdgpu_kernel void @local_v4i32_to_128(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) {
  %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 16
  store <4 x i32> %ld, <4 x i32> addrspace(3)* %out, align 16
  ret void
}

; The remaining kernels load a vector of i32, widen every element to i64
; with zext or sext, and store the widened vector. Only the m0 handling is
; checked for them.

; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; The label pattern below ends with a colon like every other label in this
; file, so it anchors on the complete symbol name.
; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64:
; SICIVI: s_mov_b32 m0, -1
; GFX9-NOT: m0

define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
  ret void
}

; Every kernel that references #0 is marked nounwind.
attributes #0 = { nounwind }