; Codegen checks for stores to global memory (addrspace 1), run through llc
; for -mcpu=verde, tonga and gfx900 (amdgcn) and -mcpu=redwood and cayman (r600).
;
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SIVI,FUNC %s
; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefixes=EG,FUNC %s
; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefixes=CM,FUNC %s

; Constant i1 store: the r600 runs expect exactly one MSKOR, the amdgcn runs
; expect a byte store.
; FUNC-LABEL: {{^}}store_i1:
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR

; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR

; SIVI: buffer_store_byte
; GFX9: global_store_byte
define amdgpu_kernel void @store_i1(i1 addrspace(1)* %out) {
entry:
  store i1 true, i1 addrspace(1)* %out
  ret void
}

; Store of an i8 kernel argument.
; FUNC-LABEL: {{^}}store_i8:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-NOT: MEM_RAT MSKOR

; EG: VTX_READ_8
; EG: AND_INT
; EG: AND_INT
; EG: LSHL
; EG: LSHL
; EG: LSHL

; SIVI: buffer_store_byte
; GFX9: global_store_byte
define amdgpu_kernel void @store_i8(i8 addrspace(1)* %out, i8 %in) {
entry:
  store i8 %in, i8 addrspace(1)* %out
  ret void
}

; Store of an i16 kernel argument.
; FUNC-LABEL: {{^}}store_i16:
; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-NOT: MEM_RAT MSKOR

; EG: VTX_READ_16
; EG: AND_INT
; EG: AND_INT
; EG: LSHL
; EG: LSHL
; EG: LSHL


; SIVI: buffer_store_short
; GFX9: global_store_short
define amdgpu_kernel void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
  store i16 %in, i16 addrspace(1)* %out
  ret void
}

; i24 store: the amdgcn runs expect it to be split into a short store plus a
; byte store; the Evergreen run expects two MSKOR stores.
; FUNC-LABEL: {{^}}store_i24:
; SIVI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
; SIVI-DAG: buffer_store_byte
; SIVI-DAG: buffer_store_short

; The address register pair may start at a multi-digit register number, so
; both halves of the pair are matched with [0-9]+.
; GFX9-DAG: global_store_byte_d16_hi v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:2
; GFX9-DAG: global_store_short

; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
define amdgpu_kernel void @store_i24(i24 addrspace(1)* %out, i24 %in) {
entry:
  store i24 %in, i24 addrspace(1)* %out
  ret void
}

; i25 store: the value is masked to 25 bits (0x1ffffff) and written with a
; single dword store.
; FUNC-LABEL: {{^}}store_i25:
; GCN: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x1ffffff{{$}}
; GCN: v_mov_b32_e32 [[VAND:v[0-9]+]], [[AND]]
; SIVI: buffer_store_dword [[VAND]]
; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VAND]]

; EG: MEM_RAT_CACHELESS STORE_RAW
; EG-NOT: MEM_RAT

; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT
define amdgpu_kernel void @store_i25(i25 addrspace(1)* %out, i25 %in) {
entry:
  store i25 %in, i25 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}store_v2i8:
; v2i8 is naturally 2B aligned
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR

; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR

; SIVI: buffer_store_short
; GFX9: global_store_short
define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i8>
  store <2 x i8> %0, <2 x i8> addrspace(1)* %out
  ret void
}

; Same store with align 1: the r600 runs expect two MSKOR stores.
; The amdgcn check below uses the SIVI prefix because the RUN lines enable no
; plain SI prefix; with the old prefix the check was never evaluated.
; NOTE(review): the gfx900 output is not checked for this function.
; FUNC-LABEL: {{^}}store_v2i8_unaligned:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR

; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR

; SIVI: buffer_store_byte
define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i8>
  store <2 x i8> %0, <2 x i8> addrspace(1)* %out, align 1
  ret void
}


; Naturally aligned <2 x i16> store: one dword store on every target.
; FUNC-LABEL: {{^}}store_v2i16:
; EG: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i16>
  store <2 x i16> %0, <2 x i16> addrspace(1)* %out
  ret void
}

; <2 x i16> store with align 2: expected to be split into two short stores
; rather than one dword store.
; FUNC-LABEL: {{^}}store_v2i16_unaligned:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_short
; SIVI: buffer_store_short

; GFX9: global_store_short
; GFX9: global_store_short
define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i16>
  store <2 x i16> %0, <2 x i16> addrspace(1)* %out, align 2
  ret void
}

; Naturally aligned <4 x i8> store: one dword store on every target.
; FUNC-LABEL: {{^}}store_v4i8:
; EG: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_dword
; GFX9: global_store_dword
define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(1)* %out
  ret void
}

; <4 x i8> store with align 1: expected to become four byte-sized stores and
; no dword store.
; The amdgcn checks use the SIVI prefix (no RUN line enables a plain SI prefix).
; NOTE(review): the gfx900 output is not checked for this function.
; FUNC-LABEL: {{^}}store_v4i8_unaligned:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_byte
; SIVI: buffer_store_byte
; SIVI: buffer_store_byte
; SIVI: buffer_store_byte
; SIVI-NOT: buffer_store_dword
define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(1)* %out, align 1
  ret void
}

; <4 x i8> store with align 2: expected to become two short-sized stores and
; no dword store.
; The amdgcn checks use the SIVI prefix (no RUN line enables a plain SI prefix).
; NOTE(review): the gfx900 output is not checked for this function.
; FUNC-LABEL: {{^}}store_v4i8_halfaligned:
; EG: MEM_RAT MSKOR
; EG: MEM_RAT MSKOR
; EG-NOT: MEM_RAT MSKOR
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT MSKOR
; CM: MEM_RAT MSKOR
; CM-NOT: MEM_RAT MSKOR
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_short
; SIVI: buffer_store_short
; SIVI-NOT: buffer_store_dword
define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(1)* %out, align 2
  ret void
}

; floating-point store
; FUNC-LABEL: {{^}}store_f32:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1

; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}

; SIVI: buffer_store_dword
; GFX9: global_store_dword

define amdgpu_kernel void @store_f32(float addrspace(1)* %out, float %in) {
  store float %in, float addrspace(1)* %out
  ret void
}

; <4 x i16> store: expected as one 64-bit wide store.
; FUNC-LABEL: {{^}}store_v4i16:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY

; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}

; SIVI: buffer_store_dwordx2
; GFX9: global_store_dwordx2
define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i16>
  store <4 x i16> %0, <4 x i16> addrspace(1)* %out
  ret void
}

; vec2 floating-point stores
; FUNC-LABEL: {{^}}store_v2f32:
; EG: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_dwordx2
; GFX9: global_store_dwordx2

define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
entry:
  %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
  %1 = insertelement <2 x float> %0, float %b, i32 1
  store <2 x float> %1, <2 x float> addrspace(1)* %out
  ret void
}

; <3 x i32> store with align 16: expected as a dwordx2 store plus a dword
; store, in either order.
; FUNC-LABEL: {{^}}store_v3i32:
; SIVI-DAG: buffer_store_dwordx2
; SIVI-DAG: buffer_store_dword v

; GFX9-DAG: global_store_dwordx2
; GFX9-DAG: global_store_dword v

; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}},
; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XY}}, {{T[0-9]+\.[XYZW]}},
define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a) nounwind {
  store <3 x i32> %a, <3 x i32> addrspace(1)* %out, align 16
  ret void
}

; Naturally aligned <4 x i32> store: exactly one 128-bit wide store.
; FUNC-LABEL: {{^}}store_v4i32:
; EG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XYZW}}
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_dwordx4
; GFX9: global_store_dwordx4
define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> store with align 4: still expected as exactly one 128-bit wide
; store.
; FUNC-LABEL: {{^}}store_v4i32_unaligned:
; EG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XYZW}}
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_dwordx4
; GFX9: global_store_dwordx4
define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
  ret void
}

; v4f32 store
; FUNC-LABEL: {{^}}store_v4f32:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_dwordx4
; GFX9: global_store_dwordx4
define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
  ; Copy one <4 x float> from %in to %out.
  %vec = load <4 x float>, <4 x float> addrspace(1)* %in
  store <4 x float> %vec, <4 x float> addrspace(1)* %out
  ret void
}

; i64 argument truncated to i8 before the store: a byte-sized store is expected.
; FUNC-LABEL: {{^}}store_i64_i8:
; EG: MEM_RAT MSKOR

; CM: MEM_RAT MSKOR

; SIVI: buffer_store_byte
; GFX9: global_store_byte
define amdgpu_kernel void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
entry:
  %narrow = trunc i64 %in to i8
  store i8 %narrow, i8 addrspace(1)* %out
  ret void
}

; i64 argument truncated to i16 before the store: a short-sized store is
; expected.
; FUNC-LABEL: {{^}}store_i64_i16:
; EG: MEM_RAT MSKOR
; SIVI: buffer_store_short
; GFX9: global_store_short
define amdgpu_kernel void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
entry:
  %narrow = trunc i64 %in to i16
  store i16 %narrow, i16 addrspace(1)* %out
  ret void
}

; The optimizer merges the two 32-bit stores of the next function into one
; 64-bit store that is only 32-bit aligned. Such a store is legal, so the
; legalizer must not split it back into two 32-bit stores.
; FUNC-LABEL: {{^}}vecload2:
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XY, T[0-9]+\.X}}, 1
; EG-NOT: MEM_RAT_CACHELESS STORE_RAW

; CM: MEM_RAT_CACHELESS STORE_DWORD
; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD

; SIVI: buffer_store_dwordx2
; GFX9: global_store_dwordx2
define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* nocapture %mem) #0 {
entry:
  ; Read two consecutive dwords from %mem ...
  %word0 = load i32, i32 addrspace(4)* %mem, align 4
  %src.next = getelementptr inbounds i32, i32 addrspace(4)* %mem, i64 1
  %word1 = load i32, i32 addrspace(4)* %src.next, align 4
  ; ... and write them to two consecutive dwords at %out; the checks above
  ; expect a single 64-bit wide store.
  store i32 %word0, i32 addrspace(1)* %out, align 4
  %dst.next = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
  store i32 %word1, i32 addrspace(1)* %dst.next, align 4
  ret void
}

; Regression test: while i128 was a legal type, this input produced
; "cannot select" errors.

; FUNC-LABEL: {{^}}"i128-const-store":
; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 1

; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}, T{{[0-9]+}}.X

; SIVI: buffer_store_dwordx4
; GFX9: global_store_dwordx4
define amdgpu_kernel void @i128-const-store(i32 addrspace(1)* %out) {
entry:
  ; Four constant dword stores to %out[0..3]; the checks above expect them to
  ; be emitted as one 128-bit wide store.
  store i32 1, i32 addrspace(1)* %out, align 4
  %slot1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
  store i32 1, i32 addrspace(1)* %slot1, align 4
  %slot2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
  store i32 2, i32 addrspace(1)* %slot2, align 4
  %slot3 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
  store i32 2, i32 addrspace(1)* %slot3, align 4
  ret void
}

attributes #0 = { nounwind }