; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=EG-CHECK --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=CM-CHECK --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=verde -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s

; Store lowering tests for the three llc invocations above (redwood, cayman
; and verde).  The first half of the file stores to addrspace(1), the second
; half to addrspace(3); each function is preceded by the instructions that
; are expected in the generated code for each CPU.

;===------------------------------------------------------------------------===;
; Global Address Space
;===------------------------------------------------------------------------===;

; i1 store
; FUNC-LABEL: @store_i1
; EG-CHECK: MEM_RAT MSKOR
; SI-CHECK: BUFFER_STORE_BYTE
define void @store_i1(i1 addrspace(1)* %out) {
entry:
  store i1 true, i1 addrspace(1)* %out
  ret void
}

; i8 store
; EG-CHECK-LABEL: @store_i8
; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-CHECK: VTX_READ_8 [[VAL:T[0-9]\.X]], [[VAL]]
; IG 0: Get the byte index and truncate the value
; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
; EG-CHECK-NEXT: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
; EG-CHECK-NEXT: 3(4.203895e-45), 255(3.573311e-43)
; IG 1: Truncate the calculated shift amount for the mask
; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
; EG-CHECK-NEXT: 3
; IG 2: Shift the value and the mask
; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
; EG-CHECK-NEXT: 255
; IG 3: Initialize the Y and Z channels to zero
; XXX: An optimal scheduler should merge this into one of the previous IGs.
; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0

; SI-CHECK-LABEL: @store_i8
; SI-CHECK: BUFFER_STORE_BYTE

define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
entry:
  store i8 %in, i8 addrspace(1)* %out
  ret void
}

; i16 store
; EG-CHECK-LABEL: @store_i16
; EG-CHECK: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
; EG-CHECK: VTX_READ_16 [[VAL:T[0-9]\.X]], [[VAL]]
; IG 0: Get the byte index and truncate the value
; EG-CHECK: AND_INT T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
; EG-CHECK: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], [[VAL]], literal.y
; EG-CHECK-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
; IG 1: Truncate the calculated shift amount for the mask
; EG-CHECK: LSHL * T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
; EG-CHECK: 3
; IG 2: Shift the value and the mask
; EG-CHECK: LSHL T[[RW_GPR]].X, T{{[0-9]}}.[[TRUNC_CHAN]], PV.[[SHIFT_CHAN]]
; EG-CHECK: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
; EG-CHECK-NEXT: 65535
; IG 3: Initialize the Y and Z channels to zero
; XXX: An optimal scheduler should merge this into one of the previous IGs.
; EG-CHECK: MOV T[[RW_GPR]].Y, 0.0
; EG-CHECK: MOV * T[[RW_GPR]].Z, 0.0

; SI-CHECK-LABEL: @store_i16
; SI-CHECK: BUFFER_STORE_SHORT
define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
entry:
  store i16 %in, i16 addrspace(1)* %out
  ret void
}

; <2 x i8> store of a truncated <2 x i32>
; EG-CHECK-LABEL: @store_v2i8
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK-NOT: MEM_RAT MSKOR
; SI-CHECK-LABEL: @store_v2i8
; SI-CHECK: BUFFER_STORE_BYTE
; SI-CHECK: BUFFER_STORE_BYTE
define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i8>
  store <2 x i8> %0, <2 x i8> addrspace(1)* %out
  ret void
}


; <2 x i16> store of a truncated <2 x i32>
; EG-CHECK-LABEL: @store_v2i16
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
; CM-CHECK-LABEL: @store_v2i16
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: @store_v2i16
; SI-CHECK: BUFFER_STORE_SHORT
; SI-CHECK: BUFFER_STORE_SHORT
define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
entry:
  %0 = trunc <2 x i32> %in to <2 x i16>
  store <2 x i16> %0, <2 x i16> addrspace(1)* %out
  ret void
}

; <4 x i8> store of a truncated <4 x i32>
; EG-CHECK-LABEL: @store_v4i8
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
; CM-CHECK-LABEL: @store_v4i8
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: @store_v4i8
; SI-CHECK: BUFFER_STORE_BYTE
; SI-CHECK: BUFFER_STORE_BYTE
; SI-CHECK: BUFFER_STORE_BYTE
; SI-CHECK: BUFFER_STORE_BYTE
define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i8>
  store <4 x i8> %0, <4 x i8> addrspace(1)* %out
  ret void
}

; floating-point store
; EG-CHECK-LABEL: @store_f32
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
; CM-CHECK-LABEL: @store_f32
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK-LABEL: @store_f32
; SI-CHECK: BUFFER_STORE_DWORD

define void @store_f32(float addrspace(1)* %out, float %in) {
  store float %in, float addrspace(1)* %out
  ret void
}

; <4 x i16> store of a truncated <4 x i32>
; EG-CHECK-LABEL: @store_v4i16
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK: MEM_RAT MSKOR
; EG-CHECK-NOT: MEM_RAT MSKOR
; SI-CHECK-LABEL: @store_v4i16
; SI-CHECK: BUFFER_STORE_SHORT
; SI-CHECK: BUFFER_STORE_SHORT
; SI-CHECK: BUFFER_STORE_SHORT
; SI-CHECK: BUFFER_STORE_SHORT
; SI-CHECK-NOT: BUFFER_STORE_BYTE
define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
entry:
  %0 = trunc <4 x i32> %in to <4 x i16>
  store <4 x i16> %0, <4 x i16> addrspace(1)* %out
  ret void
}

; vec2 floating-point stores
; EG-CHECK-LABEL: @store_v2f32
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
; CM-CHECK-LABEL: @store_v2f32
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: @store_v2f32
; SI-CHECK: BUFFER_STORE_DWORDX2

define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
entry:
  %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
  %1 = insertelement <2 x float> %0, float %b, i32 1
  store <2 x float> %1, <2 x float> addrspace(1)* %out
  ret void
}

; <4 x i32> store; exactly one store instruction is expected on EG and CM
; EG-CHECK-LABEL: @store_v4i32
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
; EG-CHECK-NOT: MEM_RAT_CACHELESS STORE_RAW
; CM-CHECK-LABEL: @store_v4i32
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; CM-CHECK-NOT: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: @store_v4i32
; SI-CHECK: BUFFER_STORE_DWORDX4
define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(1)* %out
  ret void
}

; i8 store of a truncated i64
; FUNC-LABEL: @store_i64_i8
; EG-CHECK: MEM_RAT MSKOR
; SI-CHECK: BUFFER_STORE_BYTE
define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
entry:
  %0 = trunc i64 %in to i8
  store i8 %0, i8 addrspace(1)* %out
  ret void
}

; i16 store of a truncated i64
; FUNC-LABEL: @store_i64_i16
; EG-CHECK: MEM_RAT MSKOR
; SI-CHECK: BUFFER_STORE_SHORT
define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
entry:
  %0 = trunc i64 %in to i16
  store i16 %0, i16 addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; Local Address Space
;===------------------------------------------------------------------------===;

; i1 store
; FUNC-LABEL: @store_local_i1
; EG-CHECK: LDS_BYTE_WRITE
; SI-CHECK: DS_WRITE_B8
define void @store_local_i1(i1 addrspace(3)* %out) {
entry:
  store i1 true, i1 addrspace(3)* %out
  ret void
}

; i8 store
; EG-CHECK-LABEL: @store_local_i8
; EG-CHECK: LDS_BYTE_WRITE
; SI-CHECK-LABEL: @store_local_i8
; SI-CHECK: DS_WRITE_B8
define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
  store i8 %in, i8 addrspace(3)* %out
  ret void
}

; i16 store
; EG-CHECK-LABEL: @store_local_i16
; EG-CHECK: LDS_SHORT_WRITE
; SI-CHECK-LABEL: @store_local_i16
; SI-CHECK: DS_WRITE_B16
define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
  store i16 %in, i16 addrspace(3)* %out
  ret void
}

; <2 x i16> store
; EG-CHECK-LABEL: @store_local_v2i16
; EG-CHECK: LDS_WRITE
; CM-CHECK-LABEL: @store_local_v2i16
; CM-CHECK: LDS_WRITE
; SI-CHECK-LABEL: @store_local_v2i16
; SI-CHECK: DS_WRITE_B16
; SI-CHECK: DS_WRITE_B16
define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
entry:
  store <2 x i16> %in, <2 x i16> addrspace(3)* %out
  ret void
}

; <4 x i8> store
; EG-CHECK-LABEL: @store_local_v4i8
; EG-CHECK: LDS_WRITE
; CM-CHECK-LABEL: @store_local_v4i8
; CM-CHECK: LDS_WRITE
; SI-CHECK-LABEL: @store_local_v4i8
; SI-CHECK: DS_WRITE_B8
; SI-CHECK: DS_WRITE_B8
; SI-CHECK: DS_WRITE_B8
; SI-CHECK: DS_WRITE_B8
define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
entry:
  store <4 x i8> %in, <4 x i8> addrspace(3)* %out
  ret void
}

; <2 x i32> store
; EG-CHECK-LABEL: @store_local_v2i32
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
; CM-CHECK-LABEL: @store_local_v2i32
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; SI-CHECK-LABEL: @store_local_v2i32
; SI-CHECK: DS_WRITE_B64
define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
entry:
  store <2 x i32> %in, <2 x i32> addrspace(3)* %out
  ret void
}

; <4 x i32> store
; EG-CHECK-LABEL: @store_local_v4i32
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
; EG-CHECK: LDS_WRITE
; CM-CHECK-LABEL: @store_local_v4i32
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; CM-CHECK: LDS_WRITE
; SI-CHECK-LABEL: @store_local_v4i32
; SI-CHECK: DS_WRITE_B32
; SI-CHECK: DS_WRITE_B32
; SI-CHECK: DS_WRITE_B32
; SI-CHECK: DS_WRITE_B32
define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
entry:
  store <4 x i32> %in, <4 x i32> addrspace(3)* %out
  ret void
}

; i8 store of a truncated i64
; FUNC-LABEL: @store_local_i64_i8
; EG-CHECK: LDS_BYTE_WRITE
; SI-CHECK: DS_WRITE_B8
define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
entry:
  %0 = trunc i64 %in to i8
  store i8 %0, i8 addrspace(3)* %out
  ret void
}

; i16 store of a truncated i64
; FUNC-LABEL: @store_local_i64_i16
; EG-CHECK: LDS_SHORT_WRITE
; SI-CHECK: DS_WRITE_B16
define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
entry:
  %0 = trunc i64 %in to i16
  store i16 %0, i16 addrspace(3)* %out
  ret void
}

; The stores in this function are combined by the optimizer to create a
; 64-bit store with 32-bit alignment.  This is legal for SI and the legalizer
; should not try to split the 64-bit store back into 2 32-bit stores.
;
; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
; be two 32-bit stores.
; NOTE(review): only a single store is matched for redwood and cayman below;
; confirm whether a second match was intended.

; EG-CHECK-LABEL: @vecload2
; EG-CHECK: MEM_RAT_CACHELESS STORE_RAW
; CM-CHECK-LABEL: @vecload2
; CM-CHECK: MEM_RAT_CACHELESS STORE_DWORD
; SI-CHECK-LABEL: @vecload2
; SI-CHECK: BUFFER_STORE_DWORDX2
define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
entry:
  %0 = load i32 addrspace(2)* %mem, align 4
  %arrayidx1.i = getelementptr inbounds i32 addrspace(2)* %mem, i64 1
  %1 = load i32 addrspace(2)* %arrayidx1.i, align 4
  store i32 %0, i32 addrspace(1)* %out, align 4
  %arrayidx1 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
  store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
  ret void
}

attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "unsafe-fp-math"="false" "use-soft-float"="false" }

; When i128 was a legal type this program generated cannot select errors:

; FUNC-LABEL: @i128-const-store
; FIXME: We should be able to do this with one store instruction
; EG-CHECK: STORE_RAW
; EG-CHECK: STORE_RAW
; EG-CHECK: STORE_RAW
; EG-CHECK: STORE_RAW
; CM-CHECK: STORE_DWORD
; CM-CHECK: STORE_DWORD
; CM-CHECK: STORE_DWORD
; CM-CHECK: STORE_DWORD
; SI-CHECK: BUFFER_STORE_DWORDX2
; SI-CHECK: BUFFER_STORE_DWORDX2
define void @i128-const-store(i32 addrspace(1)* %out) {
entry:
  store i32 1, i32 addrspace(1)* %out, align 4
  %arrayidx2 = getelementptr inbounds i32 addrspace(1)* %out, i64 1
  store i32 1, i32 addrspace(1)* %arrayidx2, align 4
  %arrayidx4 = getelementptr inbounds i32 addrspace(1)* %out, i64 2
  store i32 2, i32 addrspace(1)* %arrayidx4, align 4
  %arrayidx6 = getelementptr inbounds i32 addrspace(1)* %out, i64 3
  store i32 2, i32 addrspace(1)* %arrayidx6, align 4
  ret void
}