; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s

; Instruction-selection checks for scalar and vector loads from the global
; (addrspace 1), constant (addrspace 2) and local (addrspace 3) address
; spaces. The two r600 runs (redwood, cayman) share the R600 check prefix and
; the two amdgcn runs (SI, tonga) share the SI check prefix; function labels
; are matched with the common FUNC prefix.
;
; Every kernel loads a value from %in, optionally extends it, and stores the
; result to %out in the global address space.

;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the global address space (zero-extended to i32).
; FUNC-LABEL: {{^}}load_i8:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}

; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %1 = load i8, i8 addrspace(1)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the global address space and sign-extend it to i32.
; The R600 checks expect the byte read to be followed by a BFE_INT that takes
; the loaded register and the literal 8.
; FUNC-LABEL: {{^}}load_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 8
; SI: buffer_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %0 = load i8, i8 addrspace(1)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the global address space (zero-extended);
; one byte load is expected per element.
; FUNC-LABEL: {{^}}load_v2i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the global address space and sign-extend each
; element; one byte load plus one BFE_INT is expected per element on R600.
; FUNC-LABEL: {{^}}load_v2i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: 8
; R600-DAG: 8

; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the global address space (zero-extended);
; one byte load is expected per element.
; FUNC-LABEL: {{^}}load_v4i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the global address space and sign-extend each
; element; one byte load plus one BFE_INT is expected per element on R600.
; FUNC-LABEL: {{^}}load_v4i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_W]], 0.0, literal
; R600-DAG: 8
; R600-DAG: 8
; R600-DAG: 8
; R600-DAG: 8
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space (zero-extended to i32).
; FUNC-LABEL: {{^}}load_i16:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16, i16 addrspace(1)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space and sign-extend it to i32.
; The R600 checks expect the short read to be followed by a BFE_INT that
; takes the loaded register and the literal 16.
; FUNC-LABEL: {{^}}load_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI: buffer_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %0 = load i16, i16 addrspace(1)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the global address space (zero-extended);
; one short load is expected per element.
; FUNC-LABEL: {{^}}load_v2i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the global address space and sign-extend each
; element; one short load plus one BFE_INT is expected per element on R600.
; FUNC-LABEL: {{^}}load_v2i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: 16
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the global address space (zero-extended);
; one short load is expected per element.
; FUNC-LABEL: {{^}}load_v4i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the global address space and sign-extend each
; element; one short load plus one BFE_INT is expected per element on R600.
; FUNC-LABEL: {{^}}load_v4i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_X]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Y]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_Z]], 0.0, literal
; R600-DAG: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST_W]], 0.0, literal
; R600-DAG: 16
; R600-DAG: 16
; R600-DAG: 16
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
; SI: buffer_load_sshort
; SI: buffer_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space.
; FUNC-LABEL: {{^}}load_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: buffer_load_dword v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load an f32 value from the global address space.
; FUNC-LABEL: {{^}}load_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: buffer_load_dword v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %0 = load float, float addrspace(1)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the global address space.
; FUNC-LABEL: {{^}}load_v2f32:
; R600: MEM_RAT
; R600: VTX_READ_64
; SI: buffer_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
  %0 = load <2 x float>, <2 x float> addrspace(1)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}

; Load an i64 value from the global address space; a single 64-bit load is
; expected on both targets.
; FUNC-LABEL: {{^}}load_i64:
; R600: VTX_READ_64
; SI: buffer_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  store i64 %0, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and sign-extend it to i64.
; The R600 checks expect an ASHR with the literal 31 (presumably producing
; the high half of the result).
; FUNC-LABEL: {{^}}load_i64_sext:
; R600: MEM_RAT
; R600: MEM_RAT
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; R600: 31
; SI: buffer_load_dword

define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  %1 = sext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and zero-extend it to i64.
; Only the R600 output is checked for this function.
; FUNC-LABEL: {{^}}load_i64_zext:
; R600: MEM_RAT
; R600: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  %1 = zext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Load a <8 x i32> vector from the global address space; two 128-bit loads
; are expected.
; FUNC-LABEL: {{^}}load_v8i32:
; R600: VTX_READ_128
; R600: VTX_READ_128

; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; Load a <16 x i32> vector from the global address space; four 128-bit loads
; are expected.
; FUNC-LABEL: {{^}}load_v16i32:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128

; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
; SI: buffer_load_dwordx4
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
  %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; CONSTANT ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load a sign-extended i8 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 8
; SI: buffer_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8, i8 addrspace(2)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i8 value (read directly from %in, zero-extended).
; FUNC-LABEL: {{^}}load_const_i8_aligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8, i8 addrspace(2)* %in
  %1 = zext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i8 value (read from one element past %in, zero-extended).
; FUNC-LABEL: {{^}}load_const_i8_unaligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
  %1 = load i8, i8 addrspace(2)* %0
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load a sign-extended i16 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: BFE_INT {{[* ]*}}T{{[0-9]\.[XYZW]}}, [[DST]], 0.0, literal
; R600: 16
; SI: buffer_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16, i16 addrspace(2)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i16 value (read directly from %in, zero-extended).
; FUNC-LABEL: {{^}}load_const_i16_aligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16, i16 addrspace(2)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i16 value (read from one element past %in, zero-extended).
; FUNC-LABEL: {{^}}load_const_i16_unaligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
  %1 = load i16, i16 addrspace(2)* %0
  %2 = zext i16 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i32 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_addrspace_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %0 = load i32, i32 addrspace(2)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load an f32 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_addrspace_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %1 = load float, float addrspace(2)* %in
  store float %1, float addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; LOCAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the local address space (zero-extended to i32).
; On the amdgcn runs no s_wqm_b64 may appear before the m0 setup.
; FUNC-LABEL: {{^}}load_i8_local:
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %1 = load i8, i8 addrspace(3)* %in
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the local address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i8_sext_local:
; R600: LDS_UBYTE_READ_RET
; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
  %0 = load i8, i8 addrspace(3)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the local address space (zero-extended);
; one byte read is expected per element.
; FUNC-LABEL: {{^}}load_v2i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %1 = zext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the local address space and sign-extend each
; element.
; FUNC-LABEL: {{^}}load_v2i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %0 = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %1 = sext <2 x i8> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the local address space (zero-extended);
; one byte read is expected per element.
; FUNC-LABEL: {{^}}load_v4i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %1 = zext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the local address space and sign-extend each
; element.
; FUNC-LABEL: {{^}}load_v4i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %0 = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %1 = sext <4 x i8> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space (zero-extended to i32).
; FUNC-LABEL: {{^}}load_i16_local:
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16, i16 addrspace(3)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i16_sext_local:
; R600: LDS_USHORT_READ_RET
; R600: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %0 = load i16, i16 addrspace(3)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the local address space (zero-extended);
; one short read is expected per element.
; FUNC-LABEL: {{^}}load_v2i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %1 = zext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the local address space and sign-extend each
; element.
; FUNC-LABEL: {{^}}load_v2i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %0 = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %1 = sext <2 x i16> %0 to <2 x i32>
  store <2 x i32> %1, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the local address space (zero-extended);
; one short read is expected per element.
; FUNC-LABEL: {{^}}load_v4i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %1 = zext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the local address space and sign-extend each
; element.
; FUNC-LABEL: {{^}}load_v4i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; R600-DAG: BFE_INT
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %0 = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %1 = sext <4 x i16> %0 to <4 x i32>
  store <4 x i32> %1, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the local address space.
; FUNC-LABEL: {{^}}load_i32_local:
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %0 = load i32, i32 addrspace(3)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load an f32 value from the local address space.
; FUNC-LABEL: {{^}}load_f32_local:
; R600: LDS_READ_RET
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
  %0 = load float, float addrspace(3)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the local address space.
; FUNC-LABEL: {{^}}load_v2f32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI: s_mov_b32 m0
; SI: ds_read_b64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
  %0 = load <2 x float>, <2 x float> addrspace(3)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}

; Test loading an i32 and a v2i32 value from the same base pointer.
; The vector is read two <2 x i32> elements past %in with only 4-byte
; alignment, and the scalar is added into lane 0 of the result.
; FUNC-LABEL: {{^}}load_i32_v2i32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI-DAG: ds_read_b32
; SI-DAG: ds_read2_b32
define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
  %scalar = load i32, i32 addrspace(3)* %in
  %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
  %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
  %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
  %vec = add <2 x i32> %vec0, %vec1
  store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
  ret void
}


; Local-memory array used as a constant base address by the test below.
@lds = addrspace(3) global [512 x i32] undef, align 4

; On SI we need to make sure that the base offset is a register and not
; an immediate. The load reads element 1 of @lds (hence the expected
; offset of 4); the %in argument is unused. The destination register is
; matched with a pattern so the check does not depend on register allocation.
; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
; R600: LDS_READ_RET
define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
  %tmp1 = load i32, i32 addrspace(3)* %tmp0
  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  store i32 %tmp1, i32 addrspace(1)* %tmp2
  ret void
}