; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600 --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s
; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI --check-prefix=FUNC %s

;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the global address space and zero-extend it to i32.
; FUNC-LABEL: {{^}}load_i8:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %byte = load i8, i8 addrspace(1)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the global address space and sign-extend it to i32.
; The R600 checks expect the extension as a shift-left / arithmetic-shift-right
; pair by 24.
; FUNC-LABEL: {{^}}load_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600: 24
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600: 24
; SI: buffer_load_sbyte
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %byte = load i8, i8 addrspace(1)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the global address space, zero-extended per lane.
; FUNC-LABEL: {{^}}load_v2i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %bytes = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %wide = zext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the global address space, sign-extended per lane.
; FUNC-LABEL: {{^}}load_v2i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-DAG: 24
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-DAG: 24
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-DAG: 24
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-DAG: 24
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %bytes = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %wide = sext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the global address space, zero-extended per lane.
; FUNC-LABEL: {{^}}load_v4i8:
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; R600: VTX_READ_8
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
; SI: buffer_load_ubyte
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %bytes = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %wide = zext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the global address space, sign-extended per lane.
; FUNC-LABEL: {{^}}load_v4i8_sext:
; R600-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-DAG: 24
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-DAG: 24
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-DAG: 24
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-DAG: 24
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-DAG: 24
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-DAG: 24
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-DAG: 24
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-DAG: 24
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
; SI: buffer_load_sbyte
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %bytes = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %wide = sext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space and zero-extend it to i32.
; FUNC-LABEL: {{^}}load_i16:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %half = load i16, i16 addrspace(1)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space and sign-extend it to i32.
; The R600 checks expect the extension as a shift pair by 16.
; FUNC-LABEL: {{^}}load_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600: 16
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600: 16
; SI: buffer_load_sshort
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %half = load i16, i16 addrspace(1)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the global address space, zero-extended per lane.
; FUNC-LABEL: {{^}}load_v2i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %halves = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = zext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the global address space, sign-extended per lane.
; FUNC-LABEL: {{^}}load_v2i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-DAG: 16
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-DAG: 16
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-DAG: 16
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %halves = load <2 x i16>, <2 x i16> addrspace(1)* %in
  %wide = sext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the global address space, zero-extended per lane.
; FUNC-LABEL: {{^}}load_v4i16:
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; R600: VTX_READ_16
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
; SI: buffer_load_ushort
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %halves = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = zext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the global address space, sign-extended per lane.
; FUNC-LABEL: {{^}}load_v4i16_sext:
; R600-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-DAG: 16
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-DAG: 16
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-DAG: 16
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-DAG: 16
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-DAG: 16
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-DAG: 16
; R600-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-DAG: 16
; R600-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-DAG: 16
; SI: buffer_load_sshort
; SI: buffer_load_sshort
; SI: buffer_load_sshort
; SI: buffer_load_sshort
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %halves = load <4 x i16>, <4 x i16> addrspace(1)* %in
  %wide = sext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space.
; FUNC-LABEL: {{^}}load_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
; SI: buffer_load_dword v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %word = load i32, i32 addrspace(1)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the global address space.
; FUNC-LABEL: {{^}}load_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: buffer_load_dword v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %0 = load float, float addrspace(1)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the global address space.
; FUNC-LABEL: {{^}}load_v2f32:
; R600: MEM_RAT
; R600: VTX_READ_64
; SI: buffer_load_dwordx2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
  %0 = load <2 x float>, <2 x float> addrspace(1)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}

; Load an i64 value from the global address space.
; FUNC-LABEL: {{^}}load_i64:
; R600: VTX_READ_64
; SI: buffer_load_dwordx2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
  %0 = load i64, i64 addrspace(1)* %in
  store i64 %0, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and sign-extend it to i64.
; FUNC-LABEL: {{^}}load_i64_sext:
; R600: MEM_RAT
; R600: MEM_RAT
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; R600: 31
; SI: buffer_load_dword

define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  %1 = sext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space and zero-extend it to i64.
; The 32-bit load is the same one issued in load_i64_sext and load_i32, so the
; amdgcn runs check for the same dword load instead of leaving this function
; unverified on those targets.
; FUNC-LABEL: {{^}}load_i64_zext:
; R600: MEM_RAT
; R600: MEM_RAT
; SI: buffer_load_dword
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %0 = load i32, i32 addrspace(1)* %in
  %1 = zext i32 %0 to i64
  store i64 %1, i64 addrspace(1)* %out
  ret void
}

; Load a <8 x i32> vector from the global address space.
; FUNC-LABEL: {{^}}load_v8i32:
; R600: VTX_READ_128
; R600: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
  %0 = load <8 x i32>, <8 x i32> addrspace(1)* %in
  store <8 x i32> %0, <8 x i32> addrspace(1)* %out
  ret void
}

; Load a <16 x i32> vector from the global address space.
; FUNC-LABEL: {{^}}load_v16i32:
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
; R600: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
; SI: buffer_load_dword
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
  %0 = load <16 x i32>, <16 x i32> addrspace(1)* %in
  store <16 x i32> %0, <16 x i32> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; CONSTANT ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load a sign-extended i8 value
; FUNC-LABEL: {{^}}load_const_i8_sext:
; R600: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600: 24
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600: 24
; SI: buffer_load_sbyte v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8, i8 addrspace(2)* %in
  %1 = sext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i8 value
; FUNC-LABEL: {{^}}load_const_i8_aligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = load i8, i8 addrspace(2)* %in
  %1 = zext i8 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i8 value (one element past %in)
; FUNC-LABEL: {{^}}load_const_i8_unaligned:
; R600: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ubyte v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %0 = getelementptr i8, i8 addrspace(2)* %in, i32 1
  %1 = load i8, i8 addrspace(2)* %0
  %2 = zext i8 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load a sign-extended i16 value
; FUNC-LABEL: {{^}}load_const_i16_sext:
; R600: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600: 16
; R600: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600: 16
; SI: buffer_load_sshort
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16, i16 addrspace(2)* %in
  %1 = sext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i16 value
; FUNC-LABEL: {{^}}load_const_i16_aligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = load i16, i16 addrspace(2)* %in
  %1 = zext i16 %0 to i32
  store i32 %1, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i16 value (one element past %in)
; FUNC-LABEL: {{^}}load_const_i16_unaligned:
; R600: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI: buffer_load_ushort
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %0 = getelementptr i16, i16 addrspace(2)* %in, i32 1
  %1 = load i16, i16 addrspace(2)* %0
  %2 = zext i16 %1 to i32
  store i32 %2, i32 addrspace(1)* %out
  ret void
}

; Load an i32 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_addrspace_i32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %0 = load i32, i32 addrspace(2)* %in
  store i32 %0, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the constant address space.
; FUNC-LABEL: {{^}}load_const_addrspace_f32:
; R600: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI: s_load_dword s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %1 = load float, float addrspace(2)* %in
  store float %1, float addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; LOCAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the local address space.
; FUNC-LABEL: {{^}}load_i8_local:
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %byte = load i8, i8 addrspace(3)* %in
  %wide = zext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Load an i8 value from the local address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i8_sext_local:
; R600: LDS_UBYTE_READ_RET
; R600: ASHR
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
  %byte = load i8, i8 addrspace(3)* %in
  %wide = sext i8 %byte to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the local address space, zero-extended per lane.
; FUNC-LABEL: {{^}}load_v2i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %bytes = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = zext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i8> vector from the local address space, sign-extended per lane.
; FUNC-LABEL: {{^}}load_v2i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: ASHR
; R600-DAG: ASHR
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %bytes = load <2 x i8>, <2 x i8> addrspace(3)* %in
  %wide = sext <2 x i8> %bytes to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the local address space, zero-extended per lane.
; FUNC-LABEL: {{^}}load_v4i8_local:
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; R600: LDS_UBYTE_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %bytes = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = zext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i8> vector from the local address space, sign-extended per lane.
; FUNC-LABEL: {{^}}load_v4i8_sext_local:
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: LDS_UBYTE_READ_RET
; R600-DAG: ASHR
; R600-DAG: ASHR
; R600-DAG: ASHR
; R600-DAG: ASHR
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
; SI: ds_read_i8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %bytes = load <4 x i8>, <4 x i8> addrspace(3)* %in
  %wide = sext <4 x i8> %bytes to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space.
; FUNC-LABEL: {{^}}load_i16_local:
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %half = load i16, i16 addrspace(3)* %in
  %wide = zext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space and sign-extend it to i32.
; FUNC-LABEL: {{^}}load_i16_sext_local:
; R600: LDS_USHORT_READ_RET
; R600: ASHR
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %half = load i16, i16 addrspace(3)* %in
  %wide = sext i16 %half to i32
  store i32 %wide, i32 addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the local address space, zero-extended per lane.
; FUNC-LABEL: {{^}}load_v2i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %halves = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %wide = zext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <2 x i16> vector from the local address space, sign-extended per lane.
; FUNC-LABEL: {{^}}load_v2i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: ASHR
; R600-DAG: ASHR
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %halves = load <2 x i16>, <2 x i16> addrspace(3)* %in
  %wide = sext <2 x i16> %halves to <2 x i32>
  store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the local address space, zero-extended per lane.
; FUNC-LABEL: {{^}}load_v4i16_local:
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; R600: LDS_USHORT_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_read_u16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %halves = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %wide = zext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Load a <4 x i16> vector from the local address space, sign-extended per lane.
; FUNC-LABEL: {{^}}load_v4i16_sext_local:
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: LDS_USHORT_READ_RET
; R600-DAG: ASHR
; R600-DAG: ASHR
; R600-DAG: ASHR
; R600-DAG: ASHR
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
; SI: ds_read_i16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %halves = load <4 x i16>, <4 x i16> addrspace(3)* %in
  %wide = sext <4 x i16> %halves to <4 x i32>
  store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the local address space.
; FUNC-LABEL: {{^}}load_i32_local:
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %word = load i32, i32 addrspace(3)* %in
  store i32 %word, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the local address space.
; FUNC-LABEL: {{^}}load_f32_local:
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
  %0 = load float, float addrspace(3)* %in
  store float %0, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the local address space.
; FUNC-LABEL: {{^}}load_v2f32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI-NOT: s_wqm_b64
; SI: s_mov_b32 m0
; SI: ds_read_b64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
  %0 = load <2 x float>, <2 x float> addrspace(3)* %in
  store <2 x float> %0, <2 x float> addrspace(1)* %out
  ret void
}

; Test loading a i32 and v2i32 value from the same base pointer.
; The scalar is read from %in itself; the vector is read from element index 2
; of %in viewed as a <2 x i32> pointer, with only 4-byte alignment.
; FUNC-LABEL: {{^}}load_i32_v2i32_local:
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; R600: LDS_READ_RET
; SI-DAG: ds_read_b32
; SI-DAG: ds_read2_b32
define void @load_i32_v2i32_local(<2 x i32> addrspace(1)* %out, i32 addrspace(3)* %in) {
  %scalar = load i32, i32 addrspace(3)* %in
  %tmp0 = bitcast i32 addrspace(3)* %in to <2 x i32> addrspace(3)*
  %vec_ptr = getelementptr <2 x i32>, <2 x i32> addrspace(3)* %tmp0, i32 2
  %vec0 = load <2 x i32>, <2 x i32> addrspace(3)* %vec_ptr, align 4
  %vec1 = insertelement <2 x i32> <i32 0, i32 0>, i32 %scalar, i32 0
  %vec = add <2 x i32> %vec0, %vec1
  store <2 x i32> %vec, <2 x i32> addrspace(1)* %out
  ret void
}


@lds = addrspace(3) global [512 x i32] undef, align 4

; On SI we need to make sure that the base offset is a register and not
; an immediate.
; The load address is element 1 of @lds, so the expected access is a zero base
; register plus an immediate offset of 4. The destination VGPR is matched
; loosely because register allocation is not what this test is about.
; %in is unused; the result is stored to %out + 1.
; FUNC-LABEL: {{^}}load_i32_local_const_ptr:
; SI: v_mov_b32_e32 v[[ZERO:[0-9]+]], 0
; SI: ds_read_b32 v{{[0-9]+}}, v[[ZERO]] offset:4
; R600: LDS_READ_RET
define void @load_i32_local_const_ptr(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %tmp0 = getelementptr [512 x i32], [512 x i32] addrspace(3)* @lds, i32 0, i32 1
  %tmp1 = load i32, i32 addrspace(3)* %tmp0
  %tmp2 = getelementptr i32, i32 addrspace(1)* %out, i32 1
  store i32 %tmp1, i32 addrspace(1)* %tmp2
  ret void
}