; RUN: llc < %s -march=r600 -mcpu=redwood | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=cayman | FileCheck --check-prefix=R600-CHECK --check-prefix=FUNC %s
; RUN: llc < %s -march=r600 -mcpu=SI -verify-machineinstrs | FileCheck --check-prefix=SI-CHECK --check-prefix=FUNC %s

; Load tests, grouped by the address space of the pointer being read:
; addrspace(1) in the GLOBAL section, addrspace(2) in the CONSTANT section and
; addrspace(3) in the LOCAL section. Every kernel stores its result through an
; addrspace(1) pointer.

;===------------------------------------------------------------------------===;
; GLOBAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the global address space and zero-extend it to i32.
; FUNC-LABEL: @load_i8
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}

; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_i8(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
  %val = load i8 addrspace(1)* %in
  %ext = zext i8 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i8 load. The R600 patterns expect the read to use the same
; register as source and destination, then an LSHL whose result channel feeds
; an ASHR through PV, each followed by a 24.
; FUNC-LABEL: @load_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_i8_sext(i32 addrspace(1)* %out, i8 addrspace(1)* %in) {
entry:
  %val = load i8 addrspace(1)* %in
  %ext = sext i8 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Zero-extending <2 x i8> load: one byte read is expected per element.
; FUNC-LABEL: @load_v2i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
define void @load_v2i8(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %val = load <2 x i8> addrspace(1)* %in
  %ext = zext <2 x i8> %val to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <2 x i8> load. The R600 patterns are order-independent and
; expect one read plus one LSHL/ASHR pair per element.
; FUNC-LABEL: @load_v2i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_v2i8_sext(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) {
entry:
  %val = load <2 x i8> addrspace(1)* %in
  %ext = sext <2 x i8> %val to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending <4 x i8> load: one byte read is expected per element.
; FUNC-LABEL: @load_v4i8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; R600-CHECK: VTX_READ_8
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
; SI-CHECK: BUFFER_LOAD_UBYTE
define void @load_v4i8(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %val = load <4 x i8> addrspace(1)* %in
  %ext = zext <4 x i8> %val to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <4 x i8> load. The R600 patterns are order-independent and
; expect one read plus one LSHL/ASHR pair per element.
; FUNC-LABEL: @load_v4i8_sext
; R600-CHECK-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: 24
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 24
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
; SI-CHECK: BUFFER_LOAD_SBYTE
define void @load_v4i8_sext(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) {
entry:
  %val = load <4 x i8> addrspace(1)* %in
  %ext = sext <4 x i8> %val to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the global address space and zero-extend it to i32.
; FUNC-LABEL: @load_i16
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_i16(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %val = load i16 addrspace(1)* %in
  %ext = zext i16 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i16 load. Same shape as the i8 case, but the LSHL and ASHR
; are each followed by a 16.
; FUNC-LABEL: @load_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_i16_sext(i32 addrspace(1)* %out, i16 addrspace(1)* %in) {
entry:
  %val = load i16 addrspace(1)* %in
  %ext = sext i16 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Zero-extending <2 x i16> load: one 16-bit read is expected per element.
; FUNC-LABEL: @load_v2i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_v2i16(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %val = load <2 x i16> addrspace(1)* %in
  %ext = zext <2 x i16> %val to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <2 x i16> load. The R600 patterns are order-independent and
; expect one read plus one LSHL/ASHR pair per element.
; FUNC-LABEL: @load_v2i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_v2i16_sext(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(1)* %in) {
entry:
  %val = load <2 x i16> addrspace(1)* %in
  %ext = sext <2 x i16> %val to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending <4 x i16> load: one 16-bit read is expected per element.
; FUNC-LABEL: @load_v4i16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; R600-CHECK: VTX_READ_16
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_v4i16(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %val = load <4 x i16> addrspace(1)* %in
  %ext = zext <4 x i16> %val to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <4 x i16> load. The R600 patterns are order-independent and
; expect one read plus one LSHL/ASHR pair per element.
; FUNC-LABEL: @load_v4i16_sext
; R600-CHECK-DAG: VTX_READ_16 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: VTX_READ_16 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_X_CHAN:[XYZW]]], [[DST_X]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_X_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Y_CHAN:[XYZW]]], [[DST_Y]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Y_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_Z_CHAN:[XYZW]]], [[DST_Z]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_Z_CHAN]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_W_CHAN:[XYZW]]], [[DST_W]]
; R600-CHECK-DAG: 16
; R600-CHECK-DAG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_W_CHAN]]
; R600-CHECK-DAG: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_v4i16_sext(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(1)* %in) {
entry:
  %val = load <4 x i16> addrspace(1)* %in
  %ext = sext <4 x i16> %val to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the global address space.
; FUNC-LABEL: @load_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
define void @load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %val = load i32 addrspace(1)* %in
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the global address space.
; FUNC-LABEL: @load_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: BUFFER_LOAD_DWORD v{{[0-9]+}}
define void @load_f32(float addrspace(1)* %out, float addrspace(1)* %in) {
entry:
  %val = load float addrspace(1)* %in
  store float %val, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the global address space.
; FUNC-LABEL: @load_v2f32
; R600-CHECK: VTX_READ_64

; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_v2f32(<2 x float> addrspace(1)* %out, <2 x float> addrspace(1)* %in) {
entry:
  %val = load <2 x float> addrspace(1)* %in
  store <2 x float> %val, <2 x float> addrspace(1)* %out
  ret void
}

; Load an i64 value from the global address space and store it back out.
; FUNC-LABEL: @load_i64
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT

; SI-CHECK: BUFFER_LOAD_DWORDX2
define void @load_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) {
entry:
  %val = load i64 addrspace(1)* %in
  store i64 %val, i64 addrspace(1)* %out
  ret void
}

; Load an i32 and sign-extend it to i64. The R600 patterns expect an ASHR by
; a literal operand, followed by a 31.
; FUNC-LABEL: @load_i64_sext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.x
; R600-CHECK: 31
; SI-CHECK: BUFFER_LOAD_DWORD

define void @load_i64_sext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %val = load i32 addrspace(1)* %in
  %ext = sext i32 %val to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Load an i32 and zero-extend it to i64. Only the R600 runs have patterns
; for this function.
; FUNC-LABEL: @load_i64_zext
; R600-CHECK: MEM_RAT
; R600-CHECK: MEM_RAT
define void @load_i64_zext(i64 addrspace(1)* %out, i32 addrspace(1)* %in) {
entry:
  %val = load i32 addrspace(1)* %in
  %ext = zext i32 %val to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; Load a <8 x i32> value from the global address space.
; FUNC-LABEL: @load_v8i32
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
define void @load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) {
entry:
  %val = load <8 x i32> addrspace(1)* %in
  store <8 x i32> %val, <8 x i32> addrspace(1)* %out
  ret void
}

; Load a <16 x i32> value from the global address space.
; FUNC-LABEL: @load_v16i32
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; R600-CHECK: VTX_READ_128
; XXX: We should be using DWORDX4 instructions on SI.
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
; SI-CHECK: BUFFER_LOAD_DWORD
define void @load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) {
entry:
  %val = load <16 x i32> addrspace(1)* %in
  store <16 x i32> %val, <16 x i32> addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; CONSTANT ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load a sign-extended i8 value.
; FUNC-LABEL: @load_const_i8_sext
; R600-CHECK: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 24
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 24
; SI-CHECK: BUFFER_LOAD_SBYTE v{{[0-9]+}},
define void @load_const_i8_sext(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %val = load i8 addrspace(2)* %in
  %ext = sext i8 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i8 value (read directly through %in).
; FUNC-LABEL: @load_const_i8_aligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_const_i8_aligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %val = load i8 addrspace(2)* %in
  %ext = zext i8 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i8 value (read from element index 1 past %in).
; FUNC-LABEL: @load_const_i8_unaligned
; R600-CHECK: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_UBYTE v{{[0-9]+}},
define void @load_const_i8_unaligned(i32 addrspace(1)* %out, i8 addrspace(2)* %in) {
entry:
  %ptr = getelementptr i8 addrspace(2)* %in, i32 1
  %val = load i8 addrspace(2)* %ptr
  %ext = zext i8 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Load a sign-extended i16 value.
; FUNC-LABEL: @load_const_i16_sext
; R600-CHECK: VTX_READ_16 [[DST:T[0-9]\.[XYZW]]], [[DST]]
; R600-CHECK: LSHL {{[* ]*}}T{{[0-9]}}.[[LSHL_CHAN:[XYZW]]], [[DST]]
; R600-CHECK: 16
; R600-CHECK: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, PV.[[LSHL_CHAN]]
; R600-CHECK: 16
; SI-CHECK: BUFFER_LOAD_SSHORT
define void @load_const_i16_sext(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %val = load i16 addrspace(2)* %in
  %ext = sext i16 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Load an aligned i16 value (read directly through %in).
; FUNC-LABEL: @load_const_i16_aligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_aligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %val = load i16 addrspace(2)* %in
  %ext = zext i16 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Load an un-aligned i16 value (read from element index 1 past %in).
; FUNC-LABEL: @load_const_i16_unaligned
; R600-CHECK: VTX_READ_16 T{{[0-9]+\.X, T[0-9]+\.X}}
; SI-CHECK: BUFFER_LOAD_USHORT
define void @load_const_i16_unaligned(i32 addrspace(1)* %out, i16 addrspace(2)* %in) {
entry:
  %ptr = getelementptr i16 addrspace(2)* %in, i32 1
  %val = load i16 addrspace(2)* %ptr
  %ext = zext i16 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Load an i32 value from the constant address space.
; FUNC-LABEL: @load_const_addrspace_i32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
define void @load_const_addrspace_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) {
entry:
  %val = load i32 addrspace(2)* %in
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the constant address space.
; FUNC-LABEL: @load_const_addrspace_f32
; R600-CHECK: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0

; SI-CHECK: S_LOAD_DWORD s{{[0-9]+}}
define void @load_const_addrspace_f32(float addrspace(1)* %out, float addrspace(2)* %in) {
  %val = load float addrspace(2)* %in
  store float %val, float addrspace(1)* %out
  ret void
}

;===------------------------------------------------------------------------===;
; LOCAL ADDRESS SPACE
;===------------------------------------------------------------------------===;

; Load an i8 value from the local address space.
; FUNC-LABEL: @load_i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
define void @load_i8_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
  %val = load i8 addrspace(3)* %in
  %ext = zext i8 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i8 load from the local address space. On SI no S_WQM_B64 may
; appear between the function label and the write to m0.
; FUNC-LABEL: @load_i8_sext_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
define void @load_i8_sext_local(i32 addrspace(1)* %out, i8 addrspace(3)* %in) {
entry:
  %val = load i8 addrspace(3)* %in
  %ext = sext i8 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Zero-extending <2 x i8> local load: one byte read is expected per element.
; FUNC-LABEL: @load_v2i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v2i8_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %val = load <2 x i8> addrspace(3)* %in
  %ext = zext <2 x i8> %val to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <2 x i8> local load. The R600 patterns are order-independent:
; two byte reads and two ASHRs.
; FUNC-LABEL: @load_v2i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v2i8_sext_local(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(3)* %in) {
entry:
  %val = load <2 x i8> addrspace(3)* %in
  %ext = sext <2 x i8> %val to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending <4 x i8> local load: one byte read is expected per element.
; FUNC-LABEL: @load_v4i8_local
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; R600-CHECK: LDS_UBYTE_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
; SI-CHECK: DS_READ_U8
define void @load_v4i8_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %val = load <4 x i8> addrspace(3)* %in
  %ext = zext <4 x i8> %val to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <4 x i8> local load. The R600 patterns are order-independent:
; four byte reads and four ASHRs.
; FUNC-LABEL: @load_v4i8_sext_local
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: LDS_UBYTE_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
; SI-CHECK: DS_READ_I8
define void @load_v4i8_sext_local(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(3)* %in) {
entry:
  %val = load <4 x i8> addrspace(3)* %in
  %ext = sext <4 x i8> %val to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i16 value from the local address space.
; FUNC-LABEL: @load_i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
define void @load_i16_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %val = load i16 addrspace(3)* %in
  %ext = zext i16 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Sign-extending i16 load from the local address space. On SI no S_WQM_B64 may
; appear between the function label and the write to m0.
; FUNC-LABEL: @load_i16_sext_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
define void @load_i16_sext_local(i32 addrspace(1)* %out, i16 addrspace(3)* %in) {
entry:
  %val = load i16 addrspace(3)* %in
  %ext = sext i16 %val to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; Zero-extending <2 x i16> local load: one 16-bit read is expected per element.
; FUNC-LABEL: @load_v2i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v2i16_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %val = load <2 x i16> addrspace(3)* %in
  %ext = zext <2 x i16> %val to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <2 x i16> local load. The R600 patterns are order-independent:
; two 16-bit reads and two ASHRs.
; FUNC-LABEL: @load_v2i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v2i16_sext_local(<2 x i32> addrspace(1)* %out, <2 x i16> addrspace(3)* %in) {
entry:
  %val = load <2 x i16> addrspace(3)* %in
  %ext = sext <2 x i16> %val to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; Zero-extending <4 x i16> local load: one 16-bit read is expected per element.
; FUNC-LABEL: @load_v4i16_local
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; R600-CHECK: LDS_USHORT_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
; SI-CHECK: DS_READ_U16
define void @load_v4i16_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %val = load <4 x i16> addrspace(3)* %in
  %ext = zext <4 x i16> %val to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Sign-extending <4 x i16> local load. The R600 patterns are order-independent:
; four 16-bit reads and four ASHRs.
; FUNC-LABEL: @load_v4i16_sext_local
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: LDS_USHORT_READ_RET
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; R600-CHECK-DAG: ASHR
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
; SI-CHECK: DS_READ_I16
define void @load_v4i16_sext_local(<4 x i32> addrspace(1)* %out, <4 x i16> addrspace(3)* %in) {
entry:
  %val = load <4 x i16> addrspace(3)* %in
  %ext = sext <4 x i16> %val to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; Load an i32 value from the local address space.
; FUNC-LABEL: @load_i32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK-NOT: S_WQM_B64
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_i32_local(i32 addrspace(1)* %out, i32 addrspace(3)* %in) {
entry:
  %val = load i32 addrspace(3)* %in
  store i32 %val, i32 addrspace(1)* %out
  ret void
}

; Load a f32 value from the local address space.
; FUNC-LABEL: @load_f32_local
; R600-CHECK: LDS_READ_RET
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B32
define void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) {
entry:
  %val = load float addrspace(3)* %in
  store float %val, float addrspace(1)* %out
  ret void
}

; Load a v2f32 value from the local address space. R600 is expected to issue
; two reads, SI a single DS_READ_B64.
; FUNC-LABEL: @load_v2f32_local
; R600-CHECK: LDS_READ_RET
; R600-CHECK: LDS_READ_RET
; SI-CHECK: S_MOV_B32 m0
; SI-CHECK: DS_READ_B64
define void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) {
entry:
  %val = load <2 x float> addrspace(3)* %in
  store <2 x float> %val, <2 x float> addrspace(1)* %out
  ret void
}