; RUN: llc -march=amdgcn -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=UNALIGNED %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=ALIGNED %s

; NOTE(review): the RUN lines above only bind the SI, ALIGNED and UNALIGNED
; FileCheck prefixes. The FUNC-LABEL:/GCN: directives this file previously
; contained were never bound to a prefix and so were silently ignored; they
; have been renamed to SI-LABEL:/SI: so those checks actually run.

; SI-LABEL: {{^}}local_unaligned_load_store_i16:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 {
  %v = load i16, i16 addrspace(3)* %p, align 1
  store i16 %v, i16 addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_unaligned_load_store_i16:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_ushort
; UNALIGNED: buffer_store_short
; SI: s_endpgm
define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
  %v = load i16, i16 addrspace(1)* %p, align 1
  store i16 %v, i16 addrspace(1)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}local_unaligned_load_store_i32:

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI-NOT: v_or
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 1
  store i32 %v, i32 addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_unaligned_load_store_i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(1)* %p, align 1
  store i32 %v, i32 addrspace(1)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_align2_load_store_i32:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short

; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(1)* %p, align 2
  store i32 %v, i32 addrspace(1)* %r, align 2
  ret void
}

; SI-LABEL: {{^}}local_align2_load_store_i32:
; SI: ds_read_u16
; SI: ds_read_u16
; SI: ds_write_b16
; SI: ds_write_b16
define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 2
  store i32 %v, i32 addrspace(3)* %r, align 2
  ret void
}

; SI-LABEL: {{^}}local_unaligned_load_store_i64:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) #0 {
  %v = load i64, i64 addrspace(3)* %p, align 1
  store i64 %v, i64 addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}local_unaligned_load_store_v2i32:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) #0 {
  %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
  store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_align2_load_store_i64:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short

; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(1)* %p, align 2
  store i64 %v, i64 addrspace(1)* %r, align 2
  ret void
}

; SI-LABEL: {{^}}unaligned_load_store_i64_global:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(1)* %p, align 1
  store i64 %v, i64 addrspace(1)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}local_unaligned_load_store_v4i32:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8

; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}global_unaligned_load_store_v4i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dwordx4
; UNALIGNED: buffer_store_dwordx4
define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
  ret void
}

; SI-LABEL: {{^}}local_load_i64_align_4:
; SI: ds_read2_b32
define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: {{^}}local_load_i64_align_4_with_offset:
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
  %val = load i64, i64 addrspace(3)* %ptr, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: {{^}}local_load_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
; SI: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; SI: s_endpgm
define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  %val = load i64, i64 addrspace(3)* %ptri64, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: {{^}}local_load_i64_align_1:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: store_dwordx2
define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}

; SI-LABEL: {{^}}local_store_i64_align_4:
; SI: ds_write2_b32
define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
  store i64 %val, i64 addrspace(3)* %out, align 4
  ret void
}

; SI-LABEL: {{^}}local_store_i64_align_4_with_offset:
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
; SI: s_endpgm
define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
  %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
  store i64 0, i64 addrspace(3)* %ptr, align 4
  ret void
}

; SI-LABEL: {{^}}local_store_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
; SI: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
; SI: s_endpgm
define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
  %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  ; Fixed: previously stored through %out, leaving the split-offset pointer
  ; %ptri64 dead and not exercising the case described above. Store through
  ; %ptri64, mirroring local_load_i64_align_4_with_split_offset.
  store i64 0, i64 addrspace(3)* %ptri64, align 4
  ret void
}

; SI-LABEL: {{^}}constant_unaligned_load_i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: s_load_dword

; SI: buffer_store_dword
define void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(2)* %p, align 1
  store i32 %v, i32 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align2_load_i32:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; UNALIGNED: s_load_dword
; UNALIGNED: buffer_store_dword
define void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(2)* %p, align 2
  store i32 %v, i32 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align2_load_i64:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; UNALIGNED: s_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(2)* %p, align 2
  store i64 %v, i64 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align4_load_i64:
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
define void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(2)* %p, align 4
  store i64 %v, i64 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align4_load_v4i32:
; SI: s_load_dwordx4
; SI: buffer_store_dwordx4
define void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 4
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_unaligned_load_v2i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: buffer_load_dwordx2

; SI: buffer_store_dwordx2
define void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32> addrspace(1)* %r) #0 {
  %v = load <2 x i32>, <2 x i32> addrspace(2)* %p, align 1
  store <2 x i32> %v, <2 x i32> addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_unaligned_load_v4i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: buffer_load_dwordx4

; SI: buffer_store_dwordx4
define void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align4_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
define void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
  %v = load i8, i8 addrspace(2)* %p, align 4
  store i8 %v, i8 addrspace(1)* %r, align 4
  ret void
}

; SI-LABEL: {{^}}constant_align2_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
define void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
  %v = load i8, i8 addrspace(2)* %p, align 2
  store i8 %v, i8 addrspace(1)* %r, align 2
  ret void
}

; SI-LABEL: {{^}}constant_align4_merge_load_2_i32:
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %gep0 = getelementptr i32, i32 addrspace(2)* %p, i64 1
  %v0 = load i32, i32 addrspace(2)* %p, align 4
  %v1 = load i32, i32 addrspace(2)* %gep0, align 4

  %gep1 = getelementptr i32, i32 addrspace(1)* %r, i64 1
  store i32 %v0, i32 addrspace(1)* %r, align 4
  store i32 %v1, i32 addrspace(1)* %gep1, align 4
  ret void
}

attributes #0 = { nounwind }