; LLVM IR regression test (AMDGPU): unaligned and under-aligned load/store codegen.
; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC -check-prefix=ALIGNED %s
; RUN: llc -march=amdgcn -mcpu=bonaire -mattr=+unaligned-buffer-access -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC -check-prefix=UNALIGNED %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI -check-prefix=FUNC -check-prefix=ALIGNED %s
      4 
; An align-1 i16 round-trip through LDS must be lowered to byte-sized DS
; accesses on every target (LDS has no unaligned-access feature).
; SI-LABEL: {{^}}local_unaligned_load_store_i16:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i16(i16 addrspace(3)* %p, i16 addrspace(3)* %r) #0 {
  %v = load i16, i16 addrspace(3)* %p, align 1
  store i16 %v, i16 addrspace(3)* %r, align 1
  ret void
}
     16 
; Align-1 i16 in global memory: split into bytes by default, but kept as a
; single ushort/short access when +unaligned-buffer-access is enabled.
; SI-LABEL: {{^}}global_unaligned_load_store_i16:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_ushort
; UNALIGNED: buffer_store_short
; SI: s_endpgm
define void @global_unaligned_load_store_i16(i16 addrspace(1)* %p, i16 addrspace(1)* %r) #0 {
  %v = load i16, i16 addrspace(1)* %p, align 1
  store i16 %v, i16 addrspace(1)* %r, align 1
  ret void
}
     31 
; Align-1 i32 in LDS: four byte loads and four byte stores, and the bytes must
; be forwarded directly (no shift/or reassembly of the value in between).
; FUNC-LABEL: {{^}}local_unaligned_load_store_i32:

; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI-NOT: v_or
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 1
  store i32 %v, i32 addrspace(3)* %r, align 1
  ret void
}
     50 
; Align-1 i32 in global memory: bytes by default, single dword access with
; +unaligned-buffer-access.
; SI-LABEL: {{^}}global_unaligned_load_store_i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
define void @global_unaligned_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(1)* %p, align 1
  store i32 %v, i32 addrspace(1)* %r, align 1
  ret void
}
     68 
; Align-2 i32 in global memory: halfword pairs by default, single dword with
; +unaligned-buffer-access.
; SI-LABEL: {{^}}global_align2_load_store_i32:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short

; UNALIGNED: buffer_load_dword
; UNALIGNED: buffer_store_dword
define void @global_align2_load_store_i32(i32 addrspace(1)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(1)* %p, align 2
  store i32 %v, i32 addrspace(1)* %r, align 2
  ret void
}
     82 
; Align-2 i32 in LDS: lowered to two u16 reads and two b16 writes.
; FUNC-LABEL: {{^}}local_align2_load_store_i32:
; GCN: ds_read_u16
; GCN: ds_read_u16
; GCN: ds_write_b16
; GCN: ds_write_b16
define void @local_align2_load_store_i32(i32 addrspace(3)* %p, i32 addrspace(3)* %r) #0 {
  %v = load i32, i32 addrspace(3)* %p, align 2
  store i32 %v, i32 addrspace(3)* %r, align 2
  ret void
}
     93 
; Align-1 i64 in LDS: eight byte reads and eight byte writes, with no
; shift/or reassembly interleaved between any of the byte stores.
; FUNC-LABEL: {{^}}local_unaligned_load_store_i64:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_i64(i64 addrspace(3)* %p, i64 addrspace(3)* %r) #0 {
  %v = load i64, i64 addrspace(3)* %p, align 1
  store i64 %v, i64 addrspace(3)* %r, align 1
  ret void
}
    140 
; Align-1 <2 x i32> in LDS: same byte-wise lowering as the i64 case above —
; eight byte reads, eight byte writes, no value reassembly in between.
; SI-LABEL: {{^}}local_unaligned_load_store_v2i32:
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8
; SI: ds_read_u8

; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl

; SI: ds_write_b8
; SI-NOT: v_or_b32
; SI-NOT: v_lshl
; SI: ds_write_b8
; SI: s_endpgm
define void @local_unaligned_load_store_v2i32(<2 x i32> addrspace(3)* %p, <2 x i32> addrspace(3)* %r) #0 {
  %v = load <2 x i32>, <2 x i32> addrspace(3)* %p, align 1
  store <2 x i32> %v, <2 x i32> addrspace(3)* %r, align 1
  ret void
}
    187 
; Align-2 i64 in global memory: four halfword loads/stores by default (no
; reassembly between loads), a single dwordx2 pair with unaligned access.
; SI-LABEL: {{^}}global_align2_load_store_i64:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_load_ushort

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short
; ALIGNED: buffer_store_short

; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @global_align2_load_store_i64(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(1)* %p, align 2
  store i64 %v, i64 addrspace(1)* %r, align 2
  ret void
}
    217 
; Align-1 i64 in global memory: eight byte loads then eight byte stores by
; default, single dwordx2 accesses with +unaligned-buffer-access.
; SI-LABEL: {{^}}unaligned_load_store_i64_global:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED-NOT: v_or_
; ALIGNED-NOT: v_lshl

; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte
; ALIGNED: buffer_store_byte

; UNALIGNED: buffer_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @unaligned_load_store_i64_global(i64 addrspace(1)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(1)* %p, align 1
  store i64 %v, i64 addrspace(1)* %r, align 1
  ret void
}
    247 
; Align-1 <4 x i32> in LDS: sixteen byte reads followed by sixteen byte writes.
; FUNC-LABEL: {{^}}local_unaligned_load_store_v4i32:
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8

; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: ds_write_b8
; GCN: s_endpgm
define void @local_unaligned_load_store_v4i32(<4 x i32> addrspace(3)* %p, <4 x i32> addrspace(3)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(3)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(3)* %r, align 1
  ret void
}
    294 
    295 ; SI-LABEL: {{^}}global_unaligned_load_store_v4i32
    296 ; ALIGNED: buffer_load_ubyte
    297 ; ALIGNED: buffer_load_ubyte
    298 ; ALIGNED: buffer_load_ubyte
    299 ; ALIGNED: buffer_load_ubyte
    300 ; ALIGNED: buffer_load_ubyte
    301 ; ALIGNED: buffer_load_ubyte
    302 ; ALIGNED: buffer_load_ubyte
    303 ; ALIGNED: buffer_load_ubyte
    304 ; ALIGNED: buffer_load_ubyte
    305 ; ALIGNED: buffer_load_ubyte
    306 ; ALIGNED: buffer_load_ubyte
    307 ; ALIGNED: buffer_load_ubyte
    308 ; ALIGNED: buffer_load_ubyte
    309 ; ALIGNED: buffer_load_ubyte
    310 ; ALIGNED: buffer_load_ubyte
    311 ; ALIGNED: buffer_load_ubyte
    312 
    313 ; ALIGNED: buffer_store_byte
    314 ; ALIGNED: buffer_store_byte
    315 ; ALIGNED: buffer_store_byte
    316 ; ALIGNED: buffer_store_byte
    317 ; ALIGNED: buffer_store_byte
    318 ; ALIGNED: buffer_store_byte
    319 ; ALIGNED: buffer_store_byte
    320 ; ALIGNED: buffer_store_byte
    321 ; ALIGNED: buffer_store_byte
    322 ; ALIGNED: buffer_store_byte
    323 ; ALIGNED: buffer_store_byte
    324 ; ALIGNED: buffer_store_byte
    325 ; ALIGNED: buffer_store_byte
    326 ; ALIGNED: buffer_store_byte
    327 ; ALIGNED: buffer_store_byte
    328 ; ALIGNED: buffer_store_byte
    329 
    330 ; UNALIGNED: buffer_load_dwordx4
    331 ; UNALIGNED: buffer_store_dwordx4
    332 define void @global_unaligned_load_store_v4i32(<4 x i32> addrspace(1)* %p, <4 x i32> addrspace(1)* %r) #0 {
    333   %v = load <4 x i32>, <4 x i32> addrspace(1)* %p, align 1
    334   store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 1
    335   ret void
    336 }
    337 
; Align-4 i64 LDS load: a single ds_read2_b32 (two dwords in one instruction).
; FUNC-LABEL: {{^}}local_load_i64_align_4:
; GCN: ds_read2_b32
define void @local_load_i64_align_4(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
    345 
    346 ; FUNC-LABEL: {{^}}local_load_i64_align_4_with_offset
    347 ; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset0:8 offset1:9
    348 define void @local_load_i64_align_4_with_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
    349   %ptr = getelementptr i64, i64 addrspace(3)* %in, i32 4
    350   %val = load i64, i64 addrspace(3)* %ptr, align 4
    351   store i64 %val, i64 addrspace(1)* %out, align 8
    352   ret void
    353 }
    354 
; Dword offset 255/256: the high offset does not fit the 8-bit offset1 field,
; so the base pointer is adjusted and offset0:0 offset1:1 are used instead.
; FUNC-LABEL: {{^}}local_load_i64_align_4_with_split_offset:
; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
; GCN: ds_read2_b32 v[{{[0-9]+}}:{{[0-9]+}}], v{{[0-9]}} offset1:1
; GCN: s_endpgm
define void @local_load_i64_align_4_with_split_offset(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %ptr = bitcast i64 addrspace(3)* %in to i32 addrspace(3)*
  %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
  %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
  %val = load i64, i64 addrspace(3)* %ptri64, align 4
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
    367 
; Align-1 i64 LDS load: eight byte reads; the aligned global store on the
; other side stays a single dwordx2.
; FUNC-LABEL: {{^}}local_load_i64_align_1:
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: ds_read_u8
; GCN: store_dwordx2
define void @local_load_i64_align_1(i64 addrspace(1)* nocapture %out, i64 addrspace(3)* %in) #0 {
  %val = load i64, i64 addrspace(3)* %in, align 1
  store i64 %val, i64 addrspace(1)* %out, align 8
  ret void
}
    383 
; Align-4 i64 LDS store: a single ds_write2_b32.
; FUNC-LABEL: {{^}}local_store_i64_align_4:
; GCN: ds_write2_b32
define void @local_store_i64_align_4(i64 addrspace(3)* %out, i64 %val) #0 {
  store i64 %val, i64 addrspace(3)* %out, align 4
  ret void
}
    390 
    391 ; FUNC-LABEL: {{^}}local_store_i64_align_4_with_offset
    392 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset0:8 offset1:9
    393 ; GCN: s_endpgm
    394 define void @local_store_i64_align_4_with_offset(i64 addrspace(3)* %out) #0 {
    395   %ptr = getelementptr i64, i64 addrspace(3)* %out, i32 4
    396   store i64 0, i64 addrspace(3)* %ptr, align 4
    397   ret void
    398 }
    399 
    400 ; FUNC-LABEL: {{^}}local_store_i64_align_4_with_split_offset:
    401 ; The tests for the case where the lo offset is 8-bits, but the hi offset is 9-bits
    402 ; GCN: ds_write2_b32 v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} offset1:1
    403 ; GCN: s_endpgm
    404 define void @local_store_i64_align_4_with_split_offset(i64 addrspace(3)* %out) #0 {
    405   %ptr = bitcast i64 addrspace(3)* %out to i32 addrspace(3)*
    406   %ptr255 = getelementptr i32, i32 addrspace(3)* %ptr, i32 255
    407   %ptri64 = bitcast i32 addrspace(3)* %ptr255 to i64 addrspace(3)*
    408   store i64 0, i64 addrspace(3)* %out, align 4
    409   ret void
    410 }
    411 
; Align-1 i32 from constant address space (2): byte loads by default, a scalar
; dword load with +unaligned-buffer-access; the aligned store stays a dword.
; SI-LABEL: {{^}}constant_unaligned_load_i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: s_load_dword

; SI: buffer_store_dword
define void @constant_unaligned_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(2)* %p, align 1
  store i32 %v, i32 addrspace(1)* %r, align 4
  ret void
}
    426 
; Align-2 i32 from constant address space: halfword pair by default, scalar
; dword load with +unaligned-buffer-access.
; SI-LABEL: {{^}}constant_align2_load_i32:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; UNALIGNED: s_load_dword
; UNALIGNED: buffer_store_dword
define void @constant_align2_load_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %v = load i32, i32 addrspace(2)* %p, align 2
  store i32 %v, i32 addrspace(1)* %r, align 4
  ret void
}
    438 
; Align-2 i64 from constant address space: four halfword loads by default,
; one scalar dwordx2 load with +unaligned-buffer-access.
; SI-LABEL: {{^}}constant_align2_load_i64:
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort
; ALIGNED: buffer_load_ushort

; UNALIGNED: s_load_dwordx2
; UNALIGNED: buffer_store_dwordx2
define void @constant_align2_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(2)* %p, align 2
  store i64 %v, i64 addrspace(1)* %r, align 4
  ret void
}
    452 
; Align-4 i64 from constant address space: a single scalar dwordx2 load on
; every target (no unaligned feature needed at dword alignment).
; SI-LABEL: {{^}}constant_align4_load_i64:
; SI: s_load_dwordx2
; SI: buffer_store_dwordx2
define void @constant_align4_load_i64(i64 addrspace(2)* %p, i64 addrspace(1)* %r) #0 {
  %v = load i64, i64 addrspace(2)* %p, align 4
  store i64 %v, i64 addrspace(1)* %r, align 4
  ret void
}
    461 
; Align-4 <4 x i32> from constant address space: a single scalar dwordx4 load.
; SI-LABEL: {{^}}constant_align4_load_v4i32:
; SI: s_load_dwordx4
; SI: buffer_store_dwordx4
define void @constant_align4_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 4
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
  ret void
}
    470 
; Align-1 <2 x i32> from constant address space: eight byte loads by default,
; one dwordx2 vector load with +unaligned-buffer-access.
; SI-LABEL: {{^}}constant_unaligned_load_v2i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: buffer_load_dwordx2

; SI: buffer_store_dwordx2
define void @constant_unaligned_load_v2i32(<2 x i32> addrspace(2)* %p, <2 x i32> addrspace(1)* %r) #0 {
  %v = load <2 x i32>, <2 x i32> addrspace(2)* %p, align 1
  store <2 x i32> %v, <2 x i32> addrspace(1)* %r, align 4
  ret void
}
    490 
; Align-1 <4 x i32> from constant address space: sixteen byte loads by
; default, one dwordx4 vector load with +unaligned-buffer-access.
; SI-LABEL: {{^}}constant_unaligned_load_v4i32:
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte
; ALIGNED: buffer_load_ubyte

; UNALIGNED: buffer_load_dwordx4

; SI: buffer_store_dwordx4
define void @constant_unaligned_load_v4i32(<4 x i32> addrspace(2)* %p, <4 x i32> addrspace(1)* %r) #0 {
  %v = load <4 x i32>, <4 x i32> addrspace(2)* %p, align 1
  store <4 x i32> %v, <4 x i32> addrspace(1)* %r, align 4
  ret void
}
    520 
; i8 from constant address space stays a byte access regardless of alignment.
; SI-LABEL: {{^}}constant_align4_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
define void @constant_align4_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
  %v = load i8, i8 addrspace(2)* %p, align 4
  store i8 %v, i8 addrspace(1)* %r, align 4
  ret void
}
    529 
; Same as constant_align4_load_i8 but with align 2: still a plain byte access.
; SI-LABEL: {{^}}constant_align2_load_i8:
; SI: buffer_load_ubyte
; SI: buffer_store_byte
define void @constant_align2_load_i8(i8 addrspace(2)* %p, i8 addrspace(1)* %r) #0 {
  %v = load i8, i8 addrspace(2)* %p, align 2
  store i8 %v, i8 addrspace(1)* %r, align 2
  ret void
}
    538 
; Two adjacent align-4 constant loads (and the matching stores) must merge
; into one s_load_dwordx2 / buffer_store_dwordx2 pair, moved through VGPRs.
; SI-LABEL: {{^}}constant_align4_merge_load_2_i32:
; SI: s_load_dwordx2 s{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; SI-DAG: v_mov_b32_e32 v[[VLO:[0-9]+]], s[[LO]]
; SI-DAG: v_mov_b32_e32 v[[VHI:[0-9]+]], s[[HI]]
; SI: buffer_store_dwordx2 v{{\[}}[[VLO]]:[[VHI]]{{\]}}
define void @constant_align4_merge_load_2_i32(i32 addrspace(2)* %p, i32 addrspace(1)* %r) #0 {
  %gep0 = getelementptr i32, i32 addrspace(2)* %p, i64 1
  %v0 = load i32, i32 addrspace(2)* %p, align 4
  %v1 = load i32, i32 addrspace(2)* %gep0, align 4

  %gep1 = getelementptr i32, i32 addrspace(1)* %r, i64 1
  store i32 %v0, i32 addrspace(1)* %r, align 4
  store i32 %v1, i32 addrspace(1)* %gep1, align 4
  ret void
}
    554 
    555 attributes #0 = { nounwind }
    556