; AMDGPU codegen test: i32 and i32-vector loads from addrspace(1), plain and with zero/sign extension to i64.
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      4 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      5 
      6 
      7 ; FUNC-LABEL: {{^}}global_load_i32:
      8 ; GCN-NOHSA: buffer_load_dword v{{[0-9]+}}
      9 ; GCN-HSA: flat_load_dword
     10 
     11 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
     12 define void @global_load_i32(i32 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; function under test: copy one i32 from %in to %out, both addrspace(1) pointers
     13 entry:
     14   %ld = load i32, i32 addrspace(1)* %in ; the load whose lowering the check lines above inspect
     15   store i32 %ld, i32 addrspace(1)* %out ; the store is what uses the loaded value
     16   ret void
     17 }
     18 
     19 ; FUNC-LABEL: {{^}}global_load_v2i32:
     20 ; GCN-NOHSA: buffer_load_dwordx2
     21 ; GCN-HSA: flat_load_dwordx2
     22 
     23 ; EG: VTX_READ_64
     24 define void @global_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 { ; function under test: copy one <2 x i32> (64 bits) from %in to %out
     25 entry:
     26   %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in ; checks above expect a single dwordx2 / 64-bit read
     27   store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
     28   ret void
     29 }
     30 
     31 ; FUNC-LABEL: {{^}}global_load_v3i32:
     32 ; GCN-NOHSA: buffer_load_dwordx4
     33 ; GCN-HSA: flat_load_dwordx4
     34 
     35 ; EG: VTX_READ_128
     36 define void @global_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(1)* %in) #0 { ; function under test: copy one <3 x i32> from %in to %out
     37 entry:
     38   %ld = load <3 x i32>, <3 x i32> addrspace(1)* %in ; checks above expect a dwordx4 / 128-bit read here, the same width checked for <4 x i32>
     39   store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}global_load_v4i32:
     44 ; GCN-NOHSA: buffer_load_dwordx4
     45 ; GCN-HSA: flat_load_dwordx4
     46 
     47 ; EG: VTX_READ_128
     48 define void @global_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { ; function under test: copy one <4 x i32> (128 bits) from %in to %out
     49 entry:
     50   %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in ; checks above expect one dwordx4 / 128-bit read
     51   store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
     52   ret void
     53 }
     54 
     55 ; FUNC-LABEL: {{^}}global_load_v8i32:
     56 ; GCN-NOHSA: buffer_load_dwordx4
     57 ; GCN-NOHSA: buffer_load_dwordx4
     58 ; GCN-HSA: flat_load_dwordx4
     59 ; GCN-HSA: flat_load_dwordx4
     60 
     61 ; EG: VTX_READ_128
     62 ; EG: VTX_READ_128
     63 define void @global_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 { ; function under test: copy one <8 x i32> (256 bits) from %in to %out
     64 entry:
     65   %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in ; checks above expect this to be split into two 128-bit reads
     66   store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
     67   ret void
     68 }
     69 
     70 ; FUNC-LABEL: {{^}}global_load_v16i32:
     71 ; GCN-NOHSA: buffer_load_dwordx4
     72 ; GCN-NOHSA: buffer_load_dwordx4
     73 ; GCN-NOHSA: buffer_load_dwordx4
     74 ; GCN-NOHSA: buffer_load_dwordx4
     75 
     76 ; GCN-HSA: flat_load_dwordx4
     77 ; GCN-HSA: flat_load_dwordx4
     78 ; GCN-HSA: flat_load_dwordx4
     79 ; GCN-HSA: flat_load_dwordx4
     80 
     81 ; EG: VTX_READ_128
     82 ; EG: VTX_READ_128
     83 ; EG: VTX_READ_128
     84 ; EG: VTX_READ_128
     85 define void @global_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 { ; function under test: copy one <16 x i32> (512 bits) from %in to %out
     86 entry:
     87   %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in ; checks above expect this to be split into four 128-bit reads
     88   store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
     89   ret void
     90 }
     91 
     92 ; FUNC-LABEL: {{^}}global_zextload_i32_to_i64:
     93 ; GCN-NOHSA-DAG: buffer_load_dword v[[LO:[0-9]+]],
     94 ; GCN-HSA-DAG: flat_load_dword v[[LO:[0-9]+]],
     95 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
     96 
     97 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
     98 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
     99 
    100 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
    101 define void @global_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; function under test: load an i32, widen it to i64 with zero fill, store the i64 to %out
    102   %ld = load i32, i32 addrspace(1)* %in
    103   %ext = zext i32 %ld to i64 ; upper 32 bits are zero; the checks above expect the high register to come from a v_mov_b32 of 0
    104   store i64 %ext, i64 addrspace(1)* %out ; checked above as one dwordx2 store of the lo/hi register pair
    105   ret void
    106 }
    107 
    108 ; FUNC-LABEL: {{^}}global_sextload_i32_to_i64:
    109 ; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
    110 ; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
    111 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
    112 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
    113 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    114 
    115 
    116 ; EG: MEM_RAT
    117 ; EG: VTX_READ_32
    118 ; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.
    119 ; EG: 31
    120 define void @global_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) #0 { ; function under test: load an i32, sign-extend it to i64, store the i64 to %out
    121   %ld = load i32, i32 addrspace(1)* %in
    122   %ext = sext i32 %ld to i64 ; upper 32 bits replicate the sign bit; the checks above expect an arithmetic shift right by 31 of the loaded value
    123   store i64 %ext, i64 addrspace(1)* %out ; checked above as one dwordx2 store of the lo/hi register pair
    124   ret void
    125 }
    126 
    127 ; FUNC-LABEL: {{^}}global_zextload_v1i32_to_v1i64:
    128 ; GCN-NOHSA: buffer_load_dword
    129 ; GCN-NOHSA: buffer_store_dwordx2
    130 
    131 ; GCN-HSA: flat_load_dword
    132 ; GCN-HSA: flat_store_dwordx2
    133 define void @global_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 { ; function under test: single-element-vector form of the i32 -> i64 zero extension
    134   %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in ; checks above expect a plain dword load
    135   %ext = zext <1 x i32> %ld to <1 x i64>
    136   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out ; checks above expect a dwordx2 store
    137   ret void
    138 }
    139 
    140 ; FUNC-LABEL: {{^}}global_sextload_v1i32_to_v1i64:
    141 ; GCN-NOHSA: buffer_load_dword v[[LO:[0-9]+]]
    142 ; GCN-HSA: flat_load_dword v[[LO:[0-9]+]]
    143 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
    144 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
    145 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    146 define void @global_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* %in) #0 { ; function under test: single-element-vector form of the i32 -> i64 sign extension
    147   %ld = load <1 x i32>, <1 x i32> addrspace(1)* %in
    148   %ext = sext <1 x i32> %ld to <1 x i64> ; checks above expect the high half from an arithmetic shift right by 31 of the loaded dword
    149   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
    150   ret void
    151 }
    152 
    153 ; FUNC-LABEL: {{^}}global_zextload_v2i32_to_v2i64:
    154 ; GCN-NOHSA: buffer_load_dwordx2
    155 ; GCN-NOHSA: buffer_store_dwordx4
    156 
    157 ; GCN-HSA: flat_load_dwordx2
    158 ; GCN-HSA: flat_store_dwordx4
    159 define void @global_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 { ; function under test: load <2 x i32>, zero-extend each lane to i64, store <2 x i64>
    160   %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in ; checks above expect one dwordx2 load
    161   %ext = zext <2 x i32> %ld to <2 x i64>
    162   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out ; 128 bits of result: checks above expect one dwordx4 store
    163   ret void
    164 }
    165 
    166 ; FUNC-LABEL: {{^}}global_sextload_v2i32_to_v2i64:
    167 ; GCN-NOHSA: buffer_load_dwordx2
    168 ; GCN-HSA: flat_load_dwordx2
    169 
    170 ; GCN-DAG: v_ashrrev_i32
    171 ; GCN-DAG: v_ashrrev_i32
    172 
    173 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    174 ; GCN-HSA-DAG: flat_store_dwordx4
    175 define void @global_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* %in) #0 { ; function under test: load <2 x i32>, sign-extend each lane to i64, store <2 x i64>
    176   %ld = load <2 x i32>, <2 x i32> addrspace(1)* %in
    177   %ext = sext <2 x i32> %ld to <2 x i64> ; two lanes: checks above look for two v_ashrrev_i32, in any order
    178   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out ; checks above expect one dwordx4 store
    179   ret void
    180 }
    181 
    182 ; FUNC-LABEL: {{^}}global_zextload_v4i32_to_v4i64:
    183 ; GCN-NOHSA: buffer_load_dwordx4
    184 ; GCN-NOHSA: buffer_store_dwordx4
    185 ; GCN-NOHSA: buffer_store_dwordx4
    186 
    187 ; GCN-HSA: flat_load_dwordx4
    188 ; GCN-HSA: flat_store_dwordx4
    189 ; GCN-HSA: flat_store_dwordx4
    190 define void @global_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { ; function under test: load <4 x i32>, zero-extend each lane to i64, store <4 x i64>
    191   %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in ; checks above expect one dwordx4 load
    192   %ext = zext <4 x i32> %ld to <4 x i64>
    193   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out ; 256 bits of result: checks above expect two dwordx4 stores
    194   ret void
    195 }
    196 
    197 ; FUNC-LABEL: {{^}}global_sextload_v4i32_to_v4i64:
    198 ; GCN-NOHSA: buffer_load_dwordx4
    199 ; GCN-HSA: flat_load_dwordx4
    200 
    201 ; GCN-DAG: v_ashrrev_i32
    202 ; GCN-DAG: v_ashrrev_i32
    203 ; GCN-DAG: v_ashrrev_i32
    204 ; GCN-DAG: v_ashrrev_i32
    205 
    206 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    207 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    208 
    209 ; GCN-HSA-DAG: flat_store_dwordx4
    210 ; GCN-HSA-DAG: flat_store_dwordx4
    211 define void @global_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* %in) #0 { ; function under test: load <4 x i32>, sign-extend each lane to i64, store <4 x i64>
    212   %ld = load <4 x i32>, <4 x i32> addrspace(1)* %in
    213   %ext = sext <4 x i32> %ld to <4 x i64> ; four lanes: checks above look for four v_ashrrev_i32, in any order
    214   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out ; checks above expect two dwordx4 stores
    215   ret void
    216 }
    217 
    218 ; FUNC-LABEL: {{^}}global_zextload_v8i32_to_v8i64:
    219 ; GCN-NOHSA: buffer_load_dwordx4
    220 ; GCN-NOHSA: buffer_load_dwordx4
    221 
    222 ; GCN-HSA: flat_load_dwordx4
    223 ; GCN-HSA: flat_load_dwordx4
    224 
    225 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    226 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    227 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    228 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    229 
    230 ; GCN-HSA-DAG: flat_store_dwordx4
    231 ; GCN-HSA-DAG: flat_store_dwordx4
    232 ; GCN-HSA-DAG: flat_store_dwordx4
    233 ; GCN-HSA-DAG: flat_store_dwordx4
    234 define void @global_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 { ; function under test: load <8 x i32>, zero-extend each lane to i64, store <8 x i64>
    235   %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in ; 256 bits of input: checks above expect two dwordx4 loads
    236   %ext = zext <8 x i32> %ld to <8 x i64>
    237   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out ; 512 bits of result: four dwordx4 stores are checked per target
    238   ret void
    239 }
    240 
    241 ; FUNC-LABEL: {{^}}global_sextload_v8i32_to_v8i64:
    242 ; GCN-NOHSA: buffer_load_dwordx4
    243 ; GCN-NOHSA: buffer_load_dwordx4
    244 
    245 ; GCN-HSA: flat_load_dwordx4
    246 ; GCN-HSA: flat_load_dwordx4
    247 
    248 ; GCN-DAG: v_ashrrev_i32
    249 ; GCN-DAG: v_ashrrev_i32
    250 ; GCN-DAG: v_ashrrev_i32
    251 ; GCN-DAG: v_ashrrev_i32
    252 ; GCN-DAG: v_ashrrev_i32
    253 ; GCN-DAG: v_ashrrev_i32
    254 ; GCN-DAG: v_ashrrev_i32
    255 ; GCN-DAG: v_ashrrev_i32
    256 
    257 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    258 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    259 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    260 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    261 
    262 ; GCN-HSA-DAG: flat_store_dwordx4
    263 ; GCN-HSA-DAG: flat_store_dwordx4
    264 ; GCN-HSA-DAG: flat_store_dwordx4
    265 ; GCN-HSA-DAG: flat_store_dwordx4
    266 define void @global_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* %in) #0 { ; function under test: load <8 x i32>, sign-extend each lane to i64, store <8 x i64>
    267   %ld = load <8 x i32>, <8 x i32> addrspace(1)* %in ; checks above expect two dwordx4 loads
    268   %ext = sext <8 x i32> %ld to <8 x i64> ; eight lanes: checks above look for eight v_ashrrev_i32, in any order
    269   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out ; checks above expect four dwordx4 stores
    270   ret void
    271 }
    272 
    273 ; FUNC-LABEL: {{^}}global_sextload_v16i32_to_v16i64:
    274 ; GCN-NOHSA: buffer_load_dwordx4
    275 ; GCN-NOHSA: buffer_load_dwordx4
    276 ; GCN-NOHSA: buffer_load_dwordx4
    277 ; GCN-NOHSA: buffer_load_dwordx4
    278 
    279 ; GCN-HSA: flat_load_dwordx4
    280 ; GCN-HSA: flat_load_dwordx4
    281 ; GCN-HSA: flat_load_dwordx4
    282 ; GCN-HSA: flat_load_dwordx4
    283 
    284 
    285 ; GCN-DAG: v_ashrrev_i32
    286 ; GCN-DAG: v_ashrrev_i32
    287 ; GCN-DAG: v_ashrrev_i32
    288 ; GCN-DAG: v_ashrrev_i32
    289 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    290 ; GCN-HSA-DAG: flat_store_dwordx4
    291 
    292 ; GCN-DAG: v_ashrrev_i32
    293 ; GCN-DAG: v_ashrrev_i32
    294 ; GCN-DAG: v_ashrrev_i32
    295 ; GCN-DAG: v_ashrrev_i32
    296 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    297 ; GCN-HSA-DAG: flat_store_dwordx4
    298 
    299 ; GCN-DAG: v_ashrrev_i32
    300 ; GCN-DAG: v_ashrrev_i32
    301 ; GCN-DAG: v_ashrrev_i32
    302 ; GCN-DAG: v_ashrrev_i32
    303 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    304 ; GCN-HSA-DAG: flat_store_dwordx4
    305 
    306 ; GCN-DAG: v_ashrrev_i32
    307 ; GCN-DAG: v_ashrrev_i32
    308 ; GCN-DAG: v_ashrrev_i32
    309 ; GCN-DAG: v_ashrrev_i32
    310 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    311 ; GCN-HSA-DAG: flat_store_dwordx4
    312 define void @global_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 { ; function under test: load <16 x i32>, sign-extend each lane to i64, store <16 x i64>
    313   %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in ; checks above expect four dwordx4 loads
    314   %ext = sext <16 x i32> %ld to <16 x i64> ; sixteen lanes: checks above look for sixteen v_ashrrev_i32 in four groups
    315   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out ; NOTE(review): 1024 bits of result would be eight 128-bit stores, but only four are matched above (the zext variant matches eight) - confirm this is intentional
    316   ret void
    317 }
    318 
    319 ; FUNC-LABEL: {{^}}global_zextload_v16i32_to_v16i64:
    320 ; GCN-NOHSA: buffer_load_dwordx4
    321 ; GCN-NOHSA: buffer_load_dwordx4
    322 ; GCN-NOHSA: buffer_load_dwordx4
    323 ; GCN-NOHSA: buffer_load_dwordx4
    324 
    325 ; GCN-HSA: flat_load_dwordx4
    326 ; GCN-HSA: flat_load_dwordx4
    327 ; GCN-HSA: flat_load_dwordx4
    328 ; GCN-HSA: flat_load_dwordx4
    329 
    330 ; GCN-NOHSA: buffer_store_dwordx4
    331 ; GCN-NOHSA: buffer_store_dwordx4
    332 ; GCN-NOHSA: buffer_store_dwordx4
    333 ; GCN-NOHSA: buffer_store_dwordx4
    334 ; GCN-NOHSA: buffer_store_dwordx4
    335 ; GCN-NOHSA: buffer_store_dwordx4
    336 ; GCN-NOHSA: buffer_store_dwordx4
    337 ; GCN-NOHSA: buffer_store_dwordx4
    338 
    339 ; GCN-HSA: flat_store_dwordx4
    340 ; GCN-HSA: flat_store_dwordx4
    341 ; GCN-HSA: flat_store_dwordx4
    342 ; GCN-HSA: flat_store_dwordx4
    343 ; GCN-HSA: flat_store_dwordx4
    344 ; GCN-HSA: flat_store_dwordx4
    345 ; GCN-HSA: flat_store_dwordx4
    346 ; GCN-HSA: flat_store_dwordx4
    347 define void @global_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* %in) #0 { ; function under test: load <16 x i32>, zero-extend each lane to i64, store <16 x i64>
    348   %ld = load <16 x i32>, <16 x i32> addrspace(1)* %in ; 512 bits of input: four dwordx4 loads are checked per target
    349   %ext = zext <16 x i32> %ld to <16 x i64>
    350   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out ; 1024 bits of result: eight dwordx4 stores are checked per target
    351   ret void
    352 }
    353 
    354 ; FUNC-LABEL: {{^}}global_sextload_v32i32_to_v32i64:
    355 
    356 ; GCN-NOHSA: buffer_load_dwordx4
    357 ; GCN-NOHSA: buffer_load_dwordx4
    358 ; GCN-NOHSA: buffer_load_dwordx4
    359 ; GCN-NOHSA: buffer_load_dwordx4
    360 ; GCN-NOHSA: buffer_load_dwordx4
    361 ; GCN-NOHSA: buffer_load_dwordx4
    362 ; GCN-NOHSA: buffer_load_dwordx4
    363 ; GCN-NOHSA: buffer_load_dwordx4
    364 
    365 ; GCN-HSA: flat_load_dwordx4
    366 ; GCN-HSA: flat_load_dwordx4
    367 ; GCN-HSA: flat_load_dwordx4
    368 ; GCN-HSA: flat_load_dwordx4
    369 ; GCN-HSA: flat_load_dwordx4
    370 ; GCN-HSA: flat_load_dwordx4
    371 ; GCN-HSA: flat_load_dwordx4
    372 ; GCN-HSA: flat_load_dwordx4
    373 
    374 ; GCN-DAG: v_ashrrev_i32
    375 ; GCN-DAG: v_ashrrev_i32
    376 ; GCN-DAG: v_ashrrev_i32
    377 ; GCN-DAG: v_ashrrev_i32
    378 ; GCN-DAG: v_ashrrev_i32
    379 ; GCN-DAG: v_ashrrev_i32
    380 ; GCN-DAG: v_ashrrev_i32
    381 ; GCN-DAG: v_ashrrev_i32
    382 ; GCN-DAG: v_ashrrev_i32
    383 ; GCN-DAG: v_ashrrev_i32
    384 ; GCN-DAG: v_ashrrev_i32
    385 ; GCN-DAG: v_ashrrev_i32
    386 ; GCN-DAG: v_ashrrev_i32
    387 ; GCN-DAG: v_ashrrev_i32
    388 ; GCN-DAG: v_ashrrev_i32
    389 ; GCN-DAG: v_ashrrev_i32
    390 ; GCN-DAG: v_ashrrev_i32
    391 ; GCN-DAG: v_ashrrev_i32
    392 ; GCN-DAG: v_ashrrev_i32
    393 ; GCN-DAG: v_ashrrev_i32
    394 ; GCN-DAG: v_ashrrev_i32
    395 ; GCN-DAG: v_ashrrev_i32
    396 ; GCN-DAG: v_ashrrev_i32
    397 ; GCN-DAG: v_ashrrev_i32
    398 ; GCN-DAG: v_ashrrev_i32
    399 ; GCN-DAG: v_ashrrev_i32
    400 ; GCN-DAG: v_ashrrev_i32
    401 ; GCN-DAG: v_ashrrev_i32
    402 ; GCN-DAG: v_ashrrev_i32
    403 ; GCN-DAG: v_ashrrev_i32
    404 ; GCN-DAG: v_ashrrev_i32
    405 ; GCN-DAG: v_ashrrev_i32
    406 
    407 ; GCN-NOHSA: buffer_store_dwordx4
    408 ; GCN-NOHSA: buffer_store_dwordx4
    409 ; GCN-NOHSA: buffer_store_dwordx4
    410 ; GCN-NOHSA: buffer_store_dwordx4
    411 
    412 ; GCN-NOHSA: buffer_store_dwordx4
    413 ; GCN-NOHSA: buffer_store_dwordx4
    414 ; GCN-NOHSA: buffer_store_dwordx4
    415 ; GCN-NOHSA: buffer_store_dwordx4
    416 
    417 ; GCN-NOHSA: buffer_store_dwordx4
    418 ; GCN-NOHSA: buffer_store_dwordx4
    419 ; GCN-NOHSA: buffer_store_dwordx4
    420 ; GCN-NOHSA: buffer_store_dwordx4
    421 
    422 ; GCN-NOHSA: buffer_store_dwordx4
    423 ; GCN-NOHSA: buffer_store_dwordx4
    424 ; GCN-NOHSA: buffer_store_dwordx4
    425 ; GCN-NOHSA: buffer_store_dwordx4
    426 
    427 ; GCN-HSA: flat_store_dwordx4
    428 ; GCN-HSA: flat_store_dwordx4
    429 ; GCN-HSA: flat_store_dwordx4
    430 ; GCN-HSA: flat_store_dwordx4
    431 
    432 ; GCN-HSA: flat_store_dwordx4
    433 ; GCN-HSA: flat_store_dwordx4
    434 ; GCN-HSA: flat_store_dwordx4
    435 ; GCN-HSA: flat_store_dwordx4
    436 
    437 ; GCN-HSA: flat_store_dwordx4
    438 ; GCN-HSA: flat_store_dwordx4
    439 ; GCN-HSA: flat_store_dwordx4
    440 ; GCN-HSA: flat_store_dwordx4
    441 
    442 ; GCN-HSA: flat_store_dwordx4
    443 ; GCN-HSA: flat_store_dwordx4
    444 ; GCN-HSA: flat_store_dwordx4
    445 ; GCN-HSA: flat_store_dwordx4
    446 
    447 define void @global_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 { ; function under test: load <32 x i32>, sign-extend each lane to i64, store <32 x i64>
    448   %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in ; checks above expect eight dwordx4 loads
    449   %ext = sext <32 x i32> %ld to <32 x i64> ; thirty-two lanes: checks above look for thirty-two v_ashrrev_i32, in any order
    450   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out ; checks above expect sixteen dwordx4 stores, matched in order
    451   ret void
    452 }
    453 
    454 ; FUNC-LABEL: {{^}}global_zextload_v32i32_to_v32i64:
    455 ; GCN-NOHSA: buffer_load_dwordx4
    456 ; GCN-NOHSA: buffer_load_dwordx4
    457 ; GCN-NOHSA: buffer_load_dwordx4
    458 ; GCN-NOHSA: buffer_load_dwordx4
    459 ; GCN-NOHSA: buffer_load_dwordx4
    460 ; GCN-NOHSA: buffer_load_dwordx4
    461 ; GCN-NOHSA: buffer_load_dwordx4
    462 ; GCN-NOHSA: buffer_load_dwordx4
    463 
    464 ; GCN-HSA: flat_load_dwordx4
    465 ; GCN-HSA: flat_load_dwordx4
    466 ; GCN-HSA: flat_load_dwordx4
    467 ; GCN-HSA: flat_load_dwordx4
    468 ; GCN-HSA: flat_load_dwordx4
    469 ; GCN-HSA: flat_load_dwordx4
    470 ; GCN-HSA: flat_load_dwordx4
    471 ; GCN-HSA: flat_load_dwordx4
    472 
    473 
    474 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    475 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    476 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    477 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    478 
    479 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    480 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    481 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    482 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    483 
    484 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    485 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    486 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    487 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    488 
    489 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    490 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    491 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    492 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    493 
    494 
    495 ; GCN-HSA-DAG: flat_store_dwordx4
    496 ; GCN-HSA-DAG: flat_store_dwordx4
    497 ; GCN-HSA-DAG: flat_store_dwordx4
    498 ; GCN-HSA-DAG: flat_store_dwordx4
    499 
    500 ; GCN-HSA-DAG: flat_store_dwordx4
    501 ; GCN-HSA-DAG: flat_store_dwordx4
    502 ; GCN-HSA-DAG: flat_store_dwordx4
    503 ; GCN-HSA-DAG: flat_store_dwordx4
    504 
    505 ; GCN-HSA-DAG: flat_store_dwordx4
    506 ; GCN-HSA-DAG: flat_store_dwordx4
    507 ; GCN-HSA-DAG: flat_store_dwordx4
    508 ; GCN-HSA-DAG: flat_store_dwordx4
    509 
    510 ; GCN-HSA-DAG: flat_store_dwordx4
    511 ; GCN-HSA-DAG: flat_store_dwordx4
    512 ; GCN-HSA-DAG: flat_store_dwordx4
    513 ; GCN-HSA-DAG: flat_store_dwordx4
    514 define void @global_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* %in) #0 { ; function under test: load <32 x i32>, zero-extend each lane to i64, store <32 x i64>
    515   %ld = load <32 x i32>, <32 x i32> addrspace(1)* %in ; checks above expect eight dwordx4 loads
    516   %ext = zext <32 x i32> %ld to <32 x i64>
    517   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out ; checks above expect sixteen dwordx4 stores, in any order
    518   ret void
    519 }
    520 
    521 attributes #0 = { nounwind } ; attribute group #0: referenced by every define above, marks them nounwind
    522