; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      5 
      6 ; FUNC-LABEL: {{^}}constant_load_i32:
      7 ; GCN: s_load_dword s{{[0-9]+}}
      8 
      9 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
     10 define void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
     11 entry:
     12   %ld = load i32, i32 addrspace(2)* %in
     13   store i32 %ld, i32 addrspace(1)* %out
     14   ret void
     15 }
     16 
     17 ; FUNC-LABEL: {{^}}constant_load_v2i32:
     18 ; GCN: s_load_dwordx2
     19 
     20 ; EG: VTX_READ_64
     21 define void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
     22 entry:
     23   %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
     24   store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
     25   ret void
     26 }
     27 
     28 ; FUNC-LABEL: {{^}}constant_load_v3i32:
     29 ; GCN: s_load_dwordx4
     30 
     31 ; EG: VTX_READ_128
     32 define void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 {
     33 entry:
     34   %ld = load <3 x i32>, <3 x i32> addrspace(2)* %in
     35   store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
     36   ret void
     37 }
     38 
     39 ; FUNC-LABEL: {{^}}constant_load_v4i32:
     40 ; GCN: s_load_dwordx4
     41 
     42 ; EG: VTX_READ_128
     43 define void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
     44 entry:
     45   %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
     46   store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
     47   ret void
     48 }
     49 
     50 ; FUNC-LABEL: {{^}}constant_load_v8i32:
     51 ; GCN: s_load_dwordx8
     52 
     53 ; EG: VTX_READ_128
     54 ; EG: VTX_READ_128
     55 define void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
     56 entry:
     57   %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
     58   store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
     59   ret void
     60 }
     61 
     62 ; FUNC-LABEL: {{^}}constant_load_v16i32:
     63 ; GCN: s_load_dwordx16
     64 
     65 ; EG: VTX_READ_128
     66 ; EG: VTX_READ_128
     67 ; EG: VTX_READ_128
     68 ; EG: VTX_READ_128
     69 define void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
     70 entry:
     71   %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
     72   store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
     73   ret void
     74 }
     75 
     76 ; FUNC-LABEL: {{^}}constant_zextload_i32_to_i64:
     77 ; GCN-DAG: s_load_dword s[[SLO:[0-9]+]],
     78 ; GCN-DAG: v_mov_b32_e32 v[[SHI:[0-9]+]], 0{{$}}
     79 ; GCN: store_dwordx2
     80 
     81 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
     82 ; EG: CF_END
     83 ; EG: VTX_READ_32
     84 define void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
     85   %ld = load i32, i32 addrspace(2)* %in
     86   %ext = zext i32 %ld to i64
     87   store i64 %ext, i64 addrspace(1)* %out
     88   ret void
     89 }
     90 
     91 ; FUNC-LABEL: {{^}}constant_sextload_i32_to_i64:
     92 ; GCN: s_load_dword s[[SLO:[0-9]+]]
     93 ; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[SLO]], 31
     94 ; GCN: store_dwordx2
     95 
     96 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
     97 ; EG: CF_END
     98 ; EG: VTX_READ_32
     99 ; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}},  literal.
    100 ; EG: 31
    101 define void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
    102   %ld = load i32, i32 addrspace(2)* %in
    103   %ext = sext i32 %ld to i64
    104   store i64 %ext, i64 addrspace(1)* %out
    105   ret void
    106 }
    107 
    108 ; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
    109 ; GCN: s_load_dword
    110 ; GCN: store_dwordx2
    111 define void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
    112   %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
    113   %ext = zext <1 x i32> %ld to <1 x i64>
    114   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
    115   ret void
    116 }
    117 
    118 ; FUNC-LABEL: {{^}}constant_sextload_v1i32_to_v1i64:
    119 ; GCN: s_load_dword s[[LO:[0-9]+]]
    120 ; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
    121 ; GCN: store_dwordx2
    122 define void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
    123   %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
    124   %ext = sext <1 x i32> %ld to <1 x i64>
    125   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
    126   ret void
    127 }
    128 
    129 ; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
    130 ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
    131 ; GCN: store_dwordx4
    132 define void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
    133   %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
    134   %ext = zext <2 x i32> %ld to <2 x i64>
    135   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
    136   ret void
    137 }
    138 
    139 ; FUNC-LABEL: {{^}}constant_sextload_v2i32_to_v2i64:
    140 ; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
    141 
    142 ; GCN-DAG: s_ashr_i32
    143 ; GCN-DAG: s_ashr_i32
    144 
    145 ; GCN: store_dwordx4
    146 define void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
    147   %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
    148   %ext = sext <2 x i32> %ld to <2 x i64>
    149   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
    150   ret void
    151 }
    152 
    153 ; FUNC-LABEL: {{^}}constant_zextload_v4i32_to_v4i64:
    154 ; GCN: s_load_dwordx4
    155 
    156 ; GCN: store_dwordx4
    157 ; GCN: store_dwordx4
    158 define void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
    159   %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
    160   %ext = zext <4 x i32> %ld to <4 x i64>
    161   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
    162   ret void
    163 }
    164 
    165 ; FUNC-LABEL: {{^}}constant_sextload_v4i32_to_v4i64:
    166 ; GCN: s_load_dwordx4
    167 
    168 ; GCN: s_ashr_i32
    169 ; GCN: s_ashr_i32
    170 ; GCN: s_ashr_i32
    171 ; GCN: s_ashr_i32
    172 
    173 ; GCN: store_dwordx4
    174 ; GCN: store_dwordx4
    175 define void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
    176   %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
    177   %ext = sext <4 x i32> %ld to <4 x i64>
    178   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
    179   ret void
    180 }
    181 
    182 ; FUNC-LABEL: {{^}}constant_zextload_v8i32_to_v8i64:
    183 ; GCN: s_load_dwordx8
    184 
    185 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    186 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    187 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    188 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    189 
    190 ; GCN-HSA-DAG: flat_store_dwordx4
    191 ; GCN-HSA-DAG: flat_store_dwordx4
    192 ; GCN-SA-DAG: flat_store_dwordx4
    193 ; GCN-HSA-DAG: flat_store_dwordx4
    194 define void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
    195   %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
    196   %ext = zext <8 x i32> %ld to <8 x i64>
    197   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
    198   ret void
    199 }
    200 
    201 ; FUNC-LABEL: {{^}}constant_sextload_v8i32_to_v8i64:
    202 ; GCN: s_load_dwordx8
    203 
    204 ; GCN: s_ashr_i32
    205 ; GCN: s_ashr_i32
    206 ; GCN: s_ashr_i32
    207 ; GCN: s_ashr_i32
    208 ; GCN: s_ashr_i32
    209 ; GCN: s_ashr_i32
    210 ; GCN: s_ashr_i32
    211 ; GCN: s_ashr_i32
    212 
    213 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    214 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    215 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    216 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    217 
    218 ; GCN-HSA-DAG: flat_store_dwordx4
    219 ; GCN-HSA-DAG: flat_store_dwordx4
    220 ; GCN-HSA-DAG: flat_store_dwordx4
    221 ; GCN-HSA-DAG: flat_store_dwordx4
    222 define void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
    223   %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
    224   %ext = sext <8 x i32> %ld to <8 x i64>
    225   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
    226   ret void
    227 }
    228 
    229 ; FUNC-LABEL: {{^}}constant_sextload_v16i32_to_v16i64:
    230 ; GCN: s_load_dwordx16
    231 
    232 
    233 ; GCN-DAG: s_ashr_i32
    234 
    235 ; GCN: store_dwordx4
    236 ; GCN: store_dwordx4
    237 ; GCN: store_dwordx4
    238 ; GCN: store_dwordx4
    239 ; GCN: store_dwordx4
    240 ; GCN: store_dwordx4
    241 ; GCN: store_dwordx4
    242 ; GCN: store_dwordx4
    243 define void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
    244   %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
    245   %ext = sext <16 x i32> %ld to <16 x i64>
    246   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
    247   ret void
    248 }
    249 
    250 ; FUNC-LABEL: {{^}}constant_zextload_v16i32_to_v16i64
    251 ; GCN: s_load_dwordx16
    252 
    253 ; GCN-NOHSA: buffer_store_dwordx4
    254 ; GCN-NOHSA: buffer_store_dwordx4
    255 ; GCN-NOHSA: buffer_store_dwordx4
    256 ; GCN-NOHSA: buffer_store_dwordx4
    257 ; GCN-NOHSA: buffer_store_dwordx4
    258 ; GCN-NOHSA: buffer_store_dwordx4
    259 ; GCN-NOHSA: buffer_store_dwordx4
    260 ; GCN-NOHSA: buffer_store_dwordx4
    261 
    262 ; GCN-HSA: flat_store_dwordx4
    263 ; GCN-HSA: flat_store_dwordx4
    264 ; GCN-HSA: flat_store_dwordx4
    265 ; GCN-HSA: flat_store_dwordx4
    266 ; GCN-HSA: flat_store_dwordx4
    267 ; GCN-HSA: flat_store_dwordx4
    268 ; GCN-HSA: flat_store_dwordx4
    269 ; GCN-HSA: flat_store_dwordx4
    270 define void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
    271   %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
    272   %ext = zext <16 x i32> %ld to <16 x i64>
    273   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
    274   ret void
    275 }
    276 
    277 ; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64:
    278 
    279 ; GCN: s_load_dwordx16
    280 ; GCN: s_load_dwordx16
    281 
    282 ; GCN-NOHSA: buffer_store_dwordx4
    283 ; GCN-NOHSA: buffer_store_dwordx4
    284 ; GCN-NOHSA: buffer_store_dwordx4
    285 ; GCN-NOHSA: buffer_store_dwordx4
    286 
    287 ; GCN-NOHSA: buffer_store_dwordx4
    288 ; GCN-NOHSA: buffer_store_dwordx4
    289 ; GCN-NOHSA: buffer_store_dwordx4
    290 ; GCN-NOHSA: buffer_store_dwordx4
    291 
    292 ; GCN-NOHSA: buffer_store_dwordx4
    293 ; GCN-NOHSA: buffer_store_dwordx4
    294 ; GCN-NOHSA: buffer_store_dwordx4
    295 ; GCN-NOHSA: buffer_store_dwordx4
    296 
    297 ; GCN-NOHSA: buffer_store_dwordx4
    298 ; GCN-NOHSA: buffer_store_dwordx4
    299 ; GCN-NOHSA: buffer_store_dwordx4
    300 ; GCN-NOHSA: buffer_store_dwordx4
    301 
    302 ; GCN-HSA: flat_store_dwordx4
    303 ; GCN-HSA: flat_store_dwordx4
    304 ; GCN-HSA: flat_store_dwordx4
    305 ; GCN-HSA: flat_store_dwordx4
    306 
    307 ; GCN-HSA: flat_store_dwordx4
    308 ; GCN-HSA: flat_store_dwordx4
    309 ; GCN-HSA: flat_store_dwordx4
    310 ; GCN-HSA: flat_store_dwordx4
    311 
    312 ; GCN-HSA: flat_store_dwordx4
    313 ; GCN-HSA: flat_store_dwordx4
    314 ; GCN-HSA: flat_store_dwordx4
    315 ; GCN-HSA: flat_store_dwordx4
    316 
    317 ; GCN-HSA: flat_store_dwordx4
    318 ; GCN-HSA: flat_store_dwordx4
    319 ; GCN-HSA: flat_store_dwordx4
    320 ; GCN-HSA: flat_store_dwordx4
    321 
    322 define void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
    323   %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
    324   %ext = sext <32 x i32> %ld to <32 x i64>
    325   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
    326   ret void
    327 }
    328 
    329 ; FUNC-LABEL: {{^}}constant_zextload_v32i32_to_v32i64:
    330 ; GCN: s_load_dwordx16
    331 ; GCN: s_load_dwordx16
    332 
    333 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    334 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    335 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    336 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    337 
    338 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    339 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    340 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    341 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    342 
    343 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    344 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    345 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    346 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    347 
    348 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    349 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    350 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    351 ; GCN-NOHSA-DAG: buffer_store_dwordx4
    352 
    353 
    354 ; GCN-HSA-DAG: flat_store_dwordx4
    355 ; GCN-HSA-DAG: flat_store_dwordx4
    356 ; GCN-HSA-DAG: flat_store_dwordx4
    357 ; GCN-HSA-DAG: flat_store_dwordx4
    358 
    359 ; GCN-HSA-DAG: flat_store_dwordx4
    360 ; GCN-HSA-DAG: flat_store_dwordx4
    361 ; GCN-HSA-DAG: flat_store_dwordx4
    362 ; GCN-HSA-DAG: flat_store_dwordx4
    363 
    364 ; GCN-HSA-DAG: flat_store_dwordx4
    365 ; GCN-HSA-DAG: flat_store_dwordx4
    366 ; GCN-HSA-DAG: flat_store_dwordx4
    367 ; GCN-HSA-DAG: flat_store_dwordx4
    368 
    369 ; GCN-HSA-DAG: flat_store_dwordx4
    370 ; GCN-HSA-DAG: flat_store_dwordx4
    371 ; GCN-HSA-DAG: flat_store_dwordx4
    372 ; GCN-HSA-DAG: flat_store_dwordx4
    373 define void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
    374   %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
    375   %ext = zext <32 x i32> %ld to <32 x i64>
    376   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
    377   ret void
    378 }
    379 
    380 attributes #0 = { nounwind }
    381