; [source-browser residue, not part of the test] Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs< %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=cypress < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 
      5 ; FUNC-LABEL: {{^}}zextload_global_i32_to_i64:
      6 ; SI: buffer_load_dword v[[LO:[0-9]+]],
      7 ; SI: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
      8 ; SI: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
      9 define void @zextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
     10   %a = load i32, i32 addrspace(1)* %in
     11   %ext = zext i32 %a to i64
     12   store i64 %ext, i64 addrspace(1)* %out
     13   ret void
     14 }
     15 
     16 ; FUNC-LABEL: {{^}}sextload_global_i32_to_i64:
     17 ; SI: buffer_load_dword [[LOAD:v[0-9]+]],
     18 ; SI: v_ashrrev_i32_e32 v{{[0-9]+}}, 31, [[LOAD]]
     19 ; SI: buffer_store_dwordx2
     20 define void @sextload_global_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(1)* %in) nounwind {
     21   %a = load i32, i32 addrspace(1)* %in
     22   %ext = sext i32 %a to i64
     23   store i64 %ext, i64 addrspace(1)* %out
     24   ret void
     25 }
     26 
     27 ; FUNC-LABEL: {{^}}zextload_global_v1i32_to_v1i64:
     28 ; SI: buffer_load_dword
     29 ; SI: buffer_store_dwordx2
     30 ; SI: s_endpgm
     31 define void @zextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
     32   %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
     33   %ext = zext <1 x i32> %load to <1 x i64>
     34   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
     35   ret void
     36 }
     37 
     38 ; FUNC-LABEL: {{^}}sextload_global_v1i32_to_v1i64:
     39 ; SI: buffer_load_dword
     40 ; SI: v_ashrrev_i32
     41 ; SI: buffer_store_dwordx2
     42 ; SI: s_endpgm
     43 define void @sextload_global_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(1)* nocapture %in) nounwind {
     44   %load = load <1 x i32>, <1 x i32> addrspace(1)* %in
     45   %ext = sext <1 x i32> %load to <1 x i64>
     46   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
     47   ret void
     48 }
     49 
     50 ; FUNC-LABEL: {{^}}zextload_global_v2i32_to_v2i64:
     51 ; SI: buffer_load_dwordx2
     52 ; SI: buffer_store_dwordx4
     53 ; SI: s_endpgm
     54 define void @zextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
     55   %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
     56   %ext = zext <2 x i32> %load to <2 x i64>
     57   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
     58   ret void
     59 }
     60 
     61 ; FUNC-LABEL: {{^}}sextload_global_v2i32_to_v2i64:
     62 ; SI: buffer_load_dwordx2
     63 ; SI-DAG: v_ashrrev_i32
     64 ; SI-DAG: v_ashrrev_i32
     65 ; SI-DAG: buffer_store_dwordx4
     66 ; SI: s_endpgm
     67 define void @sextload_global_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(1)* nocapture %in) nounwind {
     68   %load = load <2 x i32>, <2 x i32> addrspace(1)* %in
     69   %ext = sext <2 x i32> %load to <2 x i64>
     70   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
     71   ret void
     72 }
     73 
     74 ; FUNC-LABEL: {{^}}zextload_global_v4i32_to_v4i64:
     75 ; SI: buffer_load_dwordx4
     76 ; SI: buffer_store_dwordx4
     77 ; SI: buffer_store_dwordx4
     78 ; SI: s_endpgm
     79 define void @zextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
     80   %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
     81   %ext = zext <4 x i32> %load to <4 x i64>
     82   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
     83   ret void
     84 }
     85 
     86 ; FUNC-LABEL: {{^}}sextload_global_v4i32_to_v4i64:
     87 ; SI: buffer_load_dwordx4
     88 ; SI-DAG: v_ashrrev_i32
     89 ; SI-DAG: v_ashrrev_i32
     90 ; SI-DAG: v_ashrrev_i32
     91 ; SI-DAG: v_ashrrev_i32
     92 ; SI-DAG: buffer_store_dwordx4
     93 ; SI-DAG: buffer_store_dwordx4
     94 ; SI: s_endpgm
     95 define void @sextload_global_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(1)* nocapture %in) nounwind {
     96   %load = load <4 x i32>, <4 x i32> addrspace(1)* %in
     97   %ext = sext <4 x i32> %load to <4 x i64>
     98   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
     99   ret void
    100 }
    101 
    102 ; FUNC-LABEL: {{^}}zextload_global_v8i32_to_v8i64:
    103 ; SI: buffer_load_dwordx4
    104 ; SI: buffer_load_dwordx4
    105 ; SI-DAG: buffer_store_dwordx4
    106 ; SI-DAG: buffer_store_dwordx4
    107 ; SI-DAG: buffer_store_dwordx4
    108 ; SI-DAG: buffer_store_dwordx4
    109 ; SI: s_endpgm
    110 define void @zextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
    111   %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
    112   %ext = zext <8 x i32> %load to <8 x i64>
    113   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
    114   ret void
    115 }
    116 
    117 ; FUNC-LABEL: {{^}}sextload_global_v8i32_to_v8i64:
    118 ; SI: buffer_load_dwordx4
    119 ; SI: buffer_load_dwordx4
    120 
    121 ; SI-DAG: v_ashrrev_i32
    122 ; SI-DAG: v_ashrrev_i32
    123 ; SI-DAG: v_ashrrev_i32
    124 ; SI-DAG: v_ashrrev_i32
    125 ; SI-DAG: v_ashrrev_i32
    126 ; SI-DAG: v_ashrrev_i32
    127 ; SI-DAG: v_ashrrev_i32
    128 ; SI-DAG: v_ashrrev_i32
    129 ; SI-DAG: buffer_store_dwordx4
    130 ; SI-DAG: buffer_store_dwordx4
    131 ; SI-DAG: buffer_store_dwordx4
    132 ; SI-DAG: buffer_store_dwordx4
    133 ; SI: s_endpgm
    134 define void @sextload_global_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(1)* nocapture %in) nounwind {
    135   %load = load <8 x i32>, <8 x i32> addrspace(1)* %in
    136   %ext = sext <8 x i32> %load to <8 x i64>
    137   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
    138   ret void
    139 }
    140 
    141 ; FUNC-LABEL: {{^}}sextload_global_v16i32_to_v16i64:
    142 ; SI: buffer_load_dwordx4
    143 ; SI: buffer_load_dwordx4
    144 ; SI: buffer_load_dwordx4
    145 ; SI: buffer_load_dwordx4
    146 
    147 ; SI-DAG: v_ashrrev_i32
    148 ; SI-DAG: v_ashrrev_i32
    149 ; SI-DAG: v_ashrrev_i32
    150 ; SI-DAG: v_ashrrev_i32
    151 ; SI-DAG: buffer_store_dwordx4
    152 
    153 ; SI-DAG: v_ashrrev_i32
    154 ; SI-DAG: v_ashrrev_i32
    155 ; SI-DAG: v_ashrrev_i32
    156 ; SI-DAG: v_ashrrev_i32
    157 ; SI-DAG: buffer_store_dwordx4
    158 
    159 ; SI-DAG: v_ashrrev_i32
    160 ; SI-DAG: v_ashrrev_i32
    161 ; SI-DAG: v_ashrrev_i32
    162 ; SI-DAG: v_ashrrev_i32
    163 ; SI-DAG: buffer_store_dwordx4
    164 
    165 ; SI-DAG: v_ashrrev_i32
    166 ; SI-DAG: v_ashrrev_i32
    167 ; SI-DAG: v_ashrrev_i32
    168 ; SI-DAG: v_ashrrev_i32
    169 ; SI-DAG: buffer_store_dwordx4
    170 ; SI: s_endpgm
    171 define void @sextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
    172   %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
    173   %ext = sext <16 x i32> %load to <16 x i64>
    174   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
    175   ret void
    176 }
    177 
    178 ; FUNC-LABEL: {{^}}zextload_global_v16i32_to_v16i64
    179 ; SI: buffer_load_dwordx4
    180 ; SI: buffer_load_dwordx4
    181 ; SI: buffer_load_dwordx4
    182 ; SI: buffer_load_dwordx4
    183 
    184 ; SI: buffer_store_dwordx4
    185 ; SI: buffer_store_dwordx4
    186 ; SI: buffer_store_dwordx4
    187 ; SI: buffer_store_dwordx4
    188 ; SI: buffer_store_dwordx4
    189 ; SI: buffer_store_dwordx4
    190 ; SI: buffer_store_dwordx4
    191 ; SI: buffer_store_dwordx4
    192 ; SI: s_endpgm
    193 define void @zextload_global_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(1)* nocapture %in) nounwind {
    194   %load = load <16 x i32>, <16 x i32> addrspace(1)* %in
    195   %ext = zext <16 x i32> %load to <16 x i64>
    196   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
    197   ret void
    198 }
    199 
    200 ; FUNC-LABEL: {{^}}sextload_global_v32i32_to_v32i64:
    201 ; SI: buffer_load_dwordx4
    202 ; SI: buffer_load_dwordx4
    203 ; SI: buffer_load_dwordx4
    204 ; SI: buffer_load_dwordx4
    205 ; SI: buffer_load_dwordx4
    206 ; SI: buffer_load_dwordx4
    207 ; SI: buffer_load_dwordx4
    208 ; SI: buffer_load_dwordx4
    209 
    210 
    211 ; SI-DAG: v_ashrrev_i32
    212 ; SI-DAG: v_ashrrev_i32
    213 ; SI-DAG: v_ashrrev_i32
    214 ; SI-DAG: v_ashrrev_i32
    215 ; SI-DAG: v_ashrrev_i32
    216 ; SI-DAG: v_ashrrev_i32
    217 ; SI-DAG: v_ashrrev_i32
    218 ; SI-DAG: v_ashrrev_i32
    219 ; SI-DAG: v_ashrrev_i32
    220 ; SI-DAG: v_ashrrev_i32
    221 ; SI-DAG: v_ashrrev_i32
    222 ; SI-DAG: v_ashrrev_i32
    223 ; SI-DAG: v_ashrrev_i32
    224 ; SI-DAG: v_ashrrev_i32
    225 ; SI-DAG: v_ashrrev_i32
    226 ; SI-DAG: v_ashrrev_i32
    227 ; SI-DAG: v_ashrrev_i32
    228 ; SI-DAG: v_ashrrev_i32
    229 ; SI-DAG: v_ashrrev_i32
    230 ; SI-DAG: v_ashrrev_i32
    231 ; SI-DAG: v_ashrrev_i32
    232 ; SI-DAG: v_ashrrev_i32
    233 ; SI-DAG: v_ashrrev_i32
    234 ; SI-DAG: v_ashrrev_i32
    235 ; SI-DAG: v_ashrrev_i32
    236 ; SI-DAG: v_ashrrev_i32
    237 ; SI-DAG: v_ashrrev_i32
    238 ; SI-DAG: v_ashrrev_i32
    239 ; SI-DAG: v_ashrrev_i32
    240 ; SI-DAG: v_ashrrev_i32
    241 ; SI-DAG: v_ashrrev_i32
    242 ; SI-DAG: v_ashrrev_i32
    243 
    244 ; SI-DAG: buffer_store_dwordx4
    245 ; SI-DAG: buffer_store_dwordx4
    246 ; SI-DAG: buffer_store_dwordx4
    247 ; SI-DAG: buffer_store_dwordx4
    248 
    249 ; SI-DAG: buffer_store_dwordx4
    250 ; SI-DAG: buffer_store_dwordx4
    251 ; SI-DAG: buffer_store_dwordx4
    252 ; SI-DAG: buffer_store_dwordx4
    253 
    254 ; SI-DAG: buffer_store_dwordx4
    255 ; SI-DAG: buffer_store_dwordx4
    256 ; SI-DAG: buffer_store_dwordx4
    257 ; SI-DAG: buffer_store_dwordx4
    258 
    259 ; SI-DAG: buffer_store_dwordx4
    260 ; SI-DAG: buffer_store_dwordx4
    261 ; SI-DAG: buffer_store_dwordx4
    262 ; SI-DAG: buffer_store_dwordx4
    263 
    264 ; SI: s_endpgm
    265 define void @sextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
    266   %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
    267   %ext = sext <32 x i32> %load to <32 x i64>
    268   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
    269   ret void
    270 }
    271 
    272 ; FUNC-LABEL: {{^}}zextload_global_v32i32_to_v32i64:
    273 ; SI: buffer_load_dwordx4
    274 ; SI: buffer_load_dwordx4
    275 ; SI: buffer_load_dwordx4
    276 ; SI: buffer_load_dwordx4
    277 ; SI: buffer_load_dwordx4
    278 ; SI: buffer_load_dwordx4
    279 ; SI: buffer_load_dwordx4
    280 ; SI: buffer_load_dwordx4
    281 
    282 ; SI-DAG: buffer_store_dwordx4
    283 ; SI-DAG: buffer_store_dwordx4
    284 ; SI-DAG: buffer_store_dwordx4
    285 ; SI-DAG: buffer_store_dwordx4
    286 
    287 ; SI-DAG: buffer_store_dwordx4
    288 ; SI-DAG: buffer_store_dwordx4
    289 ; SI-DAG: buffer_store_dwordx4
    290 ; SI-DAG: buffer_store_dwordx4
    291 
    292 ; SI-DAG: buffer_store_dwordx4
    293 ; SI-DAG: buffer_store_dwordx4
    294 ; SI-DAG: buffer_store_dwordx4
    295 ; SI-DAG: buffer_store_dwordx4
    296 
    297 ; SI-DAG: buffer_store_dwordx4
    298 ; SI-DAG: buffer_store_dwordx4
    299 ; SI-DAG: buffer_store_dwordx4
    300 ; SI-DAG: buffer_store_dwordx4
    301 
    302 ; SI: s_endpgm
    303 define void @zextload_global_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(1)* nocapture %in) nounwind {
    304   %load = load <32 x i32>, <32 x i32> addrspace(1)* %in
    305   %ext = zext <32 x i32> %load to <32 x i64>
    306   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
    307   ret void
    308 }
    309