Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 
      4 declare void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* nocapture, i8 addrspace(3)* nocapture, i32, i32, i1) nounwind
      5 declare void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* nocapture, i8 addrspace(1)* nocapture, i64, i32, i1) nounwind
      6 
      7 
      8 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align1:
      9 ; SI: ds_read_u8
     10 ; SI: ds_read_u8
     11 ; SI: ds_read_u8
     12 ; SI: ds_read_u8
     13 ; SI: ds_read_u8
     14 ; SI: ds_read_u8
     15 ; SI: ds_read_u8
     16 ; SI: ds_read_u8
     17 
     18 ; SI: ds_read_u8
     19 ; SI: ds_read_u8
     20 ; SI: ds_read_u8
     21 ; SI: ds_read_u8
     22 ; SI: ds_read_u8
     23 ; SI: ds_read_u8
     24 ; SI: ds_read_u8
     25 ; SI: ds_read_u8
     26 
     27 ; SI: ds_read_u8
     28 ; SI: ds_read_u8
     29 ; SI: ds_read_u8
     30 ; SI: ds_read_u8
     31 ; SI: ds_read_u8
     32 ; SI: ds_read_u8
     33 ; SI: ds_read_u8
     34 ; SI: ds_read_u8
     35 
     36 ; SI: ds_read_u8
     37 ; SI: ds_read_u8
     38 ; SI: ds_read_u8
     39 ; SI: ds_read_u8
     40 ; SI: ds_read_u8
     41 ; SI: ds_read_u8
     42 ; SI: ds_read_u8
     43 ; SI: ds_read_u8
     44 
     45 ; SI: ds_write_b8
     46 ; SI: ds_write_b8
     47 ; SI: ds_write_b8
     48 ; SI: ds_write_b8
     49 ; SI: ds_write_b8
     50 ; SI: ds_write_b8
     51 ; SI: ds_write_b8
     52 ; SI: ds_write_b8
     53 
     54 ; SI: ds_write_b8
     55 ; SI: ds_write_b8
     56 ; SI: ds_write_b8
     57 ; SI: ds_write_b8
     58 ; SI: ds_write_b8
     59 ; SI: ds_write_b8
     60 ; SI: ds_write_b8
     61 ; SI: ds_write_b8
     62 
     63 ; SI: ds_write_b8
     64 ; SI: ds_write_b8
     65 ; SI: ds_write_b8
     66 ; SI: ds_write_b8
     67 ; SI: ds_write_b8
     68 ; SI: ds_write_b8
     69 ; SI: ds_write_b8
     70 ; SI: ds_write_b8
     71 
     72 ; SI: ds_write_b8
     73 ; SI: ds_write_b8
     74 ; SI: ds_write_b8
     75 ; SI: ds_write_b8
     76 ; SI: ds_write_b8
     77 ; SI: ds_write_b8
     78 ; SI: ds_write_b8
     79 ; SI: ds_write_b8
     80 
     81 ; SI: s_endpgm
     82 define void @test_small_memcpy_i64_lds_to_lds_align1(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
        ; The memcpy intrinsic takes i8 pointers, so view both i64 addrspace(3)
        ; arguments as i8 addrspace(3)* first (destination, then source).
     83   %dst = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
     84   %src = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
        ; Operands: dest, src, length = 32 bytes, alignment = 1, volatile = false.
     85   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 32, i32 1, i1 false) nounwind
     86   ret void
     87 }
     88 
     89 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align2:
     90 ; SI: ds_read_u16
     91 ; SI: ds_read_u16
     92 ; SI: ds_read_u16
     93 ; SI: ds_read_u16
     94 ; SI: ds_read_u16
     95 ; SI: ds_read_u16
     96 ; SI: ds_read_u16
     97 ; SI: ds_read_u16
     98 
     99 ; SI: ds_read_u16
    100 ; SI: ds_read_u16
    101 ; SI: ds_read_u16
    102 ; SI: ds_read_u16
    103 ; SI: ds_read_u16
    104 ; SI: ds_read_u16
    105 ; SI: ds_read_u16
    106 ; SI: ds_read_u16
    107 
    108 ; SI: ds_write_b16
    109 ; SI: ds_write_b16
    110 ; SI: ds_write_b16
    111 ; SI: ds_write_b16
    112 ; SI: ds_write_b16
    113 ; SI: ds_write_b16
    114 ; SI: ds_write_b16
    115 ; SI: ds_write_b16
    116 
    117 ; SI: ds_write_b16
    118 ; SI: ds_write_b16
    119 ; SI: ds_write_b16
    120 ; SI: ds_write_b16
    121 ; SI: ds_write_b16
    122 ; SI: ds_write_b16
    123 ; SI: ds_write_b16
    124 ; SI: ds_write_b16
    125 
    126 ; SI: s_endpgm
    127 define void @test_small_memcpy_i64_lds_to_lds_align2(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
        ; View both i64 addrspace(3) arguments as the i8 pointers the memcpy
        ; intrinsic expects (destination, then source).
    128   %dst = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
    129   %src = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
        ; Operands: dest, src, length = 32 bytes, alignment = 2, volatile = false.
    130   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 32, i32 2, i1 false) nounwind
    131   ret void
    132 }
    133 
    134 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align4:
    135 ; SI: ds_read2_b32
    136 ; SI: ds_read2_b32
    137 ; SI: ds_read2_b32
    138 ; SI: ds_read2_b32
    139 
    140 ; SI: ds_write2_b32
    141 ; SI: ds_write2_b32
    142 ; SI: ds_write2_b32
    143 ; SI: ds_write2_b32
    144 
    145 ; SI: s_endpgm
    146 define void @test_small_memcpy_i64_lds_to_lds_align4(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
        ; View both i64 addrspace(3) arguments as the i8 pointers the memcpy
        ; intrinsic expects (destination, then source).
    147   %dst = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
    148   %src = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
        ; Operands: dest, src, length = 32 bytes, alignment = 4, volatile = false.
    149   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %dst, i8 addrspace(3)* %src, i32 32, i32 4, i1 false) nounwind
    150   ret void
    151 }
    152 
    153 ; FIXME: Use 64-bit ops
        ; NOTE(review): the checks below already expect 64-bit DS operations
        ; (ds_read_b64 / ds_write_b64), so this FIXME may be stale or may refer
        ; to a different (e.g. paired) form -- confirm before removing it.
    154 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_lds_to_lds_align8:
    155 
    156 ; SI: ds_read_b64
    157 ; SI: ds_read_b64
    158 ; SI: ds_read_b64
    159 ; SI: ds_read_b64
    160 
    161 ; SI: ds_write_b64
    162 ; SI: ds_write_b64
    163 ; SI: ds_write_b64
    164 ; SI: ds_write_b64
    165 
    166 ; SI: s_endpgm
    167 define void @test_small_memcpy_i64_lds_to_lds_align8(i64 addrspace(3)* noalias %out, i64 addrspace(3)* noalias %in) nounwind {
        ; The memcpy intrinsic takes i8 pointers, so view both i64 addrspace(3)
        ; arguments as i8 addrspace(3)* first.
    168   %bcin = bitcast i64 addrspace(3)* %in to i8 addrspace(3)*
    169   %bcout = bitcast i64 addrspace(3)* %out to i8 addrspace(3)*
        ; Operands: dest, src, length = 32 bytes, alignment = 8, volatile = false.
    170   call void @llvm.memcpy.p3i8.p3i8.i32(i8 addrspace(3)* %bcout, i8 addrspace(3)* %bcin, i32 32, i32 8, i1 false) nounwind
    171   ret void
    172 }
    173 
    174 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align1:
    175 ; SI-DAG: buffer_load_ubyte
    176 ; SI-DAG: buffer_store_byte
    177 ; SI-DAG: buffer_load_ubyte
    178 ; SI-DAG: buffer_store_byte
    179 ; SI-DAG: buffer_load_ubyte
    180 ; SI-DAG: buffer_store_byte
    181 ; SI-DAG: buffer_load_ubyte
    182 ; SI-DAG: buffer_store_byte
    183 ; SI-DAG: buffer_load_ubyte
    184 ; SI-DAG: buffer_store_byte
    185 ; SI-DAG: buffer_load_ubyte
    186 ; SI-DAG: buffer_store_byte
    187 ; SI-DAG: buffer_load_ubyte
    188 ; SI-DAG: buffer_store_byte
    189 ; SI-DAG: buffer_load_ubyte
    190 ; SI-DAG: buffer_store_byte
    191 
    192 ; SI-DAG: buffer_load_ubyte
    193 ; SI-DAG: buffer_store_byte
    194 ; SI-DAG: buffer_load_ubyte
    195 ; SI-DAG: buffer_store_byte
    196 ; SI-DAG: buffer_load_ubyte
    197 ; SI-DAG: buffer_store_byte
    198 ; SI-DAG: buffer_load_ubyte
    199 ; SI-DAG: buffer_store_byte
    200 ; SI-DAG: buffer_load_ubyte
    201 ; SI-DAG: buffer_store_byte
    202 ; SI-DAG: buffer_load_ubyte
    203 ; SI-DAG: buffer_store_byte
    204 ; SI-DAG: buffer_load_ubyte
    205 ; SI-DAG: buffer_store_byte
    206 ; SI-DAG: buffer_load_ubyte
    207 ; SI-DAG: buffer_store_byte
    208 
    209 ; SI-DAG: buffer_load_ubyte
    210 ; SI-DAG: buffer_store_byte
    211 ; SI-DAG: buffer_load_ubyte
    212 ; SI-DAG: buffer_store_byte
    213 ; SI-DAG: buffer_load_ubyte
    214 ; SI-DAG: buffer_store_byte
    215 ; SI-DAG: buffer_load_ubyte
    216 ; SI-DAG: buffer_store_byte
    217 ; SI-DAG: buffer_load_ubyte
    218 ; SI-DAG: buffer_store_byte
    219 ; SI-DAG: buffer_load_ubyte
    220 ; SI-DAG: buffer_store_byte
    221 ; SI-DAG: buffer_load_ubyte
    222 ; SI-DAG: buffer_store_byte
    223 ; SI-DAG: buffer_load_ubyte
    224 ; SI-DAG: buffer_store_byte
    225 
    226 ; SI-DAG: buffer_load_ubyte
    227 ; SI-DAG: buffer_store_byte
    228 ; SI-DAG: buffer_load_ubyte
    229 ; SI-DAG: buffer_store_byte
    230 ; SI-DAG: buffer_load_ubyte
    231 ; SI-DAG: buffer_store_byte
    232 ; SI-DAG: buffer_load_ubyte
    233 ; SI-DAG: buffer_store_byte
    234 ; SI-DAG: buffer_load_ubyte
    235 ; SI-DAG: buffer_store_byte
    236 ; SI-DAG: buffer_load_ubyte
    237 ; SI-DAG: buffer_store_byte
    238 ; SI-DAG: buffer_load_ubyte
    239 ; SI-DAG: buffer_store_byte
    240 ; SI-DAG: buffer_load_ubyte
    241 ; SI-DAG: buffer_store_byte
    242 
    243 ; SI: s_endpgm
    244 define void @test_small_memcpy_i64_global_to_global_align1(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
        ; View both i64 addrspace(1) arguments as the i8 pointers the memcpy
        ; intrinsic expects (destination, then source).
    245   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
    246   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
        ; Operands: dest, src, length = 32 bytes (i64), alignment = 1, volatile = false.
    247   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 1, i1 false) nounwind
    248   ret void
    249 }
    250 
    251 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align2:
    252 ; SI-DAG: buffer_load_ushort
    253 ; SI-DAG: buffer_load_ushort
    254 ; SI-DAG: buffer_load_ushort
    255 ; SI-DAG: buffer_load_ushort
    256 ; SI-DAG: buffer_load_ushort
    257 ; SI-DAG: buffer_load_ushort
    258 ; SI-DAG: buffer_load_ushort
    259 ; SI-DAG: buffer_load_ushort
    260 ; SI-DAG: buffer_load_ushort
    261 ; SI-DAG: buffer_load_ushort
    262 ; SI-DAG: buffer_load_ushort
    263 ; SI-DAG: buffer_load_ushort
    264 ; SI-DAG: buffer_load_ushort
    265 ; SI-DAG: buffer_load_ushort
    266 ; SI-DAG: buffer_load_ushort
    267 ; SI-DAG: buffer_load_ushort
    268 
    269 ; SI-DAG: buffer_store_short
    270 ; SI-DAG: buffer_store_short
    271 ; SI-DAG: buffer_store_short
    272 ; SI-DAG: buffer_store_short
    273 ; SI-DAG: buffer_store_short
    274 ; SI-DAG: buffer_store_short
    275 ; SI-DAG: buffer_store_short
    276 ; SI-DAG: buffer_store_short
    277 ; SI-DAG: buffer_store_short
    278 ; SI-DAG: buffer_store_short
    279 ; SI-DAG: buffer_store_short
    280 ; SI-DAG: buffer_store_short
    281 ; SI-DAG: buffer_store_short
    282 ; SI-DAG: buffer_store_short
    283 ; SI-DAG: buffer_store_short
    284 ; SI-DAG: buffer_store_short
    285 
    286 ; SI: s_endpgm
    287 define void @test_small_memcpy_i64_global_to_global_align2(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
        ; View both i64 addrspace(1) arguments as the i8 pointers the memcpy
        ; intrinsic expects (destination, then source).
    288   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
    289   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
        ; Operands: dest, src, length = 32 bytes (i64), alignment = 2, volatile = false.
    290   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 2, i1 false) nounwind
    291   ret void
    292 }
    293 
    294 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align4:
    295 ; SI: buffer_load_dwordx4
    296 ; SI: buffer_load_dwordx4
    297 ; SI: buffer_store_dwordx4
    298 ; SI: buffer_store_dwordx4
    299 ; SI: s_endpgm
    300 define void @test_small_memcpy_i64_global_to_global_align4(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
        ; View both i64 addrspace(1) arguments as the i8 pointers the memcpy
        ; intrinsic expects (destination, then source).
    301   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
    302   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
        ; Operands: dest, src, length = 32 bytes (i64), alignment = 4, volatile = false.
    303   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 4, i1 false) nounwind
    304   ret void
    305 }
    306 
    307 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align8:
    308 ; SI: buffer_load_dwordx4
    309 ; SI: buffer_load_dwordx4
    310 ; SI: buffer_store_dwordx4
    311 ; SI: buffer_store_dwordx4
    312 ; SI: s_endpgm
    313 define void @test_small_memcpy_i64_global_to_global_align8(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
        ; View both i64 addrspace(1) arguments as the i8 pointers the memcpy
        ; intrinsic expects (destination, then source).
    314   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
    315   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
        ; Operands: dest, src, length = 32 bytes (i64), alignment = 8, volatile = false.
    316   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 8, i1 false) nounwind
    317   ret void
    318 }
    319 
    320 ; FUNC-LABEL: {{^}}test_small_memcpy_i64_global_to_global_align16:
    321 ; SI: buffer_load_dwordx4
    322 ; SI: buffer_load_dwordx4
    323 ; SI: buffer_store_dwordx4
    324 ; SI: buffer_store_dwordx4
    325 ; SI: s_endpgm
    326 define void @test_small_memcpy_i64_global_to_global_align16(i64 addrspace(1)* noalias %out, i64 addrspace(1)* noalias %in) nounwind {
        ; View both i64 addrspace(1) arguments as the i8 pointers the memcpy
        ; intrinsic expects (destination, then source).
    327   %dst = bitcast i64 addrspace(1)* %out to i8 addrspace(1)*
    328   %src = bitcast i64 addrspace(1)* %in to i8 addrspace(1)*
        ; Operands: dest, src, length = 32 bytes (i64), alignment = 16, volatile = false.
    329   call void @llvm.memcpy.p1i8.p1i8.i64(i8 addrspace(1)* %dst, i8 addrspace(1)* %src, i64 32, i32 16, i1 false) nounwind
    330   ret void
    331 }
    332