Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SIVI -check-prefix=FUNC %s
      3 ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GFX9 -check-prefix=FUNC %s
      4 ; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood -verify-machineinstrs < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      5 ; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman -verify-machineinstrs < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
      6 
      7 ; FUNC-LABEL: {{^}}store_i1:
        ; Stores the constant i1 true through an addrspace(1) pointer.
        ; EG and CM each expect exactly one MEM_RAT MSKOR; SIVI expects a
        ; buffer byte store and GFX9 a global byte store.
      8 ; EG: MEM_RAT MSKOR
      9 ; EG-NOT: MEM_RAT MSKOR
     10 
     11 ; CM: MEM_RAT MSKOR
     12 ; CM-NOT: MEM_RAT MSKOR
     13 
     14 ; SIVI: buffer_store_byte
     15 ; GFX9: global_store_byte
     16 define amdgpu_kernel void @store_i1(i1 addrspace(1)* %out) {
     17 entry:
     18   store i1 true, i1 addrspace(1)* %out
     19   ret void
     20 }
     21 
     22 ; i8 store: an i8 kernel argument is written through an addrspace(1) pointer.
        ; EG expects a single MEM_RAT MSKOR, followed by a VTX_READ_8, two AND_INT
        ; and three LSHL in that order; SIVI and GFX9 expect a byte store.
        ; The register-number patterns accept multi-digit indices so the check does
        ; not depend on which T register the allocator happens to pick.
     23 ; FUNC-LABEL: {{^}}store_i8:
     24 ; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]+]].XW, T{{[0-9]+}}.X
     25 ; EG-NOT: MEM_RAT MSKOR
     26 
     27 ; EG: VTX_READ_8
     28 ; EG: AND_INT
     29 ; EG: AND_INT
     30 ; EG: LSHL
     31 ; EG: LSHL
     32 ; EG: LSHL
     33 
     34 ; SIVI: buffer_store_byte
     35 ; GFX9: global_store_byte
     36 define amdgpu_kernel void @store_i8(i8 addrspace(1)* %out, i8 %in) {
     37 entry:
     38   store i8 %in, i8 addrspace(1)* %out
     39   ret void
     40 }
     41 
     42 ; i16 store: an i16 kernel argument is written through an addrspace(1) pointer.
        ; EG expects a single MEM_RAT MSKOR, followed by a VTX_READ_16, two AND_INT
        ; and three LSHL in that order; SIVI and GFX9 expect a short store.
        ; The register-number patterns accept multi-digit indices so the check does
        ; not depend on which T register the allocator happens to pick.
     43 ; FUNC-LABEL: {{^}}store_i16:
     44 ; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]+]].XW, T{{[0-9]+}}.X
     45 ; EG-NOT: MEM_RAT MSKOR
     46 
     47 ; EG: VTX_READ_16
     48 ; EG: AND_INT
     49 ; EG: AND_INT
     50 ; EG: LSHL
     51 ; EG: LSHL
     52 ; EG: LSHL
     53 
     54 
     55 ; SIVI: buffer_store_short
     56 ; GFX9: global_store_short
     57 define amdgpu_kernel void @store_i16(i16 addrspace(1)* %out, i16 %in) {
     58 entry:
     59   store i16 %in, i16 addrspace(1)* %out
     60   ret void
     61 }
     62 
     63 ; FUNC-LABEL: {{^}}store_i24:
        ; Stores an i24 kernel argument through an addrspace(1) pointer.
        ; SIVI expects a scalar right shift by 16 plus one byte store and one short
        ; store (either order). GFX9 expects a global_store_byte_d16_hi at offset 2
        ; plus a short store (either order). EG expects two MEM_RAT MSKOR.
        ; The address register pair pattern accepts multi-digit register numbers,
        ; like the dword pattern used for store_i25.
     64 ; SIVI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
     65 ; SIVI-DAG: buffer_store_byte
     66 ; SIVI-DAG: buffer_store_short
     67 
     68 ; GFX9-DAG: global_store_byte_d16_hi v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}, off offset:2
     69 ; GFX9-DAG: global_store_short
     70 
     71 ; EG: MEM_RAT MSKOR
     72 ; EG: MEM_RAT MSKOR
     73 define amdgpu_kernel void @store_i24(i24 addrspace(1)* %out, i24 %in) {
     74 entry:
     75   store i24 %in, i24 addrspace(1)* %out
     76   ret void
     77 }
     78 
     79 ; FUNC-LABEL: {{^}}store_i25:
        ; Stores an i25 kernel argument through an addrspace(1) pointer.
        ; The GCN runs expect the value to be masked with 0x1ffffff by s_and_b32,
        ; copied into a VGPR, and written with one dword store. EG and CM expect
        ; one cacheless store and no further MEM_RAT instruction.
     80 ; GCN: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x1ffffff{{$}}
     81 ; GCN: v_mov_b32_e32 [[VAND:v[0-9]+]], [[AND]]
     82 ; SIVI: buffer_store_dword [[VAND]]
     83 ; GFX9: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, [[VAND]]
     84 
     85 ; EG: MEM_RAT_CACHELESS STORE_RAW
     86 ; EG-NOT: MEM_RAT
     87 
     88 ; CM: MEM_RAT_CACHELESS STORE_DWORD
     89 ; CM-NOT: MEM_RAT
     90 define amdgpu_kernel void @store_i25(i25 addrspace(1)* %out, i25 %in) {
     91 entry:
     92   store i25 %in, i25 addrspace(1)* %out
     93   ret void
     94 }
     95 
     96 ; FUNC-LABEL: {{^}}store_v2i8:
     97 ; v2i8 is naturally 2B aligned
        ; Truncates a <2 x i32> argument to <2 x i8> and stores it with no explicit
        ; alignment. EG and CM each expect exactly one MEM_RAT MSKOR; SIVI and GFX9
        ; expect a single short store.
     98 ; EG: MEM_RAT MSKOR
     99 ; EG-NOT: MEM_RAT MSKOR
    100 
    101 ; CM: MEM_RAT MSKOR
    102 ; CM-NOT: MEM_RAT MSKOR
    103 
    104 ; SIVI: buffer_store_short
    105 ; GFX9: global_store_short
    106 define amdgpu_kernel void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
    107 entry:
    108   %0 = trunc <2 x i32> %in to <2 x i8>
    109   store <2 x i8> %0, <2 x i8> addrspace(1)* %out
    110   ret void
    111 }
    112 
    113 ; FUNC-LABEL: {{^}}store_v2i8_unaligned:
        ; Truncates a <2 x i32> argument to <2 x i8> and stores it with align 1.
        ; EG and CM each expect exactly two MEM_RAT MSKOR; the verde and tonga
        ; runs expect a byte store.
        ; The GCN byte-store check uses the SIVI prefix because that is the prefix
        ; the verde and tonga FileCheck invocations enable; no invocation in this
        ; file enables a bare SI prefix. GFX9 output is not checked for this case.
    114 ; EG: MEM_RAT MSKOR
    115 ; EG: MEM_RAT MSKOR
    116 ; EG-NOT: MEM_RAT MSKOR
    117 
    118 ; CM: MEM_RAT MSKOR
    119 ; CM: MEM_RAT MSKOR
    120 ; CM-NOT: MEM_RAT MSKOR
    121 
    122 ; SIVI: buffer_store_byte
    123 define amdgpu_kernel void @store_v2i8_unaligned(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
    124 entry:
    125   %0 = trunc <2 x i32> %in to <2 x i8>
    126   store <2 x i8> %0, <2 x i8> addrspace(1)* %out, align 1
    127   ret void
    128 }
    129 
    130 
    131 ; FUNC-LABEL: {{^}}store_v2i16:
        ; Truncates a <2 x i32> argument to <2 x i16> and stores it with no explicit
        ; alignment. Every target is expected to emit a dword-sized store
        ; (cacheless STORE_RAW / STORE_DWORD on EG / CM, a dword store on GCN).
    132 ; EG: MEM_RAT_CACHELESS STORE_RAW
    133 
    134 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    135 
    136 ; SIVI: buffer_store_dword
    137 ; GFX9: global_store_dword
    138 define amdgpu_kernel void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
    139 entry:
    140   %0 = trunc <2 x i32> %in to <2 x i16>
    141   store <2 x i16> %0, <2 x i16> addrspace(1)* %out
    142   ret void
    143 }
    144 
    145 ; FUNC-LABEL: {{^}}store_v2i16_unaligned:
        ; Same value as store_v2i16 but the store is only align 2.
        ; EG and CM each expect exactly two MEM_RAT MSKOR and no cacheless dword
        ; store after them; SIVI and GFX9 expect two short stores.
    146 ; EG: MEM_RAT MSKOR
    147 ; EG: MEM_RAT MSKOR
    148 ; EG-NOT: MEM_RAT MSKOR
    149 ; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
    150 
    151 ; CM: MEM_RAT MSKOR
    152 ; CM: MEM_RAT MSKOR
    153 ; CM-NOT: MEM_RAT MSKOR
    154 ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
    155 
    156 ; SIVI: buffer_store_short
    157 ; SIVI: buffer_store_short
    158 
    159 ; GFX9: global_store_short
    160 ; GFX9: global_store_short
    161 define amdgpu_kernel void @store_v2i16_unaligned(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
    162 entry:
    163   %0 = trunc <2 x i32> %in to <2 x i16>
    164   store <2 x i16> %0, <2 x i16> addrspace(1)* %out, align 2
    165   ret void
    166 }
    167 
    168 ; FUNC-LABEL: {{^}}store_v4i8:
        ; Truncates a <4 x i32> argument to <4 x i8> and stores it with no explicit
        ; alignment. Every target is expected to emit a dword-sized store.
    169 ; EG: MEM_RAT_CACHELESS STORE_RAW
    170 
    171 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    172 
    173 ; SIVI: buffer_store_dword
    174 ; GFX9: global_store_dword
    175 define amdgpu_kernel void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
    176 entry:
    177   %0 = trunc <4 x i32> %in to <4 x i8>
    178   store <4 x i8> %0, <4 x i8> addrspace(1)* %out
    179   ret void
    180 }
    181 
    182 ; FUNC-LABEL: {{^}}store_v4i8_unaligned:
        ; Truncates a <4 x i32> argument to <4 x i8> and stores it with align 1.
        ; EG and CM each expect exactly four MEM_RAT MSKOR and no cacheless dword
        ; store after them; the verde and tonga runs expect four byte stores and
        ; no dword store after them.
        ; The GCN checks use the SIVI prefix because that is the prefix the verde
        ; and tonga FileCheck invocations enable; no invocation in this file
        ; enables a bare SI prefix. GFX9 output is not checked for this case.
    183 ; EG: MEM_RAT MSKOR
    184 ; EG: MEM_RAT MSKOR
    185 ; EG: MEM_RAT MSKOR
    186 ; EG: MEM_RAT MSKOR
    187 ; EG-NOT: MEM_RAT MSKOR
    188 ; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
    189 
    190 ; CM: MEM_RAT MSKOR
    191 ; CM: MEM_RAT MSKOR
    192 ; CM: MEM_RAT MSKOR
    193 ; CM: MEM_RAT MSKOR
    194 ; CM-NOT: MEM_RAT MSKOR
    195 ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
    196 
    197 ; SIVI: buffer_store_byte
    198 ; SIVI: buffer_store_byte
    199 ; SIVI: buffer_store_byte
    200 ; SIVI: buffer_store_byte
    201 ; SIVI-NOT: buffer_store_dword
    202 define amdgpu_kernel void @store_v4i8_unaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
    203 entry:
    204   %0 = trunc <4 x i32> %in to <4 x i8>
    205   store <4 x i8> %0, <4 x i8> addrspace(1)* %out, align 1
    206   ret void
    207 }
    208 
    209 ; FUNC-LABEL: {{^}}store_v4i8_halfaligned:
        ; Truncates a <4 x i32> argument to <4 x i8> and stores it with align 2.
        ; EG and CM each expect exactly two MEM_RAT MSKOR and no cacheless dword
        ; store after them; the verde and tonga runs expect two short stores and
        ; no dword store after them.
        ; The GCN checks use the SIVI prefix because that is the prefix the verde
        ; and tonga FileCheck invocations enable; no invocation in this file
        ; enables a bare SI prefix. GFX9 output is not checked for this case.
    210 ; EG: MEM_RAT MSKOR
    211 ; EG: MEM_RAT MSKOR
    212 ; EG-NOT: MEM_RAT MSKOR
    213 ; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
    214 
    215 ; CM: MEM_RAT MSKOR
    216 ; CM: MEM_RAT MSKOR
    217 ; CM-NOT: MEM_RAT MSKOR
    218 ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
    219 
    220 ; SIVI: buffer_store_short
    221 ; SIVI: buffer_store_short
    222 ; SIVI-NOT: buffer_store_dword
    223 define amdgpu_kernel void @store_v4i8_halfaligned(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
    224 entry:
    225   %0 = trunc <4 x i32> %in to <4 x i8>
    226   store <4 x i8> %0, <4 x i8> addrspace(1)* %out, align 2
    227   ret void
    228 }
    229 
    230 ; floating-point store: a float kernel argument is written through an
        ; addrspace(1) pointer. EG and CM expect a cacheless store whose data and
        ; address operands are both .X channels; SIVI and GFX9 expect a dword store.
    231 ; FUNC-LABEL: {{^}}store_f32:
    232 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
    233 
    234 ; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
    235 
    236 ; SIVI: buffer_store_dword
    237 ; GFX9: global_store_dword
    238 
    239 define amdgpu_kernel void @store_f32(float addrspace(1)* %out, float %in) {
    240   store float %in, float addrspace(1)* %out
    241   ret void
    242 }
    243 
    244 ; FUNC-LABEL: {{^}}store_v4i16:
        ; Truncates a <4 x i32> argument to <4 x i16> and stores it with no explicit
        ; alignment. EG expects a cacheless store of an .XY register pair, CM a
        ; cacheless dword store, SIVI and GFX9 a dwordx2 store.
    245 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
    246 
    247 ; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}
    248 
    249 ; SIVI: buffer_store_dwordx2
    250 ; GFX9: global_store_dwordx2
    251 define amdgpu_kernel void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
    252 entry:
    253   %0 = trunc <4 x i32> %in to <4 x i16>
    254   store <4 x i16> %0, <4 x i16> addrspace(1)* %out
    255   ret void
    256 }
    257 
    258 ; vec2 floating-point stores: a <2 x float> is assembled from two float
        ; kernel arguments with insertelement and written through an addrspace(1)
        ; pointer. EG and CM expect a cacheless store; SIVI and GFX9 expect a
        ; dwordx2 store.
    259 ; FUNC-LABEL: {{^}}store_v2f32:
    260 ; EG: MEM_RAT_CACHELESS STORE_RAW
    261 
    262 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    263 
    264 ; SIVI: buffer_store_dwordx2
    265 ; GFX9: global_store_dwordx2
    266 
    267 define amdgpu_kernel void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
    268 entry:
    269   %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
    270   %1 = insertelement <2 x float> %0, float %b, i32 1
    271   store <2 x float> %1, <2 x float> addrspace(1)* %out
    272   ret void
    273 }
    274 
    275 ; FUNC-LABEL: {{^}}store_v3i32:
        ; Stores a <3 x i32> kernel argument with align 16.
        ; SIVI and GFX9 expect one dwordx2 store plus one dword store, in either
        ; order. EG expects two cacheless raw stores, one of which writes an .XY
        ; register pair. There are no CM checks for this case.
    276 ; SIVI-DAG: buffer_store_dwordx2
    277 ; SIVI-DAG: buffer_store_dword v
    278 
    279 ; GFX9-DAG: global_store_dwordx2
    280 ; GFX9-DAG: global_store_dword v
    281 
    282 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.[XYZW]}}, {{T[0-9]+\.[XYZW]}},
    283 ; EG-DAG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XY}}, {{T[0-9]+\.[XYZW]}},
    284 define amdgpu_kernel void @store_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> %a) nounwind {
    285   store <3 x i32> %a, <3 x i32> addrspace(1)* %out, align 16
    286   ret void
    287 }
    288 
    289 ; FUNC-LABEL: {{^}}store_v4i32:
        ; Stores a <4 x i32> kernel argument with no explicit alignment.
        ; EG expects exactly one cacheless raw store of an .XYZW register, CM
        ; exactly one cacheless dword store, SIVI and GFX9 a dwordx4 store.
    290 ; EG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XYZW}}
    291 ; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
    292 
    293 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    294 ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
    295 
    296 ; SIVI: buffer_store_dwordx4
    297 ; GFX9: global_store_dwordx4
    298 define amdgpu_kernel void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
    299 entry:
    300   store <4 x i32> %in, <4 x i32> addrspace(1)* %out
    301   ret void
    302 }
    303 
    304 ; FUNC-LABEL: {{^}}store_v4i32_unaligned:
        ; Same as store_v4i32 but the store is only align 4. The expectations are
        ; unchanged: the store must still be emitted as a single 128-bit store
        ; (one .XYZW raw store on EG, one dword store on CM, dwordx4 on GCN).
    305 ; EG: MEM_RAT_CACHELESS STORE_RAW {{T[0-9]+\.XYZW}}
    306 ; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
    307 
    308 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    309 ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
    310 
    311 ; SIVI: buffer_store_dwordx4
    312 ; GFX9: global_store_dwordx4
    313 define amdgpu_kernel void @store_v4i32_unaligned(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
    314 entry:
    315   store <4 x i32> %in, <4 x i32> addrspace(1)* %out, align 4
    316   ret void
    317 }
    318 
    319 ; v4f32 store: a <4 x float> is loaded from one addrspace(1) pointer and
        ; stored through another. EG expects exactly one cacheless raw store of an
        ; .XYZW register addressed by an .X channel, CM exactly one cacheless dword
        ; store, SIVI and GFX9 a dwordx4 store.
    320 ; FUNC-LABEL: {{^}}store_v4f32:
    321 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XYZW, T[0-9]+\.X}}, 1
    322 ; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
    323 
    324 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    325 ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
    326 
    327 ; SIVI: buffer_store_dwordx4
    328 ; GFX9: global_store_dwordx4
    329 define amdgpu_kernel void @store_v4f32(<4 x float> addrspace(1)* %out, <4 x float> addrspace(1)* %in) {
    330   %1 = load <4 x float>, <4 x float> addrspace(1) * %in
    331   store <4 x float> %1, <4 x float> addrspace(1)* %out
    332   ret void
    333 }
    334 
    335 ; FUNC-LABEL: {{^}}store_i64_i8:
        ; Truncates an i64 kernel argument to i8 and stores it through an
        ; addrspace(1) pointer. EG and CM expect a MEM_RAT MSKOR; SIVI and GFX9
        ; expect a byte store.
    336 ; EG: MEM_RAT MSKOR
    337 
    338 ; CM: MEM_RAT MSKOR
    339 
    340 ; SIVI: buffer_store_byte
    341 ; GFX9: global_store_byte
    342 define amdgpu_kernel void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
    343 entry:
    344   %0 = trunc i64 %in to i8
    345   store i8 %0, i8 addrspace(1)* %out
    346   ret void
    347 }
    348 
    349 ; FUNC-LABEL: {{^}}store_i64_i16:
        ; Truncates an i64 kernel argument to i16 and stores it through an
        ; addrspace(1) pointer. EG expects a MEM_RAT MSKOR; SIVI and GFX9 expect a
        ; short store. There is no CM check for this case.
    350 ; EG: MEM_RAT MSKOR
    351 ; SIVI: buffer_store_short
    352 ; GFX9: global_store_short
    353 define amdgpu_kernel void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
    354 entry:
    355   %0 = trunc i64 %in to i16
    356   store i16 %0, i16 addrspace(1)* %out
    357   ret void
    358 }
    359 
    360 ; The stores in this function are combined by the optimizer to create a
    361 ; 64-bit store with 32-bit alignment.  This is legal and the legalizer
    362 ; should not try to split the 64-bit store back into 2 32-bit stores.
        ; The kernel loads two consecutive i32 values from an addrspace(4) pointer
        ; and stores them to two consecutive i32 slots of an addrspace(1) pointer,
        ; every access being align 4. EG expects exactly one cacheless raw store of
        ; an .XY pair, CM exactly one cacheless dword store, SIVI and GFX9 a
        ; dwordx2 store.
    363 
    364 ; FUNC-LABEL: {{^}}vecload2:
    365 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.XY, T[0-9]+\.X}}, 1
    366 ; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
    367 
    368 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    369 ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
    370 
    371 ; SIVI: buffer_store_dwordx2
    372 ; GFX9: global_store_dwordx2
    373 define amdgpu_kernel void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(4)* nocapture %mem) #0 {
    374 entry:
    375   %0 = load i32, i32 addrspace(4)* %mem, align 4
    376   %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(4)* %mem, i64 1
    377   %1 = load i32, i32 addrspace(4)* %arrayidx1.i, align 4
    378   store i32 %0, i32 addrspace(1)* %out, align 4
    379   %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
    380   store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
    381   ret void
    382 }
    383 
    384 ; When i128 was a legal type, this program generated "cannot select" errors.
        ; The kernel writes the constants 1, 1, 2, 2 to four consecutive i32 slots
        ; of an addrspace(1) pointer, each store being align 4. EG expects a
        ; cacheless raw store of an .XYZW register, CM a cacheless dword store,
        ; SIVI and GFX9 a dwordx4 store.
    385 
    386 ; FUNC-LABEL: {{^}}"i128-const-store":
    387 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 1
    388 
    389 ; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}, T{{[0-9]+}}.X
    390 
    391 ; SIVI: buffer_store_dwordx4
    392 ; GFX9: global_store_dwordx4
    393 define amdgpu_kernel void @i128-const-store(i32 addrspace(1)* %out) {
    394 entry:
    395   store i32 1, i32 addrspace(1)* %out, align 4
    396   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
    397   store i32 1, i32 addrspace(1)* %arrayidx2, align 4
    398   %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
    399   store i32 2, i32 addrspace(1)* %arrayidx4, align 4
    400   %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
    401   store i32 2, i32 addrspace(1)* %arrayidx6, align 4
    402   ret void
    403 }
    404 
    405 attributes #0 = { nounwind }
    406