Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      2 ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      3 ; RUN: llc -march=amdgcn -mtriple=amdgcn---amdgiz -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
      4 ; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
      5 ; RUN: llc -march=r600 -mtriple=r600---amdgiz -mcpu=cayman < %s | FileCheck -check-prefixes=CM,FUNC %s
      6 
      7 ; FUNC-LABEL: {{^}}store_local_i1:
        ; Expected lowering of a constant i1 store through an addrspace(3) pointer.
        ; The SICIVI runs (verde, tonga) must write m0 with s_mov_b32 ahead of the
        ; store; the GFX9 run (gfx900) must not reference m0.
      8 ; SICIVI: s_mov_b32 m0
      9 ; GFX9-NOT: m0
     10 
        ; Both r600 runs (redwood, cayman) must emit a byte-sized LDS write.
     11 ; EG: LDS_BYTE_WRITE
     12 
     13 ; CM: LDS_BYTE_WRITE
     14 
        ; Every amdgcn run must emit an 8-bit DS write.
     15 ; GCN: ds_write_b8
        ; Kernel under test; stores the constant `true` to %out.
     16 define amdgpu_kernel void @store_local_i1(i1 addrspace(3)* %out) {
     17 entry:
     18   store i1 true, i1 addrspace(3)* %out
     19   ret void
     20 }
     21 
     22 ; FUNC-LABEL: {{^}}store_local_i8:
        ; Expected lowering of an i8 kernel-argument store through an addrspace(3)
        ; pointer. The SICIVI runs must write m0 with s_mov_b32 ahead of the store;
        ; the GFX9 run must not reference m0.
     23 ; SICIVI: s_mov_b32 m0
     24 ; GFX9-NOT: m0
     25 
        ; Both r600 runs must emit a byte-sized LDS write.
     26 ; EG: LDS_BYTE_WRITE
     27 
     28 ; CM: LDS_BYTE_WRITE
     29 
        ; Every amdgcn run must emit an 8-bit DS write.
     30 ; GCN: ds_write_b8
        ; Kernel under test; stores the i8 argument %in to %out.
     31 define amdgpu_kernel void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
     32   store i8 %in, i8 addrspace(3)* %out
     33   ret void
     34 }
     35 
     36 ; FUNC-LABEL: {{^}}store_local_i16:
        ; Expected lowering of an i16 kernel-argument store through an addrspace(3)
        ; pointer. The SICIVI runs must write m0 with s_mov_b32 ahead of the store;
        ; the GFX9 run must not reference m0.
     37 ; SICIVI: s_mov_b32 m0
     38 ; GFX9-NOT: m0
     39 
        ; Both r600 runs must emit a short-sized LDS write.
     40 ; EG: LDS_SHORT_WRITE
     41 
     42 ; CM: LDS_SHORT_WRITE
     43 
        ; Every amdgcn run must emit a 16-bit DS write.
     44 ; GCN: ds_write_b16
        ; Kernel under test; stores the i16 argument %in to %out.
     45 define amdgpu_kernel void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
     46   store i16 %in, i16 addrspace(3)* %out
     47   ret void
     48 }
     49 
     50 ; FUNC-LABEL: {{^}}store_local_v2i16:
        ; Expected lowering of a <2 x i16> store (no explicit alignment) through an
        ; addrspace(3) pointer. The SICIVI runs must write m0 with s_mov_b32 ahead
        ; of the store; the GFX9 run must not reference m0.
     51 ; SICIVI: s_mov_b32 m0
     52 ; GFX9-NOT: m0
     53 
        ; Both r600 runs must emit an LDS_WRITE for the vector.
     54 ; EG: LDS_WRITE
     55 
     56 ; CM: LDS_WRITE
     57 
        ; Every amdgcn run must emit a single 32-bit DS write for the vector.
     58 ; GCN: ds_write_b32
        ; Kernel under test; stores the <2 x i16> argument %in to %out.
     59 define amdgpu_kernel void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
     60 entry:
     61   store <2 x i16> %in, <2 x i16> addrspace(3)* %out
     62   ret void
     63 }
     64 
     65 ; FUNC-LABEL: {{^}}store_local_v4i8:
        ; Expected lowering of a <4 x i8> store (no explicit alignment) through an
        ; addrspace(3) pointer. The SICIVI runs must write m0 with s_mov_b32 ahead
        ; of the store; the GFX9 run must not reference m0.
     66 ; SICIVI: s_mov_b32 m0
     67 ; GFX9-NOT: m0
     68 
        ; Both r600 runs must emit an LDS_WRITE for the vector.
     69 ; EG: LDS_WRITE
     70 
     71 ; CM: LDS_WRITE
     72 
        ; Every amdgcn run must emit a single 32-bit DS write for the vector.
     73 ; GCN: ds_write_b32
        ; Kernel under test; stores the <4 x i8> argument %in to %out.
     74 define amdgpu_kernel void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
     75 entry:
     76   store <4 x i8> %in, <4 x i8> addrspace(3)* %out
     77   ret void
     78 }
     79 
     80 ; FUNC-LABEL: {{^}}store_local_v4i8_unaligned:
        ; Expected lowering of a <4 x i8> store marked `align 1`: the vector must be
        ; split into four byte-sized writes on every target. The SICIVI runs must
        ; write m0 with s_mov_b32 ahead of the stores; the GFX9 run must not
        ; reference m0.
     81 ; SICIVI: s_mov_b32 m0
     82 ; GFX9-NOT: m0
     83 
        ; redwood: four byte writes, and no LDS_WRITE after them.
     84 ; EG: LDS_BYTE_WRITE
     85 ; EG: LDS_BYTE_WRITE
     86 ; EG: LDS_BYTE_WRITE
     87 ; EG: LDS_BYTE_WRITE
     88 ; EG-NOT: LDS_WRITE
     89 
        ; cayman: four byte writes, and no LDS_WRITE after them.
     90 ; CM: LDS_BYTE_WRITE
     91 ; CM: LDS_BYTE_WRITE
     92 ; CM: LDS_BYTE_WRITE
     93 ; CM: LDS_BYTE_WRITE
     94 ; CM-NOT: LDS_WRITE
     95 
        ; Every amdgcn run: four 8-bit DS writes.
     96 ; GCN: ds_write_b8
     97 ; GCN: ds_write_b8
     98 ; GCN: ds_write_b8
     99 ; GCN: ds_write_b8
        ; Kernel under test; the `align 1` on the store is what this case exercises.
    100 define amdgpu_kernel void @store_local_v4i8_unaligned(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
    101 entry:
    102   store <4 x i8> %in, <4 x i8> addrspace(3)* %out, align 1
    103   ret void
    104 }
    105 
    106 ; FUNC-LABEL: {{^}}store_local_v4i8_halfaligned:
        ; Expected lowering of a <4 x i8> store marked `align 2`: the vector must be
        ; split into two short-sized writes on every target. The SICIVI runs must
        ; write m0 with s_mov_b32 ahead of the stores; the GFX9 run must not
        ; reference m0.
    107 ; SICIVI: s_mov_b32 m0
    108 ; GFX9-NOT: m0
    109 
        ; redwood: two short writes, and no LDS_WRITE after them.
    110 ; EG: LDS_SHORT_WRITE
    111 ; EG: LDS_SHORT_WRITE
    112 ; EG-NOT: LDS_WRITE
    113 
        ; cayman: two short writes, and no LDS_WRITE after them.
    114 ; CM: LDS_SHORT_WRITE
    115 ; CM: LDS_SHORT_WRITE
    116 ; CM-NOT: LDS_WRITE
    117 
        ; Every amdgcn run: two 16-bit DS writes.
    118 ; GCN: ds_write_b16
    119 ; GCN: ds_write_b16
        ; Kernel under test; the `align 2` on the store is what this case exercises.
    120 define amdgpu_kernel void @store_local_v4i8_halfaligned(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
    121 entry:
    122   store <4 x i8> %in, <4 x i8> addrspace(3)* %out, align 2
    123   ret void
    124 }
    125 
    126 ; FUNC-LABEL: {{^}}store_local_v2i32:
        ; Expected lowering of a <2 x i32> store (no explicit alignment) through an
        ; addrspace(3) pointer. The SICIVI runs must write m0 with s_mov_b32 ahead
        ; of the store; the GFX9 run must not reference m0.
    127 ; SICIVI: s_mov_b32 m0
    128 ; GFX9-NOT: m0
    129 
        ; redwood: two LDS_WRITE instructions and no further one after them.
    130 ; EG: LDS_WRITE
    131 ; EG: LDS_WRITE
    132 ; EG-NOT: LDS_WRITE
    133 
        ; cayman: two LDS_WRITE instructions and no further one after them.
    134 ; CM: LDS_WRITE
    135 ; CM: LDS_WRITE
    136 ; CM-NOT: LDS_WRITE
    137 
        ; Every amdgcn run must emit a single 64-bit DS write for the vector.
    138 ; GCN: ds_write_b64
        ; Kernel under test; stores the <2 x i32> argument %in to %out.
    139 define amdgpu_kernel void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
    140 entry:
    141   store <2 x i32> %in, <2 x i32> addrspace(3)* %out
    142   ret void
    143 }
    144 
    145 ; FUNC-LABEL: {{^}}store_local_v4i32:
        ; Expected lowering of a <4 x i32> store (no explicit alignment) through an
        ; addrspace(3) pointer. The SICIVI runs must write m0 with s_mov_b32 ahead
        ; of the store; the GFX9 run must not reference m0.
    146 ; SICIVI: s_mov_b32 m0
    147 ; GFX9-NOT: m0
    148 
        ; redwood: at least four LDS_WRITE instructions (no upper bound is checked).
    149 ; EG: LDS_WRITE
    150 ; EG: LDS_WRITE
    151 ; EG: LDS_WRITE
    152 ; EG: LDS_WRITE
    153 
        ; cayman: at least four LDS_WRITE instructions (no upper bound is checked).
    154 ; CM: LDS_WRITE
    155 ; CM: LDS_WRITE
    156 ; CM: LDS_WRITE
    157 ; CM: LDS_WRITE
    158 
        ; Every amdgcn run must use the paired 64-bit form, ds_write2_b64.
    159 ; GCN: ds_write2_b64
        ; Kernel under test; stores the <4 x i32> argument %in to %out.
    160 define amdgpu_kernel void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
    161 entry:
    162   store <4 x i32> %in, <4 x i32> addrspace(3)* %out
    163   ret void
    164 }
    165 
    166 ; FUNC-LABEL: {{^}}store_local_v4i32_align4:
        ; Expected lowering of a <4 x i32> store marked `align 4` through an
        ; addrspace(3) pointer. The SICIVI runs must write m0 with s_mov_b32 ahead
        ; of the stores; the GFX9 run must not reference m0.
    167 ; SICIVI: s_mov_b32 m0
    168 ; GFX9-NOT: m0
    169 
        ; redwood: at least four LDS_WRITE instructions (no upper bound is checked).
    170 ; EG: LDS_WRITE
    171 ; EG: LDS_WRITE
    172 ; EG: LDS_WRITE
    173 ; EG: LDS_WRITE
    174 
        ; cayman: at least four LDS_WRITE instructions (no upper bound is checked).
    175 ; CM: LDS_WRITE
    176 ; CM: LDS_WRITE
    177 ; CM: LDS_WRITE
    178 ; CM: LDS_WRITE
    179 
        ; Every amdgcn run must emit two paired 32-bit writes (ds_write2_b32) here,
        ; in contrast to the ds_write2_b64 expected by @store_local_v4i32.
    180 ; GCN: ds_write2_b32
    181 ; GCN: ds_write2_b32
        ; Kernel under test; the `align 4` on the store is what this case exercises.
    182 define amdgpu_kernel void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
    183 entry:
    184   store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4
    185   ret void
    186 }
    187 
    188 ; FUNC-LABEL: {{^}}store_local_i64_i8:
        ; Expected lowering of an i64 kernel argument truncated to i8 and stored
        ; through an addrspace(3) pointer. The SICIVI runs must write m0 with
        ; s_mov_b32 ahead of the store; the GFX9 run must not reference m0.
    189 ; SICIVI: s_mov_b32 m0
    190 ; GFX9-NOT: m0
    191 
        ; Both r600 runs must emit a byte-sized LDS write. The cayman check was
        ; previously absent, so that run only verified the function label; it now
        ; mirrors the redwood expectation, as in @store_local_i8.
    192 ; EG: LDS_BYTE_WRITE
        ; CM: LDS_BYTE_WRITE
        ; Every amdgcn run must emit an 8-bit DS write.
    193 ; GCN: ds_write_b8
        ; Kernel under test; %0 is the low 8 bits of %in.
    194 define amdgpu_kernel void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
    195 entry:
    196   %0 = trunc i64 %in to i8
    197   store i8 %0, i8 addrspace(3)* %out
    198   ret void
    199 }
    200 
    201 ; FUNC-LABEL: {{^}}store_local_i64_i16:
        ; Expected lowering of an i64 kernel argument truncated to i16 and stored
        ; through an addrspace(3) pointer. The SICIVI runs must write m0 with
        ; s_mov_b32 ahead of the store; the GFX9 run must not reference m0.
    202 ; SICIVI: s_mov_b32 m0
    203 ; GFX9-NOT: m0
    204 
        ; Both r600 runs must emit a short-sized LDS write. The cayman check was
        ; previously absent, so that run only verified the function label; it now
        ; mirrors the redwood expectation, as in @store_local_i16.
    205 ; EG: LDS_SHORT_WRITE
        ; CM: LDS_SHORT_WRITE
        ; Every amdgcn run must emit a 16-bit DS write.
    206 ; GCN: ds_write_b16
        ; Kernel under test; %0 is the low 16 bits of %in.
    207 define amdgpu_kernel void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
    208 entry:
    209   %0 = trunc i64 %in to i16
    210   store i16 %0, i16 addrspace(3)* %out
    211   ret void
    212 }
    213