Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=SI -check-prefix=FUNC %s
      3 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      4 ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=CM -check-prefix=FUNC %s
      5 
      6 ;===------------------------------------------------------------------------===;
      7 ; Global Address Space
      8 ;===------------------------------------------------------------------------===;
      9 ; FUNC-LABEL: {{^}}store_i1:
        ; Stores the constant i1 true through a global (addrspace(1)) pointer.
        ; EG expects a MEM_RAT MSKOR and SI expects a byte store; nothing is
        ; checked under the CM prefix.
     10 ; EG: MEM_RAT MSKOR
     11 ; SI: buffer_store_byte
     12 define void @store_i1(i1 addrspace(1)* %out) {
     13 entry:
     14   store i1 true, i1 addrspace(1)* %out
     15   ret void
     16 }
     17 
     18 ; i8 store
        ; Stores the i8 argument %in through a global (addrspace(1)) pointer.
        ; The EG checks pin the MEM_RAT MSKOR operands together with the
        ; AND_INT / LSHL / MOV instructions listed per instruction group (IG)
        ; below; SI only needs a single buffer_store_byte.
     19 ; FUNC-LABEL: {{^}}store_i8:
     20 ; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
     21 
     22 ; IG 0: Get the byte index and truncate the value
     23 ; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
     24 ; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
     25 ; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
     26 ; EG-NEXT: 3(4.203895e-45), 255(3.573311e-43)
     27 
     28 
     29 ; IG 1: Truncate the calculated shift amount for the mask
        ;       (no instructions are checked for this group)
     30 
     31 ; IG 2: Shift the value and the mask
     32 ; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
     33 ; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
     34 ; EG-NEXT: 255
     35 ; IG 3: Initialize the Y and Z channels to zero
     36 ;       XXX: An optimal scheduler should merge this into one of the previous IGs.
     37 ; EG: MOV T[[RW_GPR]].Y, 0.0
     38 ; EG: MOV * T[[RW_GPR]].Z, 0.0
     39 
     40 ; SI: buffer_store_byte
     41 
     42 define void @store_i8(i8 addrspace(1)* %out, i8 %in) {
     43 entry:
     44   store i8 %in, i8 addrspace(1)* %out
     45   ret void
     46 }
     47 
     48 ; i16 store
        ; Stores the i16 argument %in through a global (addrspace(1)) pointer.
        ; Same check structure as the i8 case, but the literal used for the
        ; value/mask is 65535 and SI is expected to emit buffer_store_short.
     49 ; FUNC-LABEL: {{^}}store_i16:
     50 ; EG: MEM_RAT MSKOR T[[RW_GPR:[0-9]]].XW, T{{[0-9]}}.X
     51 
     52 ; IG 0: Get the byte index and truncate the value
     53 
     54 
     55 ; EG: AND_INT * T{{[0-9]}}.[[BI_CHAN:[XYZW]]], KC0[2].Y, literal.x
     56 ; EG-NEXT: 3(4.203895e-45),
     57 
     58 ; EG: LSHL T{{[0-9]}}.[[SHIFT_CHAN:[XYZW]]], PV.[[BI_CHAN]], literal.x
     59 ; EG: AND_INT * T{{[0-9]}}.[[TRUNC_CHAN:[XYZW]]], KC0[2].Z, literal.y
     60 
     61 ; EG-NEXT: 3(4.203895e-45), 65535(9.183409e-41)
     62 ; IG 1: Truncate the calculated shift amount for the mask
        ;       (no instructions are checked for this group)
     63 
     64 ; IG 2: Shift the value and the mask
     65 ; EG: LSHL T[[RW_GPR]].X, PS, PV.[[SHIFT_CHAN]]
     66 ; EG: LSHL * T[[RW_GPR]].W, literal.x, PV.[[SHIFT_CHAN]]
     67 ; EG-NEXT: 65535
     68 ; IG 3: Initialize the Y and Z channels to zero
     69 ;       XXX: An optimal scheduler should merge this into one of the previous IGs.
     70 ; EG: MOV T[[RW_GPR]].Y, 0.0
     71 ; EG: MOV * T[[RW_GPR]].Z, 0.0
     72 
     73 ; SI: buffer_store_short
     74 define void @store_i16(i16 addrspace(1)* %out, i16 %in) {
     75 entry:
     76   store i16 %in, i16 addrspace(1)* %out
     77   ret void
     78 }
     79 
     80 ; FUNC-LABEL: {{^}}store_i24:
        ; Stores the i24 argument %in through a global (addrspace(1)) pointer.
        ; SI is expected to shift right by 16 and emit one byte store plus one
        ; short store; the -DAG checks accept those two stores in either order.
        ; Nothing is checked under the EG or CM prefixes.
     81 ; SI: s_lshr_b32 s{{[0-9]+}}, s{{[0-9]+}}, 16
     82 ; SI-DAG: buffer_store_byte
     83 ; SI-DAG: buffer_store_short
     84 define void @store_i24(i24 addrspace(1)* %out, i24 %in) {
     85 entry:
     86   store i24 %in, i24 addrspace(1)* %out
     87   ret void
     88 }
     89 
     90 ; FUNC-LABEL: {{^}}store_i25:
        ; Stores the i25 argument %in through a global (addrspace(1)) pointer.
        ; SI is expected to mask the value with 0x1ffffff (the low 25 bits),
        ; move the result into a v register and store it as one dword.
        ; Nothing is checked under the EG or CM prefixes.
     91 ; SI: s_and_b32 [[AND:s[0-9]+]], s{{[0-9]+}}, 0x1ffffff{{$}}
     92 ; SI: v_mov_b32_e32 [[VAND:v[0-9]+]], [[AND]]
     93 ; SI: buffer_store_dword [[VAND]]
     94 define void @store_i25(i25 addrspace(1)* %out, i25 %in) {
     95 entry:
     96   store i25 %in, i25 addrspace(1)* %out
     97   ret void
     98 }
     99 
    100 ; FUNC-LABEL: {{^}}store_v2i8:
        ; Truncates <2 x i32> to <2 x i8> and stores the 16-bit result to
        ; global memory. EG expects a MEM_RAT MSKOR with no further MEM_RAT
        ; MSKOR after it; SI expects a single buffer_store_short.
    101 ; EG: MEM_RAT MSKOR
    102 ; EG-NOT: MEM_RAT MSKOR
    103 
    104 ; SI: buffer_store_short
    105 define void @store_v2i8(<2 x i8> addrspace(1)* %out, <2 x i32> %in) {
    106 entry:
    107   %0 = trunc <2 x i32> %in to <2 x i8>
    108   store <2 x i8> %0, <2 x i8> addrspace(1)* %out
    109   ret void
    110 }
    111 
    112 
    113 ; FUNC-LABEL: {{^}}store_v2i16:
        ; Truncates <2 x i32> to <2 x i16> and stores the 32-bit result to
        ; global memory. Every target is expected to use a dword-sized store
        ; (STORE_RAW on EG, STORE_DWORD on CM, buffer_store_dword on SI).
    114 ; EG: MEM_RAT_CACHELESS STORE_RAW
    115 
    116 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    117 
    118 ; SI: buffer_store_dword
    119 define void @store_v2i16(<2 x i16> addrspace(1)* %out, <2 x i32> %in) {
    120 entry:
    121   %0 = trunc <2 x i32> %in to <2 x i16>
    122   store <2 x i16> %0, <2 x i16> addrspace(1)* %out
    123   ret void
    124 }
    125 
    126 ; FUNC-LABEL: {{^}}store_v4i8:
        ; Truncates <4 x i32> to <4 x i8> and stores the 32-bit result to
        ; global memory. Every target is expected to use a dword-sized store
        ; (STORE_RAW on EG, STORE_DWORD on CM, buffer_store_dword on SI).
    127 ; EG: MEM_RAT_CACHELESS STORE_RAW
    128 
    129 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    130 
    131 ; SI: buffer_store_dword
    132 define void @store_v4i8(<4 x i8> addrspace(1)* %out, <4 x i32> %in) {
    133 entry:
    134   %0 = trunc <4 x i32> %in to <4 x i8>
    135   store <4 x i8> %0, <4 x i8> addrspace(1)* %out
    136   ret void
    137 }
    138 
    139 ; floating-point store
        ; Stores the float argument %in through a global (addrspace(1))
        ; pointer. The EG and CM checks also require both register operands
        ; of the store to use the .X channel.
    140 ; FUNC-LABEL: {{^}}store_f32:
    141 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+\.X, T[0-9]+\.X}}, 1
    142 
    143 ; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+\.X, T[0-9]+\.X}}
    144 
    145 ; SI: buffer_store_dword
    146 
    147 define void @store_f32(float addrspace(1)* %out, float %in) {
    148   store float %in, float addrspace(1)* %out
    149   ret void
    150 }
    151 
    152 ; FUNC-LABEL: {{^}}store_v4i16:
        ; Truncates <4 x i32> to <4 x i16> and stores the 64-bit result to
        ; global memory. SI expects a single buffer_store_dwordx2.
        ; NOTE(review): the MEM_RAT_CACHELESS line below carries no check prefix
        ; (such as EG), so FileCheck never evaluates it. Before enabling it,
        ; confirm the expected channels against real llc output -- a 64-bit
        ; store may not use .XYZW.
    153 ; MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW
    154 
    155 ; SI: buffer_store_dwordx2
    156 define void @store_v4i16(<4 x i16> addrspace(1)* %out, <4 x i32> %in) {
    157 entry:
    158   %0 = trunc <4 x i32> %in to <4 x i16>
    159   store <4 x i16> %0, <4 x i16> addrspace(1)* %out
    160   ret void
    161 }
    162 
    163 ; vec2 floating-point stores
        ; Builds a <2 x float> from the scalar arguments %a (element 0) and %b
        ; (element 1) and stores it to global memory. SI expects a single
        ; buffer_store_dwordx2; EG and CM each expect at least one store.
    164 ; FUNC-LABEL: {{^}}store_v2f32:
    165 ; EG: MEM_RAT_CACHELESS STORE_RAW
    166 
    167 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    168 
    169 ; SI: buffer_store_dwordx2
    170 
    171 define void @store_v2f32(<2 x float> addrspace(1)* %out, float %a, float %b) {
    172 entry:
    173   %0 = insertelement <2 x float> <float 0.0, float 0.0>, float %a, i32 0
    174   %1 = insertelement <2 x float> %0, float %b, i32 1
    175   store <2 x float> %1, <2 x float> addrspace(1)* %out
    176   ret void
    177 }
    178 
    179 ; FUNC-LABEL: {{^}}store_v4i32:
        ; Stores a <4 x i32> argument to global memory. The -NOT checks reject
        ; a second store instruction on EG and CM, i.e. the vector must not be
        ; split; SI expects a single buffer_store_dwordx4.
    180 ; EG: MEM_RAT_CACHELESS STORE_RAW
    181 ; EG-NOT: MEM_RAT_CACHELESS STORE_RAW
    182 
    183 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    184 ; CM-NOT: MEM_RAT_CACHELESS STORE_DWORD
    185 
    186 ; SI: buffer_store_dwordx4
    187 define void @store_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> %in) {
    188 entry:
    189   store <4 x i32> %in, <4 x i32> addrspace(1)* %out
    190   ret void
    191 }
    192 
    193 ; FUNC-LABEL: {{^}}store_i64_i8:
        ; Truncates the i64 argument %in to i8 and stores it to global memory.
        ; Expected instructions match the plain i8 store (MEM_RAT MSKOR on EG,
        ; buffer_store_byte on SI).
    194 ; EG: MEM_RAT MSKOR
    195 ; SI: buffer_store_byte
    196 define void @store_i64_i8(i8 addrspace(1)* %out, i64 %in) {
    197 entry:
    198   %0 = trunc i64 %in to i8
    199   store i8 %0, i8 addrspace(1)* %out
    200   ret void
    201 }
    202 
    203 ; FUNC-LABEL: {{^}}store_i64_i16:
        ; Truncates the i64 argument %in to i16 and stores it to global memory.
        ; Expected instructions match the plain i16 store (MEM_RAT MSKOR on EG,
        ; buffer_store_short on SI).
    204 ; EG: MEM_RAT MSKOR
    205 ; SI: buffer_store_short
    206 define void @store_i64_i16(i16 addrspace(1)* %out, i64 %in) {
    207 entry:
    208   %0 = trunc i64 %in to i16
    209   store i16 %0, i16 addrspace(1)* %out
    210   ret void
    211 }
    212 
    213 ;===------------------------------------------------------------------------===;
    214 ; Local Address Space
    215 ;===------------------------------------------------------------------------===;
    216 
    217 ; FUNC-LABEL: {{^}}store_local_i1:
        ; Stores the constant i1 true through a local (addrspace(3)) pointer.
        ; EG expects LDS_BYTE_WRITE and SI expects ds_write_b8.
    218 ; EG: LDS_BYTE_WRITE
    219 ; SI: ds_write_b8
    220 define void @store_local_i1(i1 addrspace(3)* %out) {
    221 entry:
    222   store i1 true, i1 addrspace(3)* %out
    223   ret void
    224 }
    225 
    226 ; FUNC-LABEL: {{^}}store_local_i8:
        ; Stores the i8 argument %in through a local (addrspace(3)) pointer.
        ; EG expects LDS_BYTE_WRITE and SI expects ds_write_b8.
    227 ; EG: LDS_BYTE_WRITE
    228 
    229 ; SI: ds_write_b8
    230 define void @store_local_i8(i8 addrspace(3)* %out, i8 %in) {
    231   store i8 %in, i8 addrspace(3)* %out
    232   ret void
    233 }
    234 
    235 ; FUNC-LABEL: {{^}}store_local_i16:
        ; Stores the i16 argument %in through a local (addrspace(3)) pointer.
        ; EG expects LDS_SHORT_WRITE and SI expects ds_write_b16.
    236 ; EG: LDS_SHORT_WRITE
    237 
    238 ; SI: ds_write_b16
    239 define void @store_local_i16(i16 addrspace(3)* %out, i16 %in) {
    240   store i16 %in, i16 addrspace(3)* %out
    241   ret void
    242 }
    243 
    244 ; FUNC-LABEL: {{^}}store_local_v2i16:
        ; Stores a <2 x i16> argument (32 bits) through a local (addrspace(3))
        ; pointer. EG and CM expect an LDS_WRITE; SI expects ds_write_b32.
    245 ; EG: LDS_WRITE
    246 
    247 ; CM: LDS_WRITE
    248 
    249 ; SI: ds_write_b32
    250 define void @store_local_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> %in) {
    251 entry:
    252   store <2 x i16> %in, <2 x i16> addrspace(3)* %out
    253   ret void
    254 }
    255 
    256 ; FUNC-LABEL: {{^}}store_local_v4i8:
        ; Stores a <4 x i8> argument (32 bits) through a local (addrspace(3))
        ; pointer. EG and CM expect an LDS_WRITE; SI expects ds_write_b32.
    257 ; EG: LDS_WRITE
    258 
    259 ; CM: LDS_WRITE
    260 
    261 ; SI: ds_write_b32
    262 define void @store_local_v4i8(<4 x i8> addrspace(3)* %out, <4 x i8> %in) {
    263 entry:
    264   store <4 x i8> %in, <4 x i8> addrspace(3)* %out
    265   ret void
    266 }
    267 
    268 ; FUNC-LABEL: {{^}}store_local_v2i32:
        ; Stores a <2 x i32> argument through a local (addrspace(3)) pointer.
        ; EG and CM each expect two LDS_WRITE instructions; SI expects a
        ; single ds_write_b64.
    269 ; EG: LDS_WRITE
    270 ; EG: LDS_WRITE
    271 
    272 ; CM: LDS_WRITE
    273 ; CM: LDS_WRITE
    274 
    275 ; SI: ds_write_b64
    276 define void @store_local_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> %in) {
    277 entry:
    278   store <2 x i32> %in, <2 x i32> addrspace(3)* %out
    279   ret void
    280 }
    281 
    282 ; FUNC-LABEL: {{^}}store_local_v4i32:
        ; Stores a <4 x i32> argument through a local (addrspace(3)) pointer
        ; with no explicit alignment on the store. EG and CM each expect four
        ; LDS_WRITE instructions; SI expects a ds_write2_b64.
    283 ; EG: LDS_WRITE
    284 ; EG: LDS_WRITE
    285 ; EG: LDS_WRITE
    286 ; EG: LDS_WRITE
    287 
    288 ; CM: LDS_WRITE
    289 ; CM: LDS_WRITE
    290 ; CM: LDS_WRITE
    291 ; CM: LDS_WRITE
    292 
    293 ; SI: ds_write2_b64
    294 define void @store_local_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
    295 entry:
    296   store <4 x i32> %in, <4 x i32> addrspace(3)* %out
    297   ret void
    298 }
    299 
    300 ; FUNC-LABEL: {{^}}store_local_v4i32_align4:
        ; Same <4 x i32> local store as store_local_v4i32, but the store is
        ; marked align 4. EG and CM still expect four LDS_WRITE instructions;
        ; SI is expected to emit two ds_write2_b32 instead of a ds_write2_b64.
    301 ; EG: LDS_WRITE
    302 ; EG: LDS_WRITE
    303 ; EG: LDS_WRITE
    304 ; EG: LDS_WRITE
    305 
    306 ; CM: LDS_WRITE
    307 ; CM: LDS_WRITE
    308 ; CM: LDS_WRITE
    309 ; CM: LDS_WRITE
    310 
    311 ; SI: ds_write2_b32
    312 ; SI: ds_write2_b32
    313 define void @store_local_v4i32_align4(<4 x i32> addrspace(3)* %out, <4 x i32> %in) {
    314 entry:
    315   store <4 x i32> %in, <4 x i32> addrspace(3)* %out, align 4
    316   ret void
    317 }
    318 
    319 ; FUNC-LABEL: {{^}}store_local_i64_i8:
        ; Truncates the i64 argument %in to i8 and stores it through a local
        ; (addrspace(3)) pointer. EG expects LDS_BYTE_WRITE, SI ds_write_b8.
    320 ; EG: LDS_BYTE_WRITE
    321 ; SI: ds_write_b8
    322 define void @store_local_i64_i8(i8 addrspace(3)* %out, i64 %in) {
    323 entry:
    324   %0 = trunc i64 %in to i8
    325   store i8 %0, i8 addrspace(3)* %out
    326   ret void
    327 }
    328 
    329 ; FUNC-LABEL: {{^}}store_local_i64_i16:
        ; Truncates the i64 argument %in to i16 and stores it through a local
        ; (addrspace(3)) pointer. EG expects LDS_SHORT_WRITE, SI ds_write_b16.
    330 ; EG: LDS_SHORT_WRITE
    331 ; SI: ds_write_b16
    332 define void @store_local_i64_i16(i16 addrspace(3)* %out, i64 %in) {
    333 entry:
    334   %0 = trunc i64 %in to i16
    335   store i16 %0, i16 addrspace(3)* %out
    336   ret void
    337 }
    338 
    339 ; The stores in this function are combined by the optimizer to create a
    340 ; 64-bit store with 32-bit alignment.  This is legal for SI and the legalizer
    341 ; should not try to split the 64-bit store back into 2 32-bit stores.
    342 ;
    343 ; Evergreen / Northern Islands don't support 64-bit stores yet, so there should
    344 ; be two 32-bit stores.
    345 
    346 ; FUNC-LABEL: {{^}}vecload2:
        ; Loads two consecutive i32 values from %mem (addrspace(2)) and stores
        ; them to two consecutive i32 slots of %out (addrspace(1)), all with
        ; align 4. SI must emit one buffer_store_dwordx2 for the two stores.
        ; NOTE(review): the prose above says EG should produce two 32-bit
        ; stores, but only one STORE_RAW is checked here and there is no -NOT
        ; check -- confirm whether a second check was intended.
    347 ; EG: MEM_RAT_CACHELESS STORE_RAW
    348 
    349 ; CM: MEM_RAT_CACHELESS STORE_DWORD
    350 
    351 ; SI: buffer_store_dwordx2
    352 define void @vecload2(i32 addrspace(1)* nocapture %out, i32 addrspace(2)* nocapture %mem) #0 {
    353 entry:
    354   %0 = load i32, i32 addrspace(2)* %mem, align 4
    355   %arrayidx1.i = getelementptr inbounds i32, i32 addrspace(2)* %mem, i64 1
    356   %1 = load i32, i32 addrspace(2)* %arrayidx1.i, align 4
    357   store i32 %0, i32 addrspace(1)* %out, align 4
    358   %arrayidx1 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
    359   store i32 %1, i32 addrspace(1)* %arrayidx1, align 4
    360   ret void
    361 }
    362 
    363 ; When i128 was a legal type, this program produced "cannot select" errors:
    364 
    365 ; FUNC-LABEL: {{^}}"i128-const-store":
        ; Writes the constants 1, 1, 2, 2 to four consecutive i32 slots of %out
        ; (indices 0..3, each align 4). The checks expect these to be emitted
        ; as one 128-bit store: STORE_RAW of T.XYZW on EG, STORE_DWORD on CM and
        ; buffer_store_dwordx4 on SI.
    366 ; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 1
    367 
    368 ; CM: MEM_RAT_CACHELESS STORE_DWORD T{{[0-9]+}}, T{{[0-9]+}}.X
    369 
    370 ; SI: buffer_store_dwordx4
    371 define void @i128-const-store(i32 addrspace(1)* %out) {
    372 entry:
    373   store i32 1, i32 addrspace(1)* %out, align 4
    374   %arrayidx2 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 1
    375   store i32 1, i32 addrspace(1)* %arrayidx2, align 4
    376   %arrayidx4 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 2
    377   store i32 2, i32 addrspace(1)* %arrayidx4, align 4
    378   %arrayidx6 = getelementptr inbounds i32, i32 addrspace(1)* %out, i64 3
    379   store i32 2, i32 addrspace(1)* %arrayidx6, align 4
    380   ret void
    381 }
    382 
    383 attributes #0 = { nounwind } ; attribute group referenced as #0 by @vecload2
    384