Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
      4 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
      5 ; Testing for ds_read/write_b128; the runs below enable only the SI or CIVI checks plus the function labels.
      6 ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s
      7 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
      8 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
      9 
     10 ; FUNC-LABEL: {{^}}load_f32_local:
     11 ; SICIVI: s_mov_b32 m0
     12 ; GFX9-NOT: m0
     13 ; GCN: ds_read_b32
     14 
     15 ; EG: LDS_READ_RET
     16 define amdgpu_kernel void @load_f32_local(float addrspace(1)* %out, float addrspace(3)* %in) #0 {
     17 body:
     18   %val = load float, float addrspace(3)* %in        ; read a single float through the addrspace(3) pointer
     19   store float %val, float addrspace(1)* %out        ; forward it unchanged to the addrspace(1) output
     20   ret void
     21 }
     22 
     23 ; FUNC-LABEL: {{^}}load_v2f32_local:
     24 ; SICIVI: s_mov_b32 m0
     25 ; GFX9-NOT: m0
     26 
     27 ; GCN: ds_read_b64
     28 
     29 ; EG: LDS_READ_RET
     30 ; EG: LDS_READ_RET
     31 define amdgpu_kernel void @load_v2f32_local(<2 x float> addrspace(1)* %out, <2 x float> addrspace(3)* %in) #0 {
     32 body:
     33   %vec = load <2 x float>, <2 x float> addrspace(3)* %in      ; two-element vector load from addrspace(3)
     34   store <2 x float> %vec, <2 x float> addrspace(1)* %out      ; written back through the addrspace(1) pointer
     35   ret void
     36 }
     37 
     38 ; FIXME: should this do a read2_b64?
     39 ; FUNC-LABEL: {{^}}local_load_v3f32:
     40 ; SICIVI: s_mov_b32 m0
     41 ; GFX9-NOT: m0
     42 
     43 ; GCN-DAG: ds_read_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:8
     44 ; GCN-DAG: ds_read_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+$}}
     45 ; GCN: s_waitcnt
     46 ; GCN-DAG: ds_write_b64
     47 ; GCN-DAG: ds_write_b32 v{{[0-9]+}}, v{{[0-9]+}} offset:8{{$}}
     48 
     49 ; EG: LDS_READ_RET
     50 ; EG: LDS_READ_RET
     51 ; EG: LDS_READ_RET
     52 define amdgpu_kernel void @local_load_v3f32(<3 x float> addrspace(3)* %out, <3 x float> addrspace(3)* %in) #0 {
     53 body:
     54   %vec = load <3 x float>, <3 x float> addrspace(3)* %in      ; three-element vector load from addrspace(3)
     55   store <3 x float> %vec, <3 x float> addrspace(3)* %out      ; destination is addrspace(3) as well
     56   ret void
     57 }
     58 
     59 ; FUNC-LABEL: {{^}}local_load_v4f32:
     60 ; SICIVI: s_mov_b32 m0
     61 ; GFX9-NOT: m0
     62 
     63 ; GCN: ds_read2_b64
     64 
     65 ; EG: LDS_READ_RET
     66 ; EG: LDS_READ_RET
     67 ; EG: LDS_READ_RET
     68 ; EG: LDS_READ_RET
     69 define amdgpu_kernel void @local_load_v4f32(<4 x float> addrspace(3)* %out, <4 x float> addrspace(3)* %in) #0 {
     70 body:
     71   %vec = load <4 x float>, <4 x float> addrspace(3)* %in      ; four-element load, no explicit alignment given
     72   store <4 x float> %vec, <4 x float> addrspace(3)* %out      ; copied to another addrspace(3) location
     73   ret void
     74 }
     75 
     76 ; FUNC-LABEL: {{^}}local_load_v8f32:
     77 ; SICIVI: s_mov_b32 m0
     78 ; GFX9-NOT: m0
     79 
     80 ; GCN: ds_read2_b64
     81 ; GCN: ds_read2_b64
     82 
     83 ; EG: LDS_READ_RET
     84 ; EG: LDS_READ_RET
     85 ; EG: LDS_READ_RET
     86 ; EG: LDS_READ_RET
     87 ; EG: LDS_READ_RET
     88 ; EG: LDS_READ_RET
     89 ; EG: LDS_READ_RET
     90 ; EG: LDS_READ_RET
     91 define amdgpu_kernel void @local_load_v8f32(<8 x float> addrspace(3)* %out, <8 x float> addrspace(3)* %in) #0 {
     92 body:
     93   %vec = load <8 x float>, <8 x float> addrspace(3)* %in      ; eight-element vector load from addrspace(3)
     94   store <8 x float> %vec, <8 x float> addrspace(3)* %out      ; copied to another addrspace(3) location
     95   ret void
     96 }
     97 
     98 ; FUNC-LABEL: {{^}}local_load_v16f32:
     99 ; SICIVI: s_mov_b32 m0
    100 ; GFX9-NOT: m0
    101 
    102 ; GCN: ds_read2_b64
    103 ; GCN: ds_read2_b64
    104 ; GCN: ds_read2_b64
    105 ; GCN: ds_read2_b64
    106 
    107 ; EG: LDS_READ_RET
    108 ; EG: LDS_READ_RET
    109 ; EG: LDS_READ_RET
    110 ; EG: LDS_READ_RET
    111 ; EG: LDS_READ_RET
    112 ; EG: LDS_READ_RET
    113 ; EG: LDS_READ_RET
    114 ; EG: LDS_READ_RET
    115 ; EG: LDS_READ_RET
    116 ; EG: LDS_READ_RET
    117 ; EG: LDS_READ_RET
    118 ; EG: LDS_READ_RET
    119 ; EG: LDS_READ_RET
    120 ; EG: LDS_READ_RET
    121 ; EG: LDS_READ_RET
    122 ; EG: LDS_READ_RET
    123 define amdgpu_kernel void @local_load_v16f32(<16 x float> addrspace(3)* %out, <16 x float> addrspace(3)* %in) #0 {
    124 body:
    125   %vec = load <16 x float>, <16 x float> addrspace(3)* %in    ; sixteen-element vector load from addrspace(3)
    126   store <16 x float> %vec, <16 x float> addrspace(3)* %out    ; copied to another addrspace(3) location
    127   ret void
    128 }
    129 
    130 ; Tests that ds_read_b128/ds_write_b128 are generated for a 16-byte-aligned load and store on the CIVI runs, and are not generated on the SI run.
    131 ; FUNC-LABEL: {{^}}local_v4f32_to_128:
    132 
    133 ; SI-NOT: ds_read_b128
    134 ; SI-NOT: ds_write_b128
    135 
    136 ; CIVI: ds_read_b128
    137 ; CIVI: ds_write_b128
    138 
    139 ; EG: LDS_READ_RET
    140 ; EG: LDS_READ_RET
    141 ; EG: LDS_READ_RET
    142 ; EG: LDS_READ_RET
    143 define amdgpu_kernel void @local_v4f32_to_128(<4 x float> addrspace(3)* %out, <4 x float> addrspace(3)* %in) {
    144   %vec = load <4 x float>, <4 x float> addrspace(3)* %in, align 16     ; load carries an explicit 16-byte alignment
    145   store <4 x float> %vec, <4 x float> addrspace(3)* %out, align 16     ; store is 16-byte aligned too
    146   ret void
    147 }
    148 
    149 attributes #0 = { nounwind } ; attribute group referenced by the kernels above that are tagged #0
    150