; Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI,FUNC %s
      4 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      5 
      6 ; Testing for ds_read/write_128
      7 ; RUN: llc -march=amdgcn -mcpu=tahiti -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=SI,FUNC %s
      8 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
      9 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
     10 
     11 ; FUNC-LABEL: {{^}}local_load_i32:
     12 ; GCN-NOT: s_wqm_b64
     13 ; SICIVI: s_mov_b32 m0, -1
     14 ; GFX9-NOT: m0
     15 ; GCN: ds_read_b32
     16 
     17 ; EG: LDS_READ_RET
     18 define amdgpu_kernel void @local_load_i32(i32 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
     19 entry:
     20   %ld = load i32, i32 addrspace(3)* %in
     21   store i32 %ld, i32 addrspace(3)* %out
     22   ret void
     23 }
     24 
     25 ; FUNC-LABEL: {{^}}local_load_v2i32:
     26 ; SICIVI: s_mov_b32 m0, -1
     27 ; GFX9-NOT: m0
     28 
     29 ; GCN: ds_read_b64
     30 define amdgpu_kernel void @local_load_v2i32(<2 x i32> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
     31 entry:
     32   %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
     33   store <2 x i32> %ld, <2 x i32> addrspace(3)* %out
     34   ret void
     35 }
     36 
     37 ; FUNC-LABEL: {{^}}local_load_v3i32:
     38 ; SICIVI: s_mov_b32 m0, -1
     39 ; GFX9-NOT: m0
     40 
     41 ; GCN-DAG: ds_read_b64
     42 ; GCN-DAG: ds_read_b32
     43 define amdgpu_kernel void @local_load_v3i32(<3 x i32> addrspace(3)* %out, <3 x i32> addrspace(3)* %in) #0 {
     44 entry:
     45   %ld = load <3 x i32>, <3 x i32> addrspace(3)* %in
     46   store <3 x i32> %ld, <3 x i32> addrspace(3)* %out
     47   ret void
     48 }
     49 
     50 ; FUNC-LABEL: {{^}}local_load_v4i32:
     51 ; SICIVI: s_mov_b32 m0, -1
     52 ; GFX9-NOT: m0
     53 
     54 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
     55 
     56 define amdgpu_kernel void @local_load_v4i32(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
     57 entry:
     58   %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
     59   store <4 x i32> %ld, <4 x i32> addrspace(3)* %out
     60   ret void
     61 }
     62 
     63 ; FUNC-LABEL: {{^}}local_load_v8i32:
     64 ; SICIVI: s_mov_b32 m0, -1
     65 ; GFX9-NOT: m0
     66 
     67 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
     68 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
     69 define amdgpu_kernel void @local_load_v8i32(<8 x i32> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
     70 entry:
     71   %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
     72   store <8 x i32> %ld, <8 x i32> addrspace(3)* %out
     73   ret void
     74 }
     75 
     76 ; FUNC-LABEL: {{^}}local_load_v16i32:
     77 ; SICIVI: s_mov_b32 m0, -1
     78 ; GFX9-NOT: m0
     79 
     80 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7{{$}}
     81 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5{{$}}
     82 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}}
     83 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}}
     84 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7
     85 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5
     86 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3
     87 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1
     88 define amdgpu_kernel void @local_load_v16i32(<16 x i32> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
     89 entry:
     90   %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
     91   store <16 x i32> %ld, <16 x i32> addrspace(3)* %out
     92   ret void
     93 }
     94 
     95 ; FUNC-LABEL: {{^}}local_zextload_i32_to_i64:
     96 ; SICIVI: s_mov_b32 m0, -1
     97 ; GFX9-NOT: m0
     98 
     99 define amdgpu_kernel void @local_zextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
    100   %ld = load i32, i32 addrspace(3)* %in
    101   %ext = zext i32 %ld to i64
    102   store i64 %ext, i64 addrspace(3)* %out
    103   ret void
    104 }
    105 
    106 ; FUNC-LABEL: {{^}}local_sextload_i32_to_i64:
    107 ; SICIVI: s_mov_b32 m0, -1
    108 ; GFX9-NOT: m0
    109 
    110 define amdgpu_kernel void @local_sextload_i32_to_i64(i64 addrspace(3)* %out, i32 addrspace(3)* %in) #0 {
    111   %ld = load i32, i32 addrspace(3)* %in
    112   %ext = sext i32 %ld to i64
    113   store i64 %ext, i64 addrspace(3)* %out
    114   ret void
    115 }
    116 
    117 ; FUNC-LABEL: {{^}}local_zextload_v1i32_to_v1i64:
    118 ; SICIVI: s_mov_b32 m0, -1
    119 ; GFX9-NOT: m0
    120 
    121 define amdgpu_kernel void @local_zextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
    122   %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
    123   %ext = zext <1 x i32> %ld to <1 x i64>
    124   store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
    125   ret void
    126 }
    127 
    128 ; FUNC-LABEL: {{^}}local_sextload_v1i32_to_v1i64:
    129 ; SICIVI: s_mov_b32 m0, -1
    130 ; GFX9-NOT: m0
    131 
    132 define amdgpu_kernel void @local_sextload_v1i32_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i32> addrspace(3)* %in) #0 {
    133   %ld = load <1 x i32>, <1 x i32> addrspace(3)* %in
    134   %ext = sext <1 x i32> %ld to <1 x i64>
    135   store <1 x i64> %ext, <1 x i64> addrspace(3)* %out
    136   ret void
    137 }
    138 
    139 ; FUNC-LABEL: {{^}}local_zextload_v2i32_to_v2i64:
    140 ; SICIVI: s_mov_b32 m0, -1
    141 ; GFX9-NOT: m0
    142 
    143 define amdgpu_kernel void @local_zextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
    144   %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
    145   %ext = zext <2 x i32> %ld to <2 x i64>
    146   store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
    147   ret void
    148 }
    149 
    150 ; FUNC-LABEL: {{^}}local_sextload_v2i32_to_v2i64:
    151 ; SICIVI: s_mov_b32 m0, -1
    152 ; GFX9-NOT: m0
    153 
    154 define amdgpu_kernel void @local_sextload_v2i32_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i32> addrspace(3)* %in) #0 {
    155   %ld = load <2 x i32>, <2 x i32> addrspace(3)* %in
    156   %ext = sext <2 x i32> %ld to <2 x i64>
    157   store <2 x i64> %ext, <2 x i64> addrspace(3)* %out
    158   ret void
    159 }
    160 
    161 ; FUNC-LABEL: {{^}}local_zextload_v4i32_to_v4i64:
    162 ; SICIVI: s_mov_b32 m0, -1
    163 ; GFX9-NOT: m0
    164 
    165 define amdgpu_kernel void @local_zextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
    166   %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
    167   %ext = zext <4 x i32> %ld to <4 x i64>
    168   store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
    169   ret void
    170 }
    171 
    172 ; FUNC-LABEL: {{^}}local_sextload_v4i32_to_v4i64:
    173 ; SICIVI: s_mov_b32 m0, -1
    174 ; GFX9-NOT: m0
    175 
    176 define amdgpu_kernel void @local_sextload_v4i32_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) #0 {
    177   %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in
    178   %ext = sext <4 x i32> %ld to <4 x i64>
    179   store <4 x i64> %ext, <4 x i64> addrspace(3)* %out
    180   ret void
    181 }
    182 
    183 ; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load.
    184 ; FUNC-LABEL: {{^}}local_v4i32_to_128:
    185 
    186 ; SI-NOT: ds_read_b128
    187 ; SI-NOT: ds_write_b128
    188 
    189 ; CIVI: ds_read_b128
    190 ; CIVI: ds_write_b128
    191 
    192 ; EG: LDS_READ_RET
    193 ; EG: LDS_READ_RET
    194 ; EG: LDS_READ_RET
    195 ; EG: LDS_READ_RET
    196 define amdgpu_kernel void @local_v4i32_to_128(<4 x i32> addrspace(3)* %out, <4 x i32> addrspace(3)* %in) {
    197   %ld = load <4 x i32>, <4 x i32> addrspace(3)* %in, align 16
    198   store <4 x i32> %ld, <4 x i32> addrspace(3)* %out, align 16
    199   ret void
    200 }
    201 
    202 ; FUNC-LABEL: {{^}}local_zextload_v8i32_to_v8i64:
    203 ; SICIVI: s_mov_b32 m0, -1
    204 ; GFX9-NOT: m0
    205 
    206 define amdgpu_kernel void @local_zextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
    207   %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
    208   %ext = zext <8 x i32> %ld to <8 x i64>
    209   store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
    210   ret void
    211 }
    212 
    213 ; FUNC-LABEL: {{^}}local_sextload_v8i32_to_v8i64:
    214 ; SICIVI: s_mov_b32 m0, -1
    215 ; GFX9-NOT: m0
    216 
    217 define amdgpu_kernel void @local_sextload_v8i32_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i32> addrspace(3)* %in) #0 {
    218   %ld = load <8 x i32>, <8 x i32> addrspace(3)* %in
    219   %ext = sext <8 x i32> %ld to <8 x i64>
    220   store <8 x i64> %ext, <8 x i64> addrspace(3)* %out
    221   ret void
    222 }
    223 
    224 ; FUNC-LABEL: {{^}}local_sextload_v16i32_to_v16i64:
    225 ; SICIVI: s_mov_b32 m0, -1
    226 ; GFX9-NOT: m0
    227 
    228 define amdgpu_kernel void @local_sextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
    229   %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
    230   %ext = sext <16 x i32> %ld to <16 x i64>
    231   store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
    232   ret void
    233 }
    234 
    235 ; FUNC-LABEL: {{^}}local_zextload_v16i32_to_v16i64
    236 ; SICIVI: s_mov_b32 m0, -1
    237 ; GFX9-NOT: m0
    238 
    239 define amdgpu_kernel void @local_zextload_v16i32_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i32> addrspace(3)* %in) #0 {
    240   %ld = load <16 x i32>, <16 x i32> addrspace(3)* %in
    241   %ext = zext <16 x i32> %ld to <16 x i64>
    242   store <16 x i64> %ext, <16 x i64> addrspace(3)* %out
    243   ret void
    244 }
    245 
    246 ; FUNC-LABEL: {{^}}local_sextload_v32i32_to_v32i64:
    247 ; SICIVI: s_mov_b32 m0, -1
    248 ; GFX9-NOT: m0
    249 
    250 define amdgpu_kernel void @local_sextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
    251   %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
    252   %ext = sext <32 x i32> %ld to <32 x i64>
    253   store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
    254   ret void
    255 }
    256 
    257 ; FUNC-LABEL: {{^}}local_zextload_v32i32_to_v32i64:
    258 ; SICIVI: s_mov_b32 m0, -1
    259 ; GFX9-NOT: m0
    260 
    261 define amdgpu_kernel void @local_zextload_v32i32_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i32> addrspace(3)* %in) #0 {
    262   %ld = load <32 x i32>, <32 x i32> addrspace(3)* %in
    263   %ext = zext <32 x i32> %ld to <32 x i64>
    264   store <32 x i64> %ext, <32 x i64> addrspace(3)* %out
    265   ret void
    266 }
    267 
    268 attributes #0 = { nounwind }
    269