Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s
      4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s
      5 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s
      6 
      7 ; Testing for ds_read/write_b128
      8 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
      9 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s
     10 
     11 ; FUNC-LABEL: {{^}}local_load_i64:
        ; Scalar case: a single i64 is loaded from %in and stored to %out, both
        ; pointers being in address space 3.
        ; The llc invocations that enable the SICIVI prefix expect an s_mov_b32
        ; that names m0; the gfx900 invocation instead requires that m0 is not
        ; mentioned between the function label and the first matched read.
     12 ; SICIVI: s_mov_b32 m0
     13 ; GFX9-NOT: m0
     14 
        ; VAL captures the v[lo:hi] register range that receives the read result;
        ; the write that follows must use that same range as its data operand.
        ; The end-of-line anchor on the read means nothing may follow the address
        ; register on that line.
     15 ; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}}
     16 ; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]]
     17 
        ; The r600 (redwood) invocation expects two LDS_READ_RET matches, in order.
     18 ; EG: LDS_READ_RET
     19 ; EG: LDS_READ_RET
     20 define amdgpu_kernel void @local_load_i64(i64 addrspace(3)* %out, i64 addrspace(3)* %in) #0 {
     21   %ld = load i64, i64 addrspace(3)* %in
     22   store i64 %ld, i64 addrspace(3)* %out
     23   ret void
     24 }
     25 
     26 ; FUNC-LABEL: {{^}}local_load_v2i64:
        ; <2 x i64> case: the whole vector is loaded from %in and stored to %out
        ; (address space 3). Neither access specifies an explicit alignment.
        ; m0 expectations: an s_mov_b32 naming m0 for the SICIVI-prefixed
        ; invocations, and no mention of m0 before the first matched read on gfx900.
     27 ; SICIVI: s_mov_b32 m0
     28 ; GFX9-NOT: m0
     29 
        ; Only one ds_read2_b64 match is required for the amdgcn invocations.
     30 ; GCN: ds_read2_b64
     31 
        ; The r600 (redwood) invocation expects four LDS_READ_RET matches, in order.
     32 ; EG: LDS_READ_RET
     33 ; EG: LDS_READ_RET
     34 ; EG: LDS_READ_RET
     35 ; EG: LDS_READ_RET
     36 define amdgpu_kernel void @local_load_v2i64(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) #0 {
     37 entry:
     38   %ld = load <2 x i64>, <2 x i64> addrspace(3)* %in
     39   store <2 x i64> %ld, <2 x i64> addrspace(3)* %out
     40   ret void
     41 }
     42 
     43 ; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load.
     44 ; FUNC-LABEL: {{^}}local_load_v2i64_to_128:
     45 
        ; The CIVI prefix is enabled only by the two llc invocations that pass
        ; -mattr=+enable-ds128 (tonga and gfx900), so the b128 expectations below
        ; apply to those invocations alone; every other invocation checks just the
        ; function label here. The read must be matched before the write.
     46 ; CIVI: ds_read_b128
     47 ; CIVI: ds_write_b128
     48 
        ; Both accesses carry an explicit `align 16`. Unlike the other kernels in
        ; this test, this one does not reference attribute group #0.
     49 define amdgpu_kernel void @local_load_v2i64_to_128(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) {
     50 entry:
     51   %ld = load <2 x i64>, <2 x i64> addrspace(3)* %in, align 16
     52   store <2 x i64> %ld, <2 x i64> addrspace(3)* %out, align 16
     53   ret void
     54 }
     55 
     56 ; FUNC-LABEL: {{^}}local_load_v3i64:
        ; <3 x i64> case: the vector is loaded from %in and stored to %out
        ; (address space 3), with no explicit alignment on either access.
        ; m0 expectations: an s_mov_b32 naming m0 for the SICIVI-prefixed
        ; invocations, and no mention of m0 before the matched reads on gfx900.
     57 ; SICIVI: s_mov_b32 m0
     58 ; GFX9-NOT: m0
     59 
        ; One ds_read2_b64 and one ds_read_b64 are required; as DAG checks they
        ; may be matched in either order.
     60 ; GCN-DAG: ds_read2_b64
     61 ; GCN-DAG: ds_read_b64
     62 
        ; The r600 (redwood) invocation expects six LDS_READ_RET matches, in order.
     63 ; EG: LDS_READ_RET
     64 ; EG: LDS_READ_RET
     65 ; EG: LDS_READ_RET
     66 ; EG: LDS_READ_RET
     67 ; EG: LDS_READ_RET
     68 ; EG: LDS_READ_RET
     69 define amdgpu_kernel void @local_load_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> addrspace(3)* %in) #0 {
     70 entry:
     71   %ld = load <3 x i64>, <3 x i64> addrspace(3)* %in
     72   store <3 x i64> %ld, <3 x i64> addrspace(3)* %out
     73   ret void
     74 }
     75 
     76 ; FUNC-LABEL: {{^}}local_load_v4i64:
        ; <4 x i64> case: the vector is loaded from %in and stored to %out
        ; (address space 3), with no explicit alignment on either access.
        ; m0 expectations: an s_mov_b32 naming m0 for the SICIVI-prefixed
        ; invocations, and no mention of m0 before the first matched read on gfx900.
     77 ; SICIVI: s_mov_b32 m0
     78 ; GFX9-NOT: m0
     79 
        ; Two ds_read2_b64 matches are required, in order.
     80 ; GCN: ds_read2_b64
     81 ; GCN: ds_read2_b64
     82 
        ; The r600 (redwood) invocation expects eight LDS_READ_RET matches, in
        ; order; they are written as two groups of four.
     83 ; EG: LDS_READ_RET
     84 ; EG: LDS_READ_RET
     85 ; EG: LDS_READ_RET
     86 ; EG: LDS_READ_RET
     87 
     88 ; EG: LDS_READ_RET
     89 ; EG: LDS_READ_RET
     90 ; EG: LDS_READ_RET
     91 ; EG: LDS_READ_RET
     92 define amdgpu_kernel void @local_load_v4i64(<4 x i64> addrspace(3)* %out, <4 x i64> addrspace(3)* %in) #0 {
     93 entry:
     94   %ld = load <4 x i64>, <4 x i64> addrspace(3)* %in
     95   store <4 x i64> %ld, <4 x i64> addrspace(3)* %out
     96   ret void
     97 }
     98 
     99 ; FUNC-LABEL: {{^}}local_load_v8i64:
        ; <8 x i64> case: the vector is loaded from %in and stored to %out
        ; (address space 3), with no explicit alignment on either access.
        ; m0 expectations: an s_mov_b32 naming m0 for the SICIVI-prefixed
        ; invocations, and no mention of m0 before the first matched read on gfx900.
    100 ; SICIVI: s_mov_b32 m0
    101 ; GFX9-NOT: m0
    102 
        ; Four ds_read2_b64 matches are required, in order.
    103 ; GCN: ds_read2_b64
    104 ; GCN: ds_read2_b64
    105 ; GCN: ds_read2_b64
    106 ; GCN: ds_read2_b64
    107 
        ; The r600 (redwood) invocation expects sixteen LDS_READ_RET matches, in order.
    108 ; EG: LDS_READ_RET
    109 ; EG: LDS_READ_RET
    110 ; EG: LDS_READ_RET
    111 ; EG: LDS_READ_RET
    112 ; EG: LDS_READ_RET
    113 ; EG: LDS_READ_RET
    114 ; EG: LDS_READ_RET
    115 ; EG: LDS_READ_RET
    116 ; EG: LDS_READ_RET
    117 ; EG: LDS_READ_RET
    118 ; EG: LDS_READ_RET
    119 ; EG: LDS_READ_RET
    120 ; EG: LDS_READ_RET
    121 ; EG: LDS_READ_RET
    122 ; EG: LDS_READ_RET
    123 ; EG: LDS_READ_RET
    124 define amdgpu_kernel void @local_load_v8i64(<8 x i64> addrspace(3)* %out, <8 x i64> addrspace(3)* %in) #0 {
    125 entry:
    126   %ld = load <8 x i64>, <8 x i64> addrspace(3)* %in
    127   store <8 x i64> %ld, <8 x i64> addrspace(3)* %out
    128   ret void
    129 }
    130 
    131 ; FUNC-LABEL: {{^}}local_load_v16i64:
        ; <16 x i64> case: the vector is loaded from %in and stored to %out
        ; (address space 3), with no explicit alignment on either access.
        ; m0 expectations: an s_mov_b32 naming m0 for the SICIVI-prefixed
        ; invocations, and no mention of m0 before the first matched read on gfx900.
    132 ; SICIVI: s_mov_b32 m0
    133 ; GFX9-NOT: m0
    134 
        ; Eight ds_read2_b64 matches are required, in order.
    135 ; GCN: ds_read2_b64
    136 ; GCN: ds_read2_b64
    137 ; GCN: ds_read2_b64
    138 ; GCN: ds_read2_b64
    139 ; GCN: ds_read2_b64
    140 ; GCN: ds_read2_b64
    141 ; GCN: ds_read2_b64
    142 ; GCN: ds_read2_b64
    143 
        ; The r600 (redwood) invocation expects thirty-two LDS_READ_RET matches,
        ; in order; they are written as eight groups of four.
    144 ; EG: LDS_READ_RET
    145 ; EG: LDS_READ_RET
    146 ; EG: LDS_READ_RET
    147 ; EG: LDS_READ_RET
    148 
    149 ; EG: LDS_READ_RET
    150 ; EG: LDS_READ_RET
    151 ; EG: LDS_READ_RET
    152 ; EG: LDS_READ_RET
    153 
    154 ; EG: LDS_READ_RET
    155 ; EG: LDS_READ_RET
    156 ; EG: LDS_READ_RET
    157 ; EG: LDS_READ_RET
    158 
    159 ; EG: LDS_READ_RET
    160 ; EG: LDS_READ_RET
    161 ; EG: LDS_READ_RET
    162 ; EG: LDS_READ_RET
    163 
    164 ; EG: LDS_READ_RET
    165 ; EG: LDS_READ_RET
    166 ; EG: LDS_READ_RET
    167 ; EG: LDS_READ_RET
    168 
    169 ; EG: LDS_READ_RET
    170 ; EG: LDS_READ_RET
    171 ; EG: LDS_READ_RET
    172 ; EG: LDS_READ_RET
    173 
    174 ; EG: LDS_READ_RET
    175 ; EG: LDS_READ_RET
    176 ; EG: LDS_READ_RET
    177 ; EG: LDS_READ_RET
    178 
    179 ; EG: LDS_READ_RET
    180 ; EG: LDS_READ_RET
    181 ; EG: LDS_READ_RET
    182 ; EG: LDS_READ_RET
    183 define amdgpu_kernel void @local_load_v16i64(<16 x i64> addrspace(3)* %out, <16 x i64> addrspace(3)* %in) #0 {
    184 entry:
    185   %ld = load <16 x i64>, <16 x i64> addrspace(3)* %in
    186   store <16 x i64> %ld, <16 x i64> addrspace(3)* %out
    187   ret void
    188 }
    189 
    190 attributes #0 = { nounwind } ; attribute group applied to each kernel above whose signature ends in #0
    191