1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s 5 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s 6 7 ; Testing for ds_read/write_b128 8 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s 9 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s 10 11 ; FUNC-LABEL: {{^}}local_load_i64: 12 ; SICIVI: s_mov_b32 m0 13 ; GFX9-NOT: m0 14 15 ; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}} 16 ; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]] 17 18 ; EG: LDS_READ_RET 19 ; EG: LDS_READ_RET 20 define amdgpu_kernel void @local_load_i64(i64 addrspace(3)* %out, i64 addrspace(3)* %in) #0 { 21 %ld = load i64, i64 addrspace(3)* %in 22 store i64 %ld, i64 addrspace(3)* %out 23 ret void 24 } 25 26 ; FUNC-LABEL: {{^}}local_load_v2i64: 27 ; SICIVI: s_mov_b32 m0 28 ; GFX9-NOT: m0 29 30 ; GCN: ds_read2_b64 31 32 ; EG: LDS_READ_RET 33 ; EG: LDS_READ_RET 34 ; EG: LDS_READ_RET 35 ; EG: LDS_READ_RET 36 define amdgpu_kernel void @local_load_v2i64(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) #0 { 37 entry: 38 %ld = load <2 x i64>, <2 x i64> addrspace(3)* %in 39 store <2 x i64> %ld, <2 x i64> addrspace(3)* %out 40 ret void 41 } 42 43 ; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load. 44 ; FUNC-LABEL: {{^}}local_load_v2i64_to_128: 45 46 ; CIVI: ds_read_b128 47 ; CIVI: ds_write_b128 48 49 define amdgpu_kernel void @local_load_v2i64_to_128(<2 x i64> addrspace(3)* %out, <2 x i64> addrspace(3)* %in) { 50 entry: 51 %ld = load <2 x i64>, <2 x i64> addrspace(3)* %in, align 16 52 store <2 x i64> %ld, <2 x i64> addrspace(3)* %out, align 16 53 ret void 54 } 55 56 ; FUNC-LABEL: {{^}}local_load_v3i64: 57 ; SICIVI: s_mov_b32 m0 58 ; GFX9-NOT: m0 59 60 ; GCN-DAG: ds_read2_b64 61 ; GCN-DAG: ds_read_b64 62 63 ; EG: LDS_READ_RET 64 ; EG: LDS_READ_RET 65 ; EG: LDS_READ_RET 66 ; EG: LDS_READ_RET 67 ; EG: LDS_READ_RET 68 ; EG: LDS_READ_RET 69 define amdgpu_kernel void @local_load_v3i64(<3 x i64> addrspace(3)* %out, <3 x i64> addrspace(3)* %in) #0 { 70 entry: 71 %ld = load <3 x i64>, <3 x i64> addrspace(3)* %in 72 store <3 x i64> %ld, <3 x i64> addrspace(3)* %out 73 ret void 74 } 75 76 ; FUNC-LABEL: {{^}}local_load_v4i64: 77 ; SICIVI: s_mov_b32 m0 78 ; GFX9-NOT: m0 79 80 ; GCN: ds_read2_b64 81 ; GCN: ds_read2_b64 82 83 ; EG: LDS_READ_RET 84 ; EG: LDS_READ_RET 85 ; EG: LDS_READ_RET 86 ; EG: LDS_READ_RET 87 88 ; EG: LDS_READ_RET 89 ; EG: LDS_READ_RET 90 ; EG: LDS_READ_RET 91 ; EG: LDS_READ_RET 92 define amdgpu_kernel void @local_load_v4i64(<4 x i64> addrspace(3)* %out, <4 x i64> addrspace(3)* %in) #0 { 93 entry: 94 %ld = load <4 x i64>, <4 x i64> addrspace(3)* %in 95 store <4 x i64> %ld, <4 x i64> addrspace(3)* %out 96 ret void 97 } 98 99 ; FUNC-LABEL: {{^}}local_load_v8i64: 100 ; SICIVI: s_mov_b32 m0 101 ; GFX9-NOT: m0 102 103 ; GCN: ds_read2_b64 104 ; GCN: ds_read2_b64 105 ; GCN: ds_read2_b64 106 ; GCN: ds_read2_b64 107 108 ; EG: LDS_READ_RET 109 ; EG: LDS_READ_RET 110 ; EG: LDS_READ_RET 111 ; EG: LDS_READ_RET 112 ; EG: LDS_READ_RET 113 ; EG: LDS_READ_RET 114 ; EG: LDS_READ_RET 115 ; EG: LDS_READ_RET 116 ; EG: LDS_READ_RET 117 ; EG: LDS_READ_RET 118 ; EG: LDS_READ_RET 119 ; EG: LDS_READ_RET 120 ; EG: LDS_READ_RET 121 ; EG: LDS_READ_RET 122 ; EG: LDS_READ_RET 123 ; EG: LDS_READ_RET 124 define amdgpu_kernel void @local_load_v8i64(<8 x i64> addrspace(3)* %out, <8 x i64> addrspace(3)* %in) #0 { 125 entry: 126 %ld = load <8 x i64>, <8 x i64> addrspace(3)* %in 127 store <8 x i64> %ld, <8 x i64> addrspace(3)* %out 128 ret void 129 } 130 131 ; FUNC-LABEL: {{^}}local_load_v16i64: 132 ; SICIVI: s_mov_b32 m0 133 ; GFX9-NOT: m0 134 135 ; GCN: ds_read2_b64 136 ; GCN: ds_read2_b64 137 ; GCN: ds_read2_b64 138 ; GCN: ds_read2_b64 139 ; GCN: ds_read2_b64 140 ; GCN: ds_read2_b64 141 ; GCN: ds_read2_b64 142 ; GCN: ds_read2_b64 143 144 ; EG: LDS_READ_RET 145 ; EG: LDS_READ_RET 146 ; EG: LDS_READ_RET 147 ; EG: LDS_READ_RET 148 149 ; EG: LDS_READ_RET 150 ; EG: LDS_READ_RET 151 ; EG: LDS_READ_RET 152 ; EG: LDS_READ_RET 153 154 ; EG: LDS_READ_RET 155 ; EG: LDS_READ_RET 156 ; EG: LDS_READ_RET 157 ; EG: LDS_READ_RET 158 159 ; EG: LDS_READ_RET 160 ; EG: LDS_READ_RET 161 ; EG: LDS_READ_RET 162 ; EG: LDS_READ_RET 163 164 ; EG: LDS_READ_RET 165 ; EG: LDS_READ_RET 166 ; EG: LDS_READ_RET 167 ; EG: LDS_READ_RET 168 169 ; EG: LDS_READ_RET 170 ; EG: LDS_READ_RET 171 ; EG: LDS_READ_RET 172 ; EG: LDS_READ_RET 173 174 ; EG: LDS_READ_RET 175 ; EG: LDS_READ_RET 176 ; EG: LDS_READ_RET 177 ; EG: LDS_READ_RET 178 179 ; EG: LDS_READ_RET 180 ; EG: LDS_READ_RET 181 ; EG: LDS_READ_RET 182 ; EG: LDS_READ_RET 183 define amdgpu_kernel void @local_load_v16i64(<16 x i64> addrspace(3)* %out, <16 x i64> addrspace(3)* %in) #0 { 184 entry: 185 %ld = load <16 x i64>, <16 x i64> addrspace(3)* %in 186 store <16 x i64> %ld, <16 x i64> addrspace(3)* %out 187 ret void 188 } 189 190 attributes #0 = { nounwind } 191