1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SICIVI,FUNC %s 4 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9,FUNC %s 5 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefixes=EG,FUNC %s 6 7 ; Testing for ds_read_b128 8 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s 9 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs -mattr=+enable-ds128 < %s | FileCheck -check-prefixes=CIVI,FUNC %s 10 11 ; FUNC-LABEL: {{^}}local_load_f64: 12 ; SICIV: s_mov_b32 m0 13 ; GFX9-NOT: m0 14 15 ; GCN: ds_read_b64 [[VAL:v\[[0-9]+:[0-9]+\]]], v{{[0-9]+}}{{$}} 16 ; GCN: ds_write_b64 v{{[0-9]+}}, [[VAL]] 17 18 ; EG: LDS_READ_RET 19 ; EG: LDS_READ_RET 20 define amdgpu_kernel void @local_load_f64(double addrspace(3)* %out, double addrspace(3)* %in) #0 { 21 %ld = load double, double addrspace(3)* %in 22 store double %ld, double addrspace(3)* %out 23 ret void 24 } 25 26 ; FUNC-LABEL: {{^}}local_load_v2f64: 27 ; SICIV: s_mov_b32 m0 28 ; GFX9-NOT: m0 29 30 ; GCN: ds_read2_b64 31 32 ; EG: LDS_READ_RET 33 ; EG: LDS_READ_RET 34 ; EG: LDS_READ_RET 35 ; EG: LDS_READ_RET 36 define amdgpu_kernel void @local_load_v2f64(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) #0 { 37 entry: 38 %ld = load <2 x double>, <2 x double> addrspace(3)* %in 39 store <2 x double> %ld, <2 x double> addrspace(3)* %out 40 ret void 41 } 42 43 ; FUNC-LABEL: {{^}}local_load_v3f64: 44 ; SICIV: s_mov_b32 m0 45 ; GFX9-NOT: m0 46 47 ; GCN-DAG: ds_read2_b64 48 ; GCN-DAG: ds_read_b64 49 50 ; EG: LDS_READ_RET 51 ; EG: LDS_READ_RET 52 ; EG: LDS_READ_RET 53 ; EG: LDS_READ_RET 54 ; EG: LDS_READ_RET 55 ; EG: LDS_READ_RET 56 define amdgpu_kernel void @local_load_v3f64(<3 x double> addrspace(3)* %out, <3 x double> addrspace(3)* %in) #0 { 57 entry: 58 %ld = load <3 x double>, <3 x double> addrspace(3)* %in 59 store <3 x double> %ld, <3 x double> addrspace(3)* %out 60 ret void 61 } 62 63 ; FUNC-LABEL: {{^}}local_load_v4f64: 64 ; SICIV: s_mov_b32 m0 65 ; GFX9-NOT: m0 66 67 ; GCN: ds_read2_b64 68 ; GCN: ds_read2_b64 69 70 ; EG: LDS_READ_RET 71 ; EG: LDS_READ_RET 72 ; EG: LDS_READ_RET 73 ; EG: LDS_READ_RET 74 75 ; EG: LDS_READ_RET 76 ; EG: LDS_READ_RET 77 ; EG: LDS_READ_RET 78 ; EG: LDS_READ_RET 79 define amdgpu_kernel void @local_load_v4f64(<4 x double> addrspace(3)* %out, <4 x double> addrspace(3)* %in) #0 { 80 entry: 81 %ld = load <4 x double>, <4 x double> addrspace(3)* %in 82 store <4 x double> %ld, <4 x double> addrspace(3)* %out 83 ret void 84 } 85 86 ; FUNC-LABEL: {{^}}local_load_v8f64: 87 ; SICIV: s_mov_b32 m0 88 ; GFX9-NOT: m0 89 90 ; GCN: ds_read2_b64 91 ; GCN: ds_read2_b64 92 ; GCN: ds_read2_b64 93 ; GCN: ds_read2_b64 94 95 ; EG: LDS_READ_RET 96 ; EG: LDS_READ_RET 97 ; EG: LDS_READ_RET 98 ; EG: LDS_READ_RET 99 ; EG: LDS_READ_RET 100 ; EG: LDS_READ_RET 101 ; EG: LDS_READ_RET 102 ; EG: LDS_READ_RET 103 ; EG: LDS_READ_RET 104 ; EG: LDS_READ_RET 105 ; EG: LDS_READ_RET 106 ; EG: LDS_READ_RET 107 ; EG: LDS_READ_RET 108 ; EG: LDS_READ_RET 109 ; EG: LDS_READ_RET 110 ; EG: LDS_READ_RET 111 define amdgpu_kernel void @local_load_v8f64(<8 x double> addrspace(3)* %out, <8 x double> addrspace(3)* %in) #0 { 112 entry: 113 %ld = load <8 x double>, <8 x double> addrspace(3)* %in 114 store <8 x double> %ld, <8 x double> addrspace(3)* %out 115 ret void 116 } 117 118 ; FUNC-LABEL: {{^}}local_load_v16f64: 119 ; SICIV: s_mov_b32 m0 120 ; GFX9-NOT: m0 121 122 ; GCN: ds_read2_b64 123 ; GCN: ds_read2_b64 124 ; GCN: ds_read2_b64 125 ; GCN: ds_read2_b64 126 ; GCN: ds_read2_b64 127 ; GCN: ds_read2_b64 128 ; GCN: ds_read2_b64 129 ; GCN: ds_read2_b64 130 131 ; EG: LDS_READ_RET 132 ; EG: LDS_READ_RET 133 ; EG: LDS_READ_RET 134 ; EG: LDS_READ_RET 135 136 ; EG: LDS_READ_RET 137 ; EG: LDS_READ_RET 138 ; EG: LDS_READ_RET 139 ; EG: LDS_READ_RET 140 141 ; EG: LDS_READ_RET 142 ; EG: LDS_READ_RET 143 ; EG: LDS_READ_RET 144 ; EG: LDS_READ_RET 145 146 ; EG: LDS_READ_RET 147 ; EG: LDS_READ_RET 148 ; EG: LDS_READ_RET 149 ; EG: LDS_READ_RET 150 151 ; EG: LDS_READ_RET 152 ; EG: LDS_READ_RET 153 ; EG: LDS_READ_RET 154 ; EG: LDS_READ_RET 155 156 ; EG: LDS_READ_RET 157 ; EG: LDS_READ_RET 158 ; EG: LDS_READ_RET 159 ; EG: LDS_READ_RET 160 161 ; EG: LDS_READ_RET 162 ; EG: LDS_READ_RET 163 ; EG: LDS_READ_RET 164 ; EG: LDS_READ_RET 165 166 ; EG: LDS_READ_RET 167 ; EG: LDS_READ_RET 168 ; EG: LDS_READ_RET 169 ; EG: LDS_READ_RET 170 define amdgpu_kernel void @local_load_v16f64(<16 x double> addrspace(3)* %out, <16 x double> addrspace(3)* %in) #0 { 171 entry: 172 %ld = load <16 x double>, <16 x double> addrspace(3)* %in 173 store <16 x double> %ld, <16 x double> addrspace(3)* %out 174 ret void 175 } 176 177 ; Tests if ds_read_b128 gets generated for the 16 byte aligned load. 178 ; FUNC-LABEL: {{^}}local_load_v2f64_to_128: 179 180 ; CIVI: ds_read_b128 181 ; CIVI: ds_write_b128 182 183 ; EG: LDS_READ_RET 184 ; EG: LDS_READ_RET 185 ; EG: LDS_READ_RET 186 ; EG: LDS_READ_RET 187 define amdgpu_kernel void @local_load_v2f64_to_128(<2 x double> addrspace(3)* %out, <2 x double> addrspace(3)* %in) { 188 entry: 189 %ld = load <2 x double>, <2 x double> addrspace(3)* %in, align 16 190 store <2 x double> %ld, <2 x double> addrspace(3)* %out, align 16 191 ret void 192 } 193 194 attributes #0 = { nounwind } 195