; RUN: llc -march=amdgcn -mcpu=verde -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,SI %s
; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s
; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s

; Instruction-selection checks for the dimension-aware image intrinsics
; (image.load, image.load.mip, image.store, image.store.mip and
; image.getresinfo).
;
; Conventions visible in the check lines below:
;   * the resource descriptor is passed inreg and is expected in s[0:7];
;   * address operands start at v0 for loads and directly after the data
;     registers for stores;
;   * an address with three or four components is expected in a
;     four-register tuple;
;   * the cube and *array dimensions are expected to carry the "da" modifier;
;   * the first i32 operand of a load / getresinfo (second operand of a store)
;     is the dmask, and the final i32 operand selects glc (bit 0) and
;     slc (bit 1).

;------------------------------------------------------------------------------
; image.load, one test per dimension
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}load_1d:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_1d(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2d:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_3d:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_cube:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1darray:
; GCN: image_load v[0:3], v[0:1], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2darray:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2dmsaa:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_2dmsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_2darraymsaa:
; GCN: image_load v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_2darraymsaa(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

;------------------------------------------------------------------------------
; image.load.mip: the mip level is passed as the last address component
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}load_mip_1d:
; GCN: image_load_mip v[0:3], v[0:1], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_mip_1d(<8 x i32> inreg %rsrc, i32 %s, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_2d:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_mip_2d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_3d:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @load_mip_3d(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %r, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_cube:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_mip_cube(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_1darray:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_mip_1darray(<8 x i32> inreg %rsrc, i32 %s, i32 %slice, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_mip_2darray:
; GCN: image_load_mip v[0:3], v[0:3], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @load_mip_2darray(<8 x i32> inreg %rsrc, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

;------------------------------------------------------------------------------
; image.store: the data occupies v[0:3], so address operands start at v4
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}store_1d:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2d:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t) {
main_body:
  call void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_3d:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r) {
main_body:
  call void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_cube:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_1darray:
; GCN: image_store v[0:3], v[4:5], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2darray:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice) {
main_body:
  call void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2dmsaa:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_2dmsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_2darraymsaa:
; GCN: image_store v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_2darraymsaa(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %fragid) {
main_body:
  call void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %fragid, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

;------------------------------------------------------------------------------
; image.store.mip
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}store_mip_1d:
; GCN: image_store_mip v[0:3], v[4:5], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_mip_1d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_2d:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_mip_2d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_3d:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps void @store_mip_3d(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %r, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %r, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_cube:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_mip_cube(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_1darray:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_mip_1darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %slice, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_mip_2darray:
; GCN: image_store_mip v[0:3], v[4:7], s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps void @store_mip_2darray(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s, i32 %t, i32 %slice, i32 %mip) {
main_body:
  call void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, i32 %t, i32 %slice, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

;------------------------------------------------------------------------------
; image.getresinfo: the only address operand is the mip level, for every
; dimension
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}getresinfo_1d:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @getresinfo_1d(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2d:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @getresinfo_2d(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_3d:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @getresinfo_3d(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_cube:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @getresinfo_cube(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_1darray:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @getresinfo_1darray(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2darray:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @getresinfo_2darray(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2dmsaa:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm{{$}}
define amdgpu_ps <4 x float> @getresinfo_2dmsaa(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}getresinfo_2darraymsaa:
; GCN: image_get_resinfo v[0:3], v0, s[0:7] dmask:0xf unorm da{{$}}
define amdgpu_ps <4 x float> @getresinfo_2darraymsaa(<8 x i32> inreg %rsrc, i32 %mip) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32 15, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %v
}

;------------------------------------------------------------------------------
; Partial dmasks: the data register count is expected to follow the return /
; data type (one or two dwords) rather than always being four
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}load_1d_V1:
; GCN: image_load v0, v0, s[0:7] dmask:0x8 unorm{{$}}
define amdgpu_ps float @load_1d_V1(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call float @llvm.amdgcn.image.load.1d.f32.i32(i32 8, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret float %v
}

; GCN-LABEL: {{^}}load_1d_V2:
; GCN: image_load v[0:1], v0, s[0:7] dmask:0x9 unorm{{$}}
define amdgpu_ps <2 x float> @load_1d_V2(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32 9, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret <2 x float> %v
}

; GCN-LABEL: {{^}}store_1d_V1:
; GCN: image_store v0, v1, s[0:7] dmask:0x2 unorm{{$}}
define amdgpu_ps void @store_1d_V1(<8 x i32> inreg %rsrc, float %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.f32.i32(float %vdata, i32 2, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

; GCN-LABEL: {{^}}store_1d_V2:
; GCN: image_store v[0:1], v2, s[0:7] dmask:0xc unorm{{$}}
define amdgpu_ps void @store_1d_V2(<8 x i32> inreg %rsrc, <2 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float> %vdata, i32 12, i32 %s, <8 x i32> %rsrc, i32 0, i32 0)
  ret void
}

;------------------------------------------------------------------------------
; Cache-policy operand: 1 selects glc, 2 selects slc, 3 selects both
;------------------------------------------------------------------------------

; GCN-LABEL: {{^}}load_1d_glc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc{{$}}
define amdgpu_ps <4 x float> @load_1d_glc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1d_slc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm slc{{$}}
define amdgpu_ps <4 x float> @load_1d_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}load_1d_glc_slc:
; GCN: image_load v[0:3], v0, s[0:7] dmask:0xf unorm glc slc{{$}}
define amdgpu_ps <4 x float> @load_1d_glc_slc(<8 x i32> inreg %rsrc, i32 %s) {
main_body:
  %v = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret <4 x float> %v
}

; GCN-LABEL: {{^}}store_1d_glc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc{{$}}
define amdgpu_ps void @store_1d_glc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 1)
  ret void
}

; GCN-LABEL: {{^}}store_1d_slc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm slc{{$}}
define amdgpu_ps void @store_1d_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 2)
  ret void
}

; GCN-LABEL: {{^}}store_1d_glc_slc:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm glc slc{{$}}
define amdgpu_ps void @store_1d_glc_slc(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %s) {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %vdata, i32 15, i32 %s, <8 x i32> %rsrc, i32 0, i32 3)
  ret void
}

;------------------------------------------------------------------------------
; Special cases
;------------------------------------------------------------------------------

; With a dmask of 0 no channel is requested, so no image instruction is
; expected in the output at all.
;
; GCN-LABEL: {{^}}getresinfo_dmask0:
; GCN-NOT: image
; GCN: ; return to shader part epilog
define amdgpu_ps <4 x float> @getresinfo_dmask0(<8 x i32> inreg %rsrc, <4 x float> %vdata, i32 %mip) #0 {
main_body:
  %r = call <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32 0, i32 %mip, <8 x i32> %rsrc, i32 0, i32 0)
  ret <4 x float> %r
}

; Ideally, the register allocator would avoid the wait here.
;
; The load overwrites v[0:3], which the first store still reads as its data
; operand, so an expcnt wait is expected on SI only; the vmcnt wait before the
; second store is expected on every target.
;
; GCN-LABEL: {{^}}image_store_wait:
; GCN: image_store v[0:3], v4, s[0:7] dmask:0xf unorm
; SI: s_waitcnt expcnt(0)
; GCN: image_load v[0:3], v4, s[8:15] dmask:0xf unorm
; GCN: s_waitcnt vmcnt(0)
; GCN: image_store v[0:3], v4, s[16:23] dmask:0xf unorm
define amdgpu_ps void @image_store_wait(<8 x i32> inreg %arg, <8 x i32> inreg %arg1, <8 x i32> inreg %arg2, <4 x float> %arg3, i32 %arg4) #0 {
main_body:
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %arg3, i32 15, i32 %arg4, <8 x i32> %arg, i32 0, i32 0)
  %data = call <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32 15, i32 %arg4, <8 x i32> %arg1, i32 0, i32 0)
  call void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float> %data, i32 15, i32 %arg4, <8 x i32> %arg2, i32 0, i32 0)
  ret void
}

; The two LDS stores on either side of the image load are expected to be
; merged into a single ds_write2_b32.
;
; SI won't merge ds memory operations, because of the signed offset bug, so
; we only have check lines for VI.
;
; The load returns a single float, so it requests a single channel (dmask 1).
;
; VI-LABEL: {{^}}image_load_mmo:
; VI: v_mov_b32_e32 [[ZERO:v[0-9]+]], 0
; VI: ds_write2_b32 v{{[0-9]+}}, [[ZERO]], [[ZERO]] offset1:4
define amdgpu_ps float @image_load_mmo(<8 x i32> inreg %rsrc, float addrspace(3)* %lds, <2 x i32> %c) #0 {
  store float 0.000000e+00, float addrspace(3)* %lds
  %c0 = extractelement <2 x i32> %c, i32 0
  %c1 = extractelement <2 x i32> %c, i32 1
  %tex = call float @llvm.amdgcn.image.load.2d.f32.i32(i32 1, i32 %c0, i32 %c1, <8 x i32> %rsrc, i32 0, i32 0)
  %tmp2 = getelementptr float, float addrspace(3)* %lds, i32 4
  store float 0.000000e+00, float addrspace(3)* %tmp2
  ret float %tex
}

;------------------------------------------------------------------------------
; Intrinsic declarations
;------------------------------------------------------------------------------

declare <4 x float> @llvm.amdgcn.image.load.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.3d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.cube.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.1darray.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2dmsaa.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.2darraymsaa.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

declare <4 x float> @llvm.amdgcn.image.load.mip.1d.v4f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2d.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.3d.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.cube.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.1darray.v4f32.i32(i32, i32, i32, i32, <8 x i32>, i32, i32) #1
declare <4 x float> @llvm.amdgcn.image.load.mip.2darray.v4f32.i32(i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #1

declare void @llvm.amdgcn.image.store.1d.v4f32.i32(<4 x float>, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1darray.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2dmsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.2darraymsaa.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

declare void @llvm.amdgcn.image.store.mip.1d.v4f32.i32(<4 x float>, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.2d.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.3d.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.cube.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.1darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.mip.2darray.v4f32.i32(<4 x float>, i32, i32, i32, i32, i32, <8 x i32>, i32, i32) #0

declare <4 x float> @llvm.amdgcn.image.getresinfo.1d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.3d.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.cube.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.1darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darray.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2dmsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2
declare <4 x float> @llvm.amdgcn.image.getresinfo.2darraymsaa.v4f32.i32(i32, i32, <8 x i32>, i32, i32) #2

declare float @llvm.amdgcn.image.load.1d.f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare float @llvm.amdgcn.image.load.2d.f32.i32(i32, i32, i32, <8 x i32>, i32, i32) #1
declare <2 x float> @llvm.amdgcn.image.load.1d.v2f32.i32(i32, i32, <8 x i32>, i32, i32) #1
declare void @llvm.amdgcn.image.store.1d.f32.i32(float, i32, i32, <8 x i32>, i32, i32) #0
declare void @llvm.amdgcn.image.store.1d.v2f32.i32(<2 x float>, i32, i32, <8 x i32>, i32, i32) #0

attributes #0 = { nounwind }
attributes #1 = { nounwind readonly }
attributes #2 = { nounwind readnone }