; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s

; Code generation test for loads of i32 and vectors of i32 from addrspace(2),
; both plain and zero-/sign-extended to i64, with the result stored to
; addrspace(1).
;
; Check prefixes, as enabled by the run lines above
;   FUNC       - every run line
;   GCN        - all three amdgcn run lines
;   GCN-NOHSA  - the amdgcn run lines using -march=amdgcn (default cpu and tonga)
;   GCN-HSA    - the amdgcn--amdhsa / kaveri run line
;   EG         - the r600 / redwood run line
;
; NOTE(review): do not write a check prefix immediately followed by a colon
; inside explanatory comments; FileCheck would treat it as a directive.

; ----------------------------------------------------------------------------
; Plain loads
; ----------------------------------------------------------------------------

; Scalar i32 load.
; FUNC-LABEL: {{^}}constant_load_i32:
; GCN: s_load_dword s{{[0-9]+}}

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
define void @constant_load_i32(i32 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
entry:
  %ld = load i32, i32 addrspace(2)* %in
  store i32 %ld, i32 addrspace(1)* %out
  ret void
}

; <2 x i32> load.
; FUNC-LABEL: {{^}}constant_load_v2i32:
; GCN: s_load_dwordx2

; EG: VTX_READ_64
define void @constant_load_v2i32(<2 x i32> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
entry:
  %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
  store <2 x i32> %ld, <2 x i32> addrspace(1)* %out
  ret void
}

; <3 x i32> load; the checks expect the same wide load as the <4 x i32> case.
; FUNC-LABEL: {{^}}constant_load_v3i32:
; GCN: s_load_dwordx4

; EG: VTX_READ_128
define void @constant_load_v3i32(<3 x i32> addrspace(1)* %out, <3 x i32> addrspace(2)* %in) #0 {
entry:
  %ld = load <3 x i32>, <3 x i32> addrspace(2)* %in
  store <3 x i32> %ld, <3 x i32> addrspace(1)* %out
  ret void
}

; <4 x i32> load.
; FUNC-LABEL: {{^}}constant_load_v4i32:
; GCN: s_load_dwordx4

; EG: VTX_READ_128
define void @constant_load_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
entry:
  %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
  store <4 x i32> %ld, <4 x i32> addrspace(1)* %out
  ret void
}

; <8 x i32> load; the EG checks expect two 128-bit reads.
; FUNC-LABEL: {{^}}constant_load_v8i32:
; GCN: s_load_dwordx8

; EG: VTX_READ_128
; EG: VTX_READ_128
define void @constant_load_v8i32(<8 x i32> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
entry:
  %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
  store <8 x i32> %ld, <8 x i32> addrspace(1)* %out
  ret void
}

; <16 x i32> load; the EG checks expect four 128-bit reads.
; FUNC-LABEL: {{^}}constant_load_v16i32:
; GCN: s_load_dwordx16

; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
; EG: VTX_READ_128
define void @constant_load_v16i32(<16 x i32> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
entry:
  %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
  store <16 x i32> %ld, <16 x i32> addrspace(1)* %out
  ret void
}

; ----------------------------------------------------------------------------
; Scalar i32 -> i64 extending loads
; ----------------------------------------------------------------------------

; zext i32 -> i64: the checks expect the high half to be a move of constant 0.
; FUNC-LABEL: {{^}}constant_zextload_i32_to_i64:
; GCN-DAG: s_load_dword s[[SLO:[0-9]+]],
; GCN-DAG: v_mov_b32_e32 v[[SHI:[0-9]+]], 0{{$}}
; GCN: store_dwordx2

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG: CF_END
; EG: VTX_READ_32
define void @constant_zextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
  %ld = load i32, i32 addrspace(2)* %in
  %ext = zext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; sext i32 -> i64: the checks expect the high half to be the loaded value
; arithmetically shifted right by 31.
; FUNC-LABEL: {{^}}constant_sextload_i32_to_i64:
; GCN: s_load_dword s[[SLO:[0-9]+]]
; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[SLO]], 31
; GCN: store_dwordx2

; EG: MEM_RAT_CACHELESS STORE_RAW T{{[0-9]+}}.XY
; EG: CF_END
; EG: VTX_READ_32
; EG: ASHR {{[* ]*}}T{{[0-9]\.[XYZW]}}, T{{[0-9]\.[XYZW]}}, literal.
; EG: 31
define void @constant_sextload_i32_to_i64(i64 addrspace(1)* %out, i32 addrspace(2)* %in) #0 {
  %ld = load i32, i32 addrspace(2)* %in
  %ext = sext i32 %ld to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; ----------------------------------------------------------------------------
; Vector i32 -> i64 extending loads (only the GCN prefixes are checked below)
; ----------------------------------------------------------------------------

; zext <1 x i32> -> <1 x i64>.
; FUNC-LABEL: {{^}}constant_zextload_v1i32_to_v1i64:
; GCN: s_load_dword
; GCN: store_dwordx2
define void @constant_zextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
  %ext = zext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; sext <1 x i32> -> <1 x i64>.
; FUNC-LABEL: {{^}}constant_sextload_v1i32_to_v1i64:
; GCN: s_load_dword s[[LO:[0-9]+]]
; GCN: s_ashr_i32 s[[HI:[0-9]+]], s[[LO]], 31
; GCN: store_dwordx2
define void @constant_sextload_v1i32_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i32> addrspace(2)* %in) #0 {
  %ld = load <1 x i32>, <1 x i32> addrspace(2)* %in
  %ext = sext <1 x i32> %ld to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; zext <2 x i32> -> <2 x i64>.
; FUNC-LABEL: {{^}}constant_zextload_v2i32_to_v2i64:
; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}
; GCN: store_dwordx4
define void @constant_zextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
  %ext = zext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; sext <2 x i32> -> <2 x i64>: one shift is expected per element, in any order.
; FUNC-LABEL: {{^}}constant_sextload_v2i32_to_v2i64:
; GCN: s_load_dwordx2 s{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0x0{{$}}

; GCN-DAG: s_ashr_i32
; GCN-DAG: s_ashr_i32

; GCN: store_dwordx4
define void @constant_sextload_v2i32_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i32> addrspace(2)* %in) #0 {
  %ld = load <2 x i32>, <2 x i32> addrspace(2)* %in
  %ext = sext <2 x i32> %ld to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; zext <4 x i32> -> <4 x i64>: two dwordx4 stores are expected.
; FUNC-LABEL: {{^}}constant_zextload_v4i32_to_v4i64:
; GCN: s_load_dwordx4

; GCN: store_dwordx4
; GCN: store_dwordx4
define void @constant_zextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
  %ext = zext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; sext <4 x i32> -> <4 x i64>: four shifts, then two dwordx4 stores.
; FUNC-LABEL: {{^}}constant_sextload_v4i32_to_v4i64:
; GCN: s_load_dwordx4

; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32

; GCN: store_dwordx4
; GCN: store_dwordx4
define void @constant_sextload_v4i32_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i32> addrspace(2)* %in) #0 {
  %ld = load <4 x i32>, <4 x i32> addrspace(2)* %in
  %ext = sext <4 x i32> %ld to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; zext <8 x i32> -> <8 x i64>: four dwordx4 stores are expected, buffer stores
; without HSA and flat stores with HSA, in any order.
; FUNC-LABEL: {{^}}constant_zextload_v8i32_to_v8i64:
; GCN: s_load_dwordx8

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define void @constant_zextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
  %ext = zext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; sext <8 x i32> -> <8 x i64>: eight shifts, then four dwordx4 stores.
; FUNC-LABEL: {{^}}constant_sextload_v8i32_to_v8i64:
; GCN: s_load_dwordx8

; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32
; GCN: s_ashr_i32

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define void @constant_sextload_v8i32_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i32> addrspace(2)* %in) #0 {
  %ld = load <8 x i32>, <8 x i32> addrspace(2)* %in
  %ext = sext <8 x i32> %ld to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; sext <16 x i32> -> <16 x i64>: eight dwordx4 stores are expected.
; NOTE(review): only a single s_ashr_i32 is checked here, unlike the smaller
; sext tests which check one per element - confirm whether that is intended.
; FUNC-LABEL: {{^}}constant_sextload_v16i32_to_v16i64:
; GCN: s_load_dwordx16


; GCN-DAG: s_ashr_i32

; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
; GCN: store_dwordx4
define void @constant_sextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
  %ext = sext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; zext <16 x i32> -> <16 x i64>: eight dwordx4 stores are expected.
; FUNC-LABEL: {{^}}constant_zextload_v16i32_to_v16i64:
; GCN: s_load_dwordx16

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
define void @constant_zextload_v16i32_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i32> addrspace(2)* %in) #0 {
  %ld = load <16 x i32>, <16 x i32> addrspace(2)* %in
  %ext = zext <16 x i32> %ld to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; sext <32 x i32> -> <32 x i64>: two dwordx16 loads and sixteen dwordx4 stores
; are expected.
; FUNC-LABEL: {{^}}constant_sextload_v32i32_to_v32i64:

; GCN: s_load_dwordx16
; GCN: s_load_dwordx16

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4
; GCN-NOHSA: buffer_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4

; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4
; GCN-HSA: flat_store_dwordx4

define void @constant_sextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
  %ext = sext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; zext <32 x i32> -> <32 x i64>: two dwordx16 loads and sixteen dwordx4 stores
; are expected, with the stores accepted in any order.
; FUNC-LABEL: {{^}}constant_zextload_v32i32_to_v32i64:
; GCN: s_load_dwordx16
; GCN: s_load_dwordx16

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4

; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4
; GCN-NOHSA-DAG: buffer_store_dwordx4


; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4

; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
; GCN-HSA-DAG: flat_store_dwordx4
define void @constant_zextload_v32i32_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i32> addrspace(2)* %in) #0 {
  %ld = load <32 x i32>, <32 x i32> addrspace(2)* %in
  %ext = zext <32 x i32> %ld to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

attributes #0 = { nounwind }