; RUN: llc -march=amdgcn -mcpu=gfx900 -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,GFX9 %s
; RUN: llc -march=amdgcn -mcpu=fiji -amdgpu-sroa=0 -mattr=-promote-alloca -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN,VI %s

; Each function in this file loads a 16-bit value (or an 8-bit value that is
; extended to 16 bits) and inserts it into element 0 of a two-element 16-bit
; vector. The first run line (gfx900) uses the GCN and GFX9 check prefixes,
; the second run line (fiji) uses the GCN and VI check prefixes.

; The loaded i16 is inserted into element 0 of an undef <2 x i16>, and the
; vector is returned.
; GCN-LABEL: {{^}}load_local_lo_v2i16_undeflo:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define <2 x i16> @load_local_lo_v2i16_undeflo(i16 addrspace(3)* %in) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> undef, i16 %load, i32 0
  ret <2 x i16> %build
}

; Element 0 is written twice: first with %reg, then with the loaded i16, so
; the loaded value is what ends up in element 0 of the returned vector.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define <2 x i16> @load_local_lo_v2i16_reglo(i16 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  ret <2 x i16> %build1
}

; Show that we get reasonable regalloc without physreg constraints.
; Element 0 is written with %reg and then with the loaded i16; the vector is
; stored through an undef addrspace(1) pointer instead of being returned.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v0, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v0, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16
define void @load_local_lo_v2i16_reglo_vreg(i16 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 0
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; The loaded i16 replaces element 0 of a zeroinitializer vector.
; GCN-LABEL: {{^}}load_local_lo_v2i16_zerolo:
; GCN: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v1, 0
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define <2 x i16> @load_local_lo_v2i16_zerolo(i16 addrspace(3)* %in) #0 {
entry:
  %load = load i16, i16 addrspace(3)* %in
  %build = insertelement <2 x i16> zeroinitializer, i16 %load, i32 0
  ret <2 x i16> %build
}

; The loaded half replaces element 0 of the constant vector <0.0, 2.0>.
; GCN-LABEL: {{^}}load_local_lo_v2f16_fpimm:
; GCN: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v1, 2.0
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: v_mov_b32_e32 v0, v1
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define <2 x half> @load_local_lo_v2f16_fpimm(half addrspace(3)* %in) #0 {
entry:
  %load = load half, half addrspace(3)* %in
  %build = insertelement <2 x half> <half 0.0, half 2.0>, half %load, i32 0
  ret <2 x half> %build
}

; %reg is bitcast to <2 x half>; the loaded half overwrites element 0 and the
; result is stored.
; GCN-LABEL: {{^}}load_local_lo_v2f16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u16_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u16 v
define void @load_local_lo_v2f16_reghi_vreg(half addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load half, half addrspace(3)* %in
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; A scalar half %reg goes into element 1 and the loaded half into element 0.
; The gfx900 checks here expect a plain ds_read_u16 followed by v_and_b32 and
; v_lshl_or_b32 rather than a d16 load.
; GCN-LABEL: {{^}}load_local_lo_v2f16_reglo_vreg:

; GFX9: ds_read_u16 v
; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}
; GFX9: global_store_dword

; VI: ds_read_u16 v
define void @load_local_lo_v2f16_reglo_vreg(half addrspace(3)* %in, half %reg) #0 {
entry:
  %load = load half, half addrspace(3)* %in
  %build0 = insertelement <2 x half> undef, half %reg, i32 1
  %build1 = insertelement <2 x half> %build0, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; %reg is bitcast to <2 x i16>; a zero-extended i8 load overwrites element 0.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_u8_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u8 v
define void @load_local_lo_v2i16_reghi_vreg_zexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; A scalar i16 %reg goes into element 1 and a zero-extended i8 load into
; element 0. The gfx900 checks here expect a plain ds_read_u8.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9: ds_read_u8 v
; GFX9: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_u8 v
define void @load_local_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = zext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; %reg is bitcast to <2 x i16>; a sign-extended i8 load overwrites element 0.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reghi_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: ds_read_i8_d16 v1, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v1, off{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: ds_read_i8 v
define void @load_local_lo_v2i16_reghi_vreg_sexti8(i8 addrspace(3)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; A scalar i16 %reg goes into element 1 and a sign-extended i8 load into
; element 0. The gfx900 checks here expect a plain ds_read_i8 followed by
; v_and_b32 and v_lshl_or_b32.
; GCN-LABEL: {{^}}load_local_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9: ds_read_i8 v
; GFX9: v_and_b32_e32 v{{[0-9]+}}, 0xffff, v{{[0-9]+}}
; GFX9: v_lshl_or_b32 v{{[0-9]+}}, v{{[0-9]+}}, 16, v{{[0-9]+}}

; VI: ds_read_i8 v
define void @load_local_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(3)* %in, i16 %reg) #0 {
entry:
  %load = load i8, i8 addrspace(3)* %in
  %ext = sext i8 %load to i16
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; addrspace(1) i16 load at element index -2047 from %in; the checks expect the
; address to fold into an immediate offset of -4094.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg(i16 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(1)* %in, i64 -2047
  %load = load i16, i16 addrspace(1)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half variant of the addrspace(1) load at element index -2047.
; GCN-LABEL: {{^}}load_global_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2f16_reglo_vreg(half addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(1)* %in, i64 -2047
  %load = load half, half addrspace(1)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; addrspace(1) i8 load at byte index -4095, zero-extended to i16 before the
; insert into element 0.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_ubyte_d16 v2, v[0:1], off offset:-4095
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; addrspace(1) i8 load at byte index -4095, sign-extended to i16 before the
; insert into element 0.
; GCN-LABEL: {{^}}load_global_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_sbyte_d16 v2, v[0:1], off offset:-4095
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
define void @load_global_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(1)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(1)* %in, i64 -4095
  %load = load i8, i8 addrspace(1)* %gep
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; i16 load through a pointer with no address-space qualifier; the loaded value
; overwrites element 0 of the bitcast %reg.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2i16_reghi_vreg(i16* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i16, i16* %in
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half variant of the unqualified-pointer load.
; GCN-LABEL: {{^}}load_flat_lo_v2f16_reghi_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_short_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2f16_reghi_vreg(half* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load half, half* %in
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; i8 load through an unqualified pointer, zero-extended to i16 before the
; insert into element 0.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_ubyte_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ubyte v{{[0-9]+}}
; VI: v_or_b32_e32
define void @load_flat_lo_v2i16_reglo_vreg_zexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8* %in
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; i8 load through an unqualified pointer, sign-extended to i16 before the
; insert into element 0.
; GCN-LABEL: {{^}}load_flat_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: flat_load_sbyte_d16 v2, v[0:1]
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v[0:1], v2
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_sbyte v{{[0-9]+}}
; VI: v_or_b32_sdwa v{{[0-9]+}}, v{{[0-9]+}}, v{{[0-9]+}} dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD

define void @load_flat_lo_v2i16_reglo_vreg_sexti8(i8* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load i8, i8* %in
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; addrspace(5) i16 load at element index 2045 from a byval argument; the
; checks expect an immediate offset of 4094.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg(i16 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
  %load = load i16, i16 addrspace(5)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg:
; GCN: s_waitcnt
; GFX9: buffer_load_ushort v1, off, s[0:3], s5 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9: v_and_b32
; GFX9: v_lshl_or_b32

; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64
; A scalar i16 %reg goes into element 1; an addrspace(5) i16 load at element
; index 2045 from the byval argument goes into element 0.
; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
define void @load_private_lo_v2i16_reghi_vreg(i16 addrspace(5)* byval %in, i16 %reg) #0 {
entry:
  %gep = getelementptr inbounds i16, i16 addrspace(5)* %in, i64 2045
  %load = load i16, i16 addrspace(5)* %gep
  %build0 = insertelement <2 x i16> undef, i16 %reg, i32 1
  %build1 = insertelement <2 x i16> %build0, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half variant: %reg is bitcast to <2 x half> and the addrspace(5) load at
; element index 2045 overwrites element 0.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s5 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg(half addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(5)* %in, i64 2045
  %load = load half, half addrspace(5)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Volatile i16 load from the constant addrspace(5) address 4094 (built with
; inttoptr); the %in argument is unused.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; The body is identical to @load_private_lo_v2i16_reglo_vreg_nooff; only the
; function name differs.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reghi_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reghi_vreg_nooff(i16 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i16, i16 addrspace(5)* inttoptr (i32 4094 to i16 addrspace(5)*)
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half variant of the volatile load from the constant addrspace(5) address
; 4094.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_short_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ushort v{{[0-9]+}}, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff(half addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load volatile half, half addrspace(5)* inttoptr (i32 4094 to half addrspace(5)*)
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8:
; GCN: s_waitcnt
; GFX9: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
; addrspace(5) i8 load at byte index 4091 from a byval argument, zero-extended
; to i16 and inserted into element 0 of the bitcast %reg.
define void @load_private_lo_v2i16_reglo_vreg_zexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
  %load = load i8, i8 addrspace(5)* %gep
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Sign-extending variant of the addrspace(5) i8 load at byte index 4091.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8:
; GCN: s_waitcnt
; GFX9: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v0
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_sbyte v{{[0-9]+}}, off, s[0:3], s5 offset:4095{{$}}
define void @load_private_lo_v2i16_reglo_vreg_sexti8(i8 addrspace(5)* byval %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i8, i8 addrspace(5)* %in, i64 4091
  %load = load i8, i8 addrspace(5)* %gep
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Volatile i8 load from the constant addrspace(5) address 4094 (built with
; inttoptr), zero-extended to i16; the %in argument is unused.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
  %ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Sign-extending variant of the volatile i8 load from the constant
; addrspace(5) address 4094.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_nooff_sexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_sbyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_sbyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2i16_reglo_vreg_nooff_sexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
  %ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Volatile i8 load from the constant addrspace(5) address 4094, zero-extended
; to i16 and then bitcast to half before the insert into a <2 x half>.
; GCN-LABEL: {{^}}load_private_lo_v2f16_reglo_vreg_nooff_zexti8:
; GCN: s_waitcnt
; GFX9-NEXT: buffer_load_ubyte_d16 v1, off, s[0:3], s4 offset:4094{{$}}
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword v{{\[[0-9]+:[0-9]+\]}}, v1
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: buffer_load_ubyte v0, off, s[0:3], s4 offset:4094{{$}}
define void @load_private_lo_v2f16_reglo_vreg_nooff_zexti8(i8 addrspace(5)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %load = load volatile i8, i8 addrspace(5)* inttoptr (i32 4094 to i8 addrspace(5)*)
  %ext = zext i8 %load to i16
  %bc.ext = bitcast i16 %ext to half
  %build1 = insertelement <2 x half> %reg.bc, half %bc.ext, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; addrspace(4) i16 load at element index -2047 from %in; the gfx900 checks
; expect a global_load_short_d16 with an immediate offset of -4094.
; GCN-LABEL: {{^}}load_constant_lo_v2i16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort
define void @load_constant_lo_v2i16_reglo_vreg(i16 addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %gep = getelementptr inbounds i16, i16 addrspace(4)* %in, i64 -2047
  %load = load i16, i16 addrspace(4)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Half variant of the addrspace(4) load at element index -2047.
; The label pattern below is anchored to the start of the line and ends with
; ':' so that it only matches the function's own label, as every other label
; pattern in this file does.
; GCN-LABEL: {{^}}load_constant_lo_v2f16_reglo_vreg:
; GCN: s_waitcnt
; GFX9-NEXT: global_load_short_d16 v2, v[0:1], off offset:-4094
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: global_store_dword
; GFX9-NEXT: s_waitcnt
; GFX9-NEXT: s_setpc_b64

; VI: flat_load_ushort
define void @load_constant_lo_v2f16_reglo_vreg(half addrspace(4)* %in, i32 %reg) #0 {
entry:
  %reg.bc = bitcast i32 %reg to <2 x half>
  %gep = getelementptr inbounds half, half addrspace(4)* %in, i64 -2047
  %load = load half, half addrspace(4)* %gep
  %build1 = insertelement <2 x half> %reg.bc, half %load, i32 0
  store <2 x half> %build1, <2 x half> addrspace(1)* undef
  ret void
}

; Two addrspace(5) allocas: a volatile store to the first ([10 x i32]) is
; followed by a volatile i16 load from element 2025 of the second
; ([4096 x i16]). The gfx900 checks expect the load to use an immediate
; offset of 4094.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_short_d16 v0, off, s[0:3], s5 offset:4094

; VI: buffer_load_ushort v
define void @load_private_lo_v2i16_reglo_vreg_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i16], align 2, addrspace(5)
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %gep = getelementptr inbounds [4096 x i16], [4096 x i16] addrspace(5)* %obj1, i32 0, i32 2025
  %load = load volatile i16, i16 addrspace(5)* %gep
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Same alloca layout with a [4096 x i8] second object: a volatile i8 load from
; element 4051 is sign-extended to i16. The gfx900 checks expect an immediate
; offset of 4095.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_sexti8_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_sbyte_d16 v0, off, s[0:3], s5 offset:4095

; VI: buffer_load_sbyte v
define void @load_private_lo_v2i16_reglo_vreg_sexti8_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i8], align 2, addrspace(5)
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
  %load = load volatile i8, i8 addrspace(5)* %gep
  %load.ext = sext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

; Zero-extending variant of the volatile i8 load from element 4051 of the
; second alloca.
; GCN-LABEL: {{^}}load_private_lo_v2i16_reglo_vreg_zexti8_to_offset:
; GFX9: buffer_store_dword
; GFX9-NEXT: buffer_load_ubyte_d16 v0, off, s[0:3], s5 offset:4095

; VI: buffer_load_ubyte v
define void @load_private_lo_v2i16_reglo_vreg_zexti8_to_offset(i32 %reg) #0 {
entry:
  %obj0 = alloca [10 x i32], align 4, addrspace(5)
  %obj1 = alloca [4096 x i8], align 2, addrspace(5)
  %reg.bc = bitcast i32 %reg to <2 x i16>
  %bc = bitcast [10 x i32] addrspace(5)* %obj0 to i32 addrspace(5)*
  store volatile i32 123, i32 addrspace(5)* %bc
  %gep = getelementptr inbounds [4096 x i8], [4096 x i8] addrspace(5)* %obj1, i32 0, i32 4051
  %load = load volatile i8, i8 addrspace(5)* %gep
  %load.ext = zext i8 %load to i16
  %build1 = insertelement <2 x i16> %reg.bc, i16 %load.ext, i32 0
  store <2 x i16> %build1, <2 x i16> addrspace(1)* undef
  ret void
}

attributes #0 = { nounwind }