1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SI,SICIVI,FUNC %s 2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,SICIVI,GFX89,FUNC %s 3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GFX9,GFX89,FUNC %s 4 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s 5 6 ; Testing for ds_read/write_b128 7 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s 8 ; RUN: llc -march=amdgcn -mcpu=gfx900 -mattr=+enable-ds128 < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=CIVI,FUNC %s 9 10 ; FUNC-LABEL: {{^}}local_load_i16: 11 ; GFX9-NOT: m0 12 ; SICIVI: s_mov_b32 m0 13 14 ; GCN: ds_read_u16 v{{[0-9]+}} 15 16 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 17 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 18 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 19 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 20 ; EG: LDS_SHORT_WRITE {{\*?}} [[TO]], [[DATA]] 21 define amdgpu_kernel void @local_load_i16(i16 addrspace(3)* %out, i16 addrspace(3)* %in) { 22 entry: 23 %ld = load i16, i16 addrspace(3)* %in 24 store i16 %ld, i16 addrspace(3)* %out 25 ret void 26 } 27 28 ; FUNC-LABEL: {{^}}local_load_v2i16: 29 ; GFX9-NOT: m0 30 ; SICIVI: s_mov_b32 m0 31 32 ; GCN: ds_read_b32 33 34 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 35 ; EG: LDS_READ_RET {{.*}} [[FROM]] 36 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 37 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 38 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 39 define amdgpu_kernel void @local_load_v2i16(<2 x i16> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) { 40 entry: 41 %ld = load <2 x i16>, <2 x i16> addrspace(3)* %in 42 store <2 x i16> %ld, <2 x i16> addrspace(3)* %out 43 ret void 44 } 45 46 ; FUNC-LABEL: {{^}}local_load_v3i16: 47 ; GFX9-NOT: m0 48 ; SICIVI: s_mov_b32 m0 49 50 ; GCN: ds_read_b64 51 ; GCN-DAG: ds_write_b32 52 ; GCN-DAG: ds_write_b16 53 54 ; EG-DAG: LDS_USHORT_READ_RET 55 ; EG-DAG: LDS_READ_RET 56 define amdgpu_kernel void @local_load_v3i16(<3 x i16> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { 57 entry: 58 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in 59 store <3 x i16> %ld, <3 x i16> addrspace(3)* %out 60 ret void 61 } 62 63 ; FUNC-LABEL: {{^}}local_load_v4i16: 64 ; GFX9-NOT: m0 65 ; SICIVI: s_mov_b32 m0 66 67 ; GCN: ds_read_b64 68 69 ; EG: LDS_READ_RET 70 ; EG: LDS_READ_RET 71 define amdgpu_kernel void @local_load_v4i16(<4 x i16> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) { 72 entry: 73 %ld = load <4 x i16>, <4 x i16> addrspace(3)* %in 74 store <4 x i16> %ld, <4 x i16> addrspace(3)* %out 75 ret void 76 } 77 78 ; FUNC-LABEL: {{^}}local_load_v8i16: 79 ; GFX9-NOT: m0 80 ; SICIVI: s_mov_b32 m0 81 82 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 83 84 ; EG: LDS_READ_RET 85 ; EG: LDS_READ_RET 86 ; EG: LDS_READ_RET 87 ; EG: LDS_READ_RET 88 define amdgpu_kernel void @local_load_v8i16(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) { 89 entry: 90 %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in 91 store <8 x i16> %ld, <8 x i16> addrspace(3)* %out 92 ret void 93 } 94 95 ; FUNC-LABEL: {{^}}local_load_v16i16: 96 ; GFX9-NOT: m0 97 ; SICIVI: s_mov_b32 m0 98 99 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:3{{$}} 100 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:1 offset1:2{{$}} 101 102 103 ; EG: LDS_READ_RET 104 ; EG: LDS_READ_RET 105 ; EG: LDS_READ_RET 106 ; EG: LDS_READ_RET 107 108 ; EG: LDS_READ_RET 109 ; EG: LDS_READ_RET 110 ; EG: LDS_READ_RET 111 ; EG: LDS_READ_RET 112 define amdgpu_kernel void @local_load_v16i16(<16 x i16> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) { 113 entry: 114 %ld = load <16 x i16>, <16 x i16> addrspace(3)* %in 115 store <16 x i16> %ld, <16 x i16> addrspace(3)* %out 116 ret void 117 } 118 119 ; FUNC-LABEL: {{^}}local_zextload_i16_to_i32: 120 ; GFX9-NOT: m0 121 ; SICIVI: s_mov_b32 m0 122 123 ; GCN: ds_read_u16 124 ; GCN: ds_write_b32 125 126 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 127 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 128 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 129 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 130 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 131 define amdgpu_kernel void @local_zextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 132 %a = load i16, i16 addrspace(3)* %in 133 %ext = zext i16 %a to i32 134 store i32 %ext, i32 addrspace(3)* %out 135 ret void 136 } 137 138 ; FUNC-LABEL: {{^}}local_sextload_i16_to_i32: 139 ; GCN-NOT: s_wqm_b64 140 141 ; GFX9-NOT: m0 142 ; SICIVI: s_mov_b32 m0 143 144 ; GCN: ds_read_i16 145 146 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 147 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 148 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 149 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 150 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 151 ; EG: 16 152 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 153 define amdgpu_kernel void @local_sextload_i16_to_i32(i32 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 154 %a = load i16, i16 addrspace(3)* %in 155 %ext = sext i16 %a to i32 156 store i32 %ext, i32 addrspace(3)* %out 157 ret void 158 } 159 160 ; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i32: 161 ; GFX9-NOT: m0 162 ; SICIVI: s_mov_b32 m0 163 164 ; GCN: ds_read_u16 165 166 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 167 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 168 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 169 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 170 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 171 define amdgpu_kernel void @local_zextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 172 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 173 %ext = zext <1 x i16> %load to <1 x i32> 174 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out 175 ret void 176 } 177 178 ; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i32: 179 ; GFX9-NOT: m0 180 ; SICIVI: s_mov_b32 m0 181 182 ; GCN: ds_read_i16 183 184 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 185 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 186 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 187 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 188 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 189 ; EG: 16 190 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 191 define amdgpu_kernel void @local_sextload_v1i16_to_v1i32(<1 x i32> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 192 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 193 %ext = sext <1 x i16> %load to <1 x i32> 194 store <1 x i32> %ext, <1 x i32> addrspace(3)* %out 195 ret void 196 } 197 198 ; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i32: 199 ; GCN-NOT: s_wqm_b64 200 ; GFX9-NOT: m0 201 ; SICIVI: s_mov_b32 m0 202 203 ; GCN: ds_read_b32 204 205 ; EG: LDS_READ_RET 206 define amdgpu_kernel void @local_zextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 207 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 208 %ext = zext <2 x i16> %load to <2 x i32> 209 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out 210 ret void 211 } 212 213 ; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i32: 214 ; GCN-NOT: s_wqm_b64 215 ; GFX9-NOT: m0 216 ; SICIVI: s_mov_b32 m0 217 218 ; GCN: ds_read_b32 219 220 ; EG: LDS_READ_RET 221 ; EG: BFE_INT 222 ; EG: BFE_INT 223 define amdgpu_kernel void @local_sextload_v2i16_to_v2i32(<2 x i32> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 224 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 225 %ext = sext <2 x i16> %load to <2 x i32> 226 store <2 x i32> %ext, <2 x i32> addrspace(3)* %out 227 ret void 228 } 229 230 ; FUNC-LABEL: {{^}}local_local_zextload_v3i16_to_v3i32: 231 ; GFX9-NOT: m0 232 ; SICIVI: s_mov_b32 m0 233 234 ; GCN: ds_read_b64 235 ; GCN-DAG: ds_write_b32 236 ; GCN-DAG: ds_write_b64 237 238 ; EG: LDS_READ_RET 239 define amdgpu_kernel void @local_local_zextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { 240 entry: 241 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in 242 %ext = zext <3 x i16> %ld to <3 x i32> 243 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out 244 ret void 245 } 246 247 ; FUNC-LABEL: {{^}}local_local_sextload_v3i16_to_v3i32: 248 ; GFX9-NOT: m0 249 ; SICIVI: s_mov_b32 m0 250 251 ; GCN: ds_read_b64 252 ; GCN-DAG: ds_write_b32 253 ; GCN-DAG: ds_write_b64 254 255 ; EG: LDS_READ_RET 256 ; EG-DAG: BFE_INT 257 ; EG-DAG: BFE_INT 258 ; EG-DAG: BFE_INT 259 define amdgpu_kernel void @local_local_sextload_v3i16_to_v3i32(<3 x i32> addrspace(3)* %out, <3 x i16> addrspace(3)* %in) { 260 entry: 261 %ld = load <3 x i16>, <3 x i16> addrspace(3)* %in 262 %ext = sext <3 x i16> %ld to <3 x i32> 263 store <3 x i32> %ext, <3 x i32> addrspace(3)* %out 264 ret void 265 } 266 267 ; FUNC-LABEL: {{^}}local_local_zextload_v4i16_to_v4i32: 268 ; GCN-NOT: s_wqm_b64 269 ; GFX9-NOT: m0 270 ; SICIVI: s_mov_b32 m0 271 272 ; GCN: ds_read_b64 273 274 ; EG: LDS_READ_RET 275 ; EG: LDS_READ_RET 276 define amdgpu_kernel void @local_local_zextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 277 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 278 %ext = zext <4 x i16> %load to <4 x i32> 279 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out 280 ret void 281 } 282 283 ; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i32: 284 ; GCN-NOT: s_wqm_b64 285 ; GFX9-NOT: m0 286 ; SICIVI: s_mov_b32 m0 287 288 ; GCN: ds_read_b64 289 290 ; EG: LDS_READ_RET 291 ; EG: LDS_READ_RET 292 ; EG-DAG: BFE_INT 293 ; EG-DAG: BFE_INT 294 ; EG-DAG: BFE_INT 295 ; EG-DAG: BFE_INT 296 define amdgpu_kernel void @local_sextload_v4i16_to_v4i32(<4 x i32> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 297 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 298 %ext = sext <4 x i16> %load to <4 x i32> 299 store <4 x i32> %ext, <4 x i32> addrspace(3)* %out 300 ret void 301 } 302 303 ; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i32: 304 ; GFX9-NOT: m0 305 ; SICIVI: s_mov_b32 m0 306 307 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 308 309 ; EG: LDS_READ_RET 310 ; EG: LDS_READ_RET 311 ; EG: LDS_READ_RET 312 ; EG: LDS_READ_RET 313 define amdgpu_kernel void @local_zextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 314 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 315 %ext = zext <8 x i16> %load to <8 x i32> 316 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out 317 ret void 318 } 319 320 ; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i32: 321 ; GFX9-NOT: m0 322 ; SICIVI: s_mov_b32 m0 323 324 ; GCN: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 325 326 ; EG: LDS_READ_RET 327 ; EG: LDS_READ_RET 328 ; EG: LDS_READ_RET 329 ; EG: LDS_READ_RET 330 ; EG-DAG: BFE_INT 331 ; EG-DAG: BFE_INT 332 ; EG-DAG: BFE_INT 333 ; EG-DAG: BFE_INT 334 ; EG-DAG: BFE_INT 335 ; EG-DAG: BFE_INT 336 ; EG-DAG: BFE_INT 337 ; EG-DAG: BFE_INT 338 define amdgpu_kernel void @local_sextload_v8i16_to_v8i32(<8 x i32> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 339 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 340 %ext = sext <8 x i16> %load to <8 x i32> 341 store <8 x i32> %ext, <8 x i32> addrspace(3)* %out 342 ret void 343 } 344 345 ; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i32: 346 ; GFX9-NOT: m0 347 ; SICIVI: s_mov_b32 m0 348 349 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 350 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} 351 352 ; GCN: ds_write2_b64 353 ; GCN: ds_write2_b64 354 ; GCN: ds_write2_b64 355 ; GCN: ds_write2_b64 356 357 ; EG: LDS_READ_RET 358 ; EG: LDS_READ_RET 359 ; EG: LDS_READ_RET 360 ; EG: LDS_READ_RET 361 ; EG: LDS_READ_RET 362 ; EG: LDS_READ_RET 363 ; EG: LDS_READ_RET 364 ; EG: LDS_READ_RET 365 define amdgpu_kernel void @local_zextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 366 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 367 %ext = zext <16 x i16> %load to <16 x i32> 368 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out 369 ret void 370 } 371 372 ; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i32: 373 ; GFX9-NOT: m0 374 ; SICIVI: s_mov_b32 m0 375 376 377 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 378 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} 379 380 ; EG: LDS_READ_RET 381 ; EG: LDS_READ_RET 382 ; EG: LDS_READ_RET 383 ; EG: LDS_READ_RET 384 ; EG: LDS_READ_RET 385 ; EG: LDS_READ_RET 386 ; EG: LDS_READ_RET 387 ; EG: LDS_READ_RET 388 ; EG-DAG: BFE_INT 389 ; EG-DAG: BFE_INT 390 ; EG-DAG: BFE_INT 391 ; EG-DAG: BFE_INT 392 ; EG-DAG: BFE_INT 393 ; EG-DAG: BFE_INT 394 ; EG-DAG: BFE_INT 395 ; EG-DAG: BFE_INT 396 ; EG-DAG: BFE_INT 397 ; EG-DAG: BFE_INT 398 ; EG-DAG: BFE_INT 399 ; EG-DAG: BFE_INT 400 ; EG-DAG: BFE_INT 401 ; EG-DAG: BFE_INT 402 ; EG-DAG: BFE_INT 403 ; EG-DAG: BFE_INT 404 define amdgpu_kernel void @local_sextload_v16i16_to_v16i32(<16 x i32> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 405 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 406 %ext = sext <16 x i16> %load to <16 x i32> 407 store <16 x i32> %ext, <16 x i32> addrspace(3)* %out 408 ret void 409 } 410 411 ; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i32: 412 ; GFX9-NOT: m0 413 ; SICIVI: s_mov_b32 m0 414 415 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 416 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 417 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 418 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 419 420 ; EG: LDS_READ_RET 421 ; EG: LDS_READ_RET 422 ; EG: LDS_READ_RET 423 ; EG: LDS_READ_RET 424 ; EG: LDS_READ_RET 425 ; EG: LDS_READ_RET 426 ; EG: LDS_READ_RET 427 ; EG: LDS_READ_RET 428 ; EG: LDS_READ_RET 429 ; EG: LDS_READ_RET 430 ; EG: LDS_READ_RET 431 ; EG: LDS_READ_RET 432 ; EG: LDS_READ_RET 433 ; EG: LDS_READ_RET 434 ; EG: LDS_READ_RET 435 ; EG: LDS_READ_RET 436 define amdgpu_kernel void @local_zextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 437 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 438 %ext = zext <32 x i16> %load to <32 x i32> 439 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out 440 ret void 441 } 442 443 ; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i32: 444 ; GFX9-NOT: m0 445 ; SICIVI: s_mov_b32 m0 446 447 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3{{$}} 448 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 449 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 450 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 451 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 452 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 453 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 454 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 455 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 456 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 457 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 458 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 459 460 ; EG: LDS_READ_RET 461 ; EG: LDS_READ_RET 462 ; EG: LDS_READ_RET 463 ; EG: LDS_READ_RET 464 ; EG: LDS_READ_RET 465 ; EG: LDS_READ_RET 466 ; EG: LDS_READ_RET 467 ; EG: LDS_READ_RET 468 ; EG: LDS_READ_RET 469 ; EG: LDS_READ_RET 470 ; EG: LDS_READ_RET 471 ; EG: LDS_READ_RET 472 ; EG: LDS_READ_RET 473 ; EG: LDS_READ_RET 474 ; EG: LDS_READ_RET 475 ; EG: LDS_READ_RET 476 define amdgpu_kernel void @local_sextload_v32i16_to_v32i32(<32 x i32> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 477 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 478 %ext = sext <32 x i16> %load to <32 x i32> 479 store <32 x i32> %ext, <32 x i32> addrspace(3)* %out 480 ret void 481 } 482 483 ; FUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i32: 484 ; GFX9-NOT: m0 485 ; SICIVI: s_mov_b32 m0 486 487 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:14 offset1:15 488 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset1:1{{$}} 489 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:2 offset1:3 490 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:4 offset1:5 491 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:6 offset1:7 492 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:8 offset1:9 493 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:12 offset1:13 494 ; GCN-DAG: ds_read2_b64 v{{\[[0-9]+:[0-9]+\]}}, v{{[0-9]+}} offset0:10 offset1:11 495 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:30 offset1:31 496 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:28 offset1:29 497 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:26 offset1:27 498 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:24 offset1:25 499 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:22 offset1:23 500 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:20 offset1:21 501 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:18 offset1:19 502 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:16 offset1:17 503 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:14 offset1:15 504 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:12 offset1:13 505 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:10 offset1:11 506 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:8 offset1:9 507 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:6 offset1:7 508 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:4 offset1:5 509 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset0:2 offset1:3 510 ; GCN-DAG: ds_write2_b64 v{{[0-9]+}}, v{{\[[0-9]+:[0-9]+\]}}, v{{\[[0-9]+:[0-9]+\]}} offset1:1 511 512 ; EG: LDS_READ_RET 513 ; EG: LDS_READ_RET 514 ; EG: LDS_READ_RET 515 ; EG: LDS_READ_RET 516 ; EG: LDS_READ_RET 517 ; EG: LDS_READ_RET 518 ; EG: LDS_READ_RET 519 ; EG: LDS_READ_RET 520 ; EG: LDS_READ_RET 521 ; EG: LDS_READ_RET 522 ; EG: LDS_READ_RET 523 ; EG: LDS_READ_RET 524 ; EG: LDS_READ_RET 525 ; EG: LDS_READ_RET 526 ; EG: LDS_READ_RET 527 ; EG: LDS_READ_RET 528 ; EG: LDS_READ_RET 529 ; EG: LDS_READ_RET 530 ; EG: LDS_READ_RET 531 ; EG: LDS_READ_RET 532 ; EG: LDS_READ_RET 533 ; EG: LDS_READ_RET 534 ; EG: LDS_READ_RET 535 ; EG: LDS_READ_RET 536 ; EG: LDS_READ_RET 537 ; EG: LDS_READ_RET 538 ; EG: LDS_READ_RET 539 ; EG: LDS_READ_RET 540 ; EG: LDS_READ_RET 541 ; EG: LDS_READ_RET 542 ; EG: LDS_READ_RET 543 ; EG: LDS_READ_RET 544 define amdgpu_kernel void @local_zextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 545 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 546 %ext = zext <64 x i16> %load to <64 x i32> 547 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out 548 ret void 549 } 550 551 ; FUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i32: 552 ; GFX9-NOT: m0 553 ; SICIVI: s_mov_b32 m0 554 555 ; EG: LDS_READ_RET 556 ; EG: LDS_READ_RET 557 ; EG: LDS_READ_RET 558 ; EG: LDS_READ_RET 559 ; EG: LDS_READ_RET 560 ; EG: LDS_READ_RET 561 ; EG: LDS_READ_RET 562 ; EG: LDS_READ_RET 563 ; EG: LDS_READ_RET 564 ; EG: LDS_READ_RET 565 ; EG: LDS_READ_RET 566 ; EG: LDS_READ_RET 567 ; EG: LDS_READ_RET 568 ; EG: LDS_READ_RET 569 ; EG: LDS_READ_RET 570 ; EG: LDS_READ_RET 571 ; EG: LDS_READ_RET 572 ; EG: LDS_READ_RET 573 ; EG: LDS_READ_RET 574 ; EG: LDS_READ_RET 575 ; EG: LDS_READ_RET 576 ; EG: LDS_READ_RET 577 ; EG: LDS_READ_RET 578 ; EG: LDS_READ_RET 579 ; EG: LDS_READ_RET 580 ; EG: LDS_READ_RET 581 ; EG: LDS_READ_RET 582 ; EG: LDS_READ_RET 583 ; EG: LDS_READ_RET 584 ; EG: LDS_READ_RET 585 ; EG: LDS_READ_RET 586 ; EG: LDS_READ_RET 587 define amdgpu_kernel void @local_sextload_v64i16_to_v64i32(<64 x i32> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 588 %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 589 %ext = sext <64 x i16> %load to <64 x i32> 590 store <64 x i32> %ext, <64 x i32> addrspace(3)* %out 591 ret void 592 } 593 594 ; FUNC-LABEL: {{^}}local_zextload_i16_to_i64: 595 ; GFX9-NOT: m0 596 ; SICIVI: s_mov_b32 m0 597 598 ; GCN-DAG: ds_read_u16 v[[LO:[0-9]+]], 599 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}} 600 601 ; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] 602 603 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 604 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 605 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 606 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 607 ; EG-DAG: LDS_WRITE 608 define amdgpu_kernel void @local_zextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 609 %a = load i16, i16 addrspace(3)* %in 610 %ext = zext i16 %a to i64 611 store i64 %ext, i64 addrspace(3)* %out 612 ret void 613 } 614 615 ; FUNC-LABEL: {{^}}local_sextload_i16_to_i64: 616 ; GFX9-NOT: m0 617 ; SICIVI: s_mov_b32 m0 618 619 ; FIXME: Need to optimize this sequence to avoid an extra shift. 620 ; t25: i32,ch = load<LD2[%in(addrspace=3)], anyext from i16> t12, t10, undef:i32 621 ; t28: i64 = any_extend t25 622 ; t30: i64 = sign_extend_inreg t28, ValueType:ch:i16 623 ; SI: ds_read_i16 v[[LO:[0-9]+]], 624 ; GFX89: ds_read_u16 v[[ULO:[0-9]+]] 625 ; GFX89: v_bfe_i32 v[[LO:[0-9]+]], v[[ULO]], 0, 16 626 ; GCN-DAG: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]] 627 628 ; GCN: ds_write_b64 v{{[0-9]+}}, v{{\[}}[[LO]]:[[HI]]] 629 630 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 631 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 632 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 633 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 634 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 635 ; EG-DAG: LDS_WRITE 636 ; EG-DAG: 16 637 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 638 define amdgpu_kernel void @local_sextload_i16_to_i64(i64 addrspace(3)* %out, i16 addrspace(3)* %in) #0 { 639 %a = load i16, i16 addrspace(3)* %in 640 %ext = sext i16 %a to i64 641 store i64 %ext, i64 addrspace(3)* %out 642 ret void 643 } 644 645 ; FUNC-LABEL: {{^}}local_zextload_v1i16_to_v1i64: 646 ; GFX9-NOT: m0 647 ; SICIVI: s_mov_b32 m0 648 649 650 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 651 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 652 ; EG-DAG: MOV {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], OQAP 653 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 654 ; EG-DAG: LDS_WRITE 655 define amdgpu_kernel void @local_zextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 656 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 657 %ext = zext <1 x i16> %load to <1 x i64> 658 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out 659 ret void 660 } 661 662 ; FUNC-LABEL: {{^}}local_sextload_v1i16_to_v1i64: 663 ; GFX9-NOT: m0 664 ; SICIVI: s_mov_b32 m0 665 666 667 ; EG: MOV {{[* ]*}}[[FROM:T[0-9]+\.[XYZW]]], KC0[2].Z 668 ; EG: LDS_USHORT_READ_RET {{.*}} [[FROM]] 669 ; EG-DAG: MOV {{[* ]*}}[[TMP:T[0-9]+\.[XYZW]]], OQAP 670 ; EG-DAG: MOV {{[* ]*}}[[TO:T[0-9]+\.[XYZW]]], KC0[2].Y 671 ; EG-DAG: BFE_INT {{[* ]*}}[[DATA:T[0-9]+\.[XYZW]]], {{.*}}, 0.0, literal 672 ; EG-DAG: LDS_WRITE 673 ; EG-DAG: 16 674 ; EG: LDS_WRITE {{\*?}} [[TO]], [[DATA]] 675 define amdgpu_kernel void @local_sextload_v1i16_to_v1i64(<1 x i64> addrspace(3)* %out, <1 x i16> addrspace(3)* %in) #0 { 676 %load = load <1 x i16>, <1 x i16> addrspace(3)* %in 677 %ext = sext <1 x i16> %load to <1 x i64> 678 store <1 x i64> %ext, <1 x i64> addrspace(3)* %out 679 ret void 680 } 681 682 ; FUNC-LABEL: {{^}}local_zextload_v2i16_to_v2i64: 683 ; GFX9-NOT: m0 684 ; SICIVI: s_mov_b32 m0 685 686 687 ; EG: LDS_READ_RET 688 define amdgpu_kernel void @local_zextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 689 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 690 %ext = zext <2 x i16> %load to <2 x i64> 691 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out 692 ret void 693 } 694 695 ; FUNC-LABEL: {{^}}local_sextload_v2i16_to_v2i64: 696 ; GFX9-NOT: m0 697 ; SICIVI: s_mov_b32 m0 698 699 700 ; EG: LDS_READ_RET 701 ; EG-DAG: BFE_INT 702 ; EG-DAG: ASHR 703 define amdgpu_kernel void @local_sextload_v2i16_to_v2i64(<2 x i64> addrspace(3)* %out, <2 x i16> addrspace(3)* %in) #0 { 704 %load = load <2 x i16>, <2 x i16> addrspace(3)* %in 705 %ext = sext <2 x i16> %load to <2 x i64> 706 store <2 x i64> %ext, <2 x i64> addrspace(3)* %out 707 ret void 708 } 709 710 ; FUNC-LABEL: {{^}}local_zextload_v4i16_to_v4i64: 711 ; GFX9-NOT: m0 712 ; SICIVI: s_mov_b32 m0 713 714 715 ; EG: LDS_READ_RET 716 ; EG: LDS_READ_RET 717 define amdgpu_kernel void @local_zextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 718 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 719 %ext = zext <4 x i16> %load to <4 x i64> 720 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out 721 ret void 722 } 723 724 ; FUNC-LABEL: {{^}}local_sextload_v4i16_to_v4i64: 725 ; GFX9-NOT: m0 726 ; SICIVI: s_mov_b32 m0 727 728 729 ; EG: LDS_READ_RET 730 ; EG: LDS_READ_RET 731 ; EG-DAG: BFE_INT 732 ; EG-DAG: BFE_INT 733 ; EG-DAG: ASHR 734 ; EG-DAG: ASHR 735 define amdgpu_kernel void @local_sextload_v4i16_to_v4i64(<4 x i64> addrspace(3)* %out, <4 x i16> addrspace(3)* %in) #0 { 736 %load = load <4 x i16>, <4 x i16> addrspace(3)* %in 737 %ext = sext <4 x i16> %load to <4 x i64> 738 store <4 x i64> %ext, <4 x i64> addrspace(3)* %out 739 ret void 740 } 741 742 ; FUNC-LABEL: {{^}}local_zextload_v8i16_to_v8i64: 743 ; GFX9-NOT: m0 744 ; SICIVI: s_mov_b32 m0 745 746 747 ; EG: LDS_READ_RET 748 ; EG: LDS_READ_RET 749 ; EG: LDS_READ_RET 750 ; EG: LDS_READ_RET 751 define amdgpu_kernel void @local_zextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 752 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 753 %ext = zext <8 x i16> %load to <8 x i64> 754 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out 755 ret void 756 } 757 758 ; FUNC-LABEL: {{^}}local_sextload_v8i16_to_v8i64: 759 ; GFX9-NOT: m0 760 ; SICIVI: s_mov_b32 m0 761 762 763 ; EG: LDS_READ_RET 764 ; EG: LDS_READ_RET 765 ; EG: LDS_READ_RET 766 ; EG: LDS_READ_RET 767 ; EG-DAG: BFE_INT 768 ; EG-DAG: BFE_INT 769 ; EG-DAG: ASHR 770 ; EG-DAG: ASHR 771 ; EG-DAG: BFE_INT 772 ; EG-DAG: BFE_INT 773 ; EG-DAG: ASHR 774 ; EG-DAG: ASHR 775 define amdgpu_kernel void @local_sextload_v8i16_to_v8i64(<8 x i64> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) #0 { 776 %load = load <8 x i16>, <8 x i16> addrspace(3)* %in 777 %ext = sext <8 x i16> %load to <8 x i64> 778 store <8 x i64> %ext, <8 x i64> addrspace(3)* %out 779 ret void 780 } 781 782 ; FUNC-LABEL: {{^}}local_zextload_v16i16_to_v16i64: 783 ; GFX9-NOT: m0 784 ; SICIVI: s_mov_b32 m0 785 786 787 ; EG: LDS_READ_RET 788 ; EG: LDS_READ_RET 789 ; EG: LDS_READ_RET 790 ; EG: LDS_READ_RET 791 ; EG: LDS_READ_RET 792 ; EG: LDS_READ_RET 793 ; EG: LDS_READ_RET 794 ; EG: LDS_READ_RET 795 define amdgpu_kernel void @local_zextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 796 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 797 %ext = zext <16 x i16> %load to <16 x i64> 798 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 799 ret void 800 } 801 802 ; FUNC-LABEL: {{^}}local_sextload_v16i16_to_v16i64: 803 ; GFX9-NOT: m0 804 ; SICIVI: s_mov_b32 m0 805 806 807 ; EG: LDS_READ_RET 808 ; EG: LDS_READ_RET 809 ; EG: LDS_READ_RET 810 ; EG: LDS_READ_RET 811 ; EG: LDS_READ_RET 812 ; EG: LDS_READ_RET 813 ; EG: LDS_READ_RET 814 ; EG: LDS_READ_RET 815 ; EG-DAG: BFE_INT 816 ; EG-DAG: BFE_INT 817 ; EG-DAG: ASHR 818 ; EG-DAG: ASHR 819 ; EG-DAG: BFE_INT 820 ; EG-DAG: BFE_INT 821 ; EG-DAG: ASHR 822 ; EG-DAG: ASHR 823 ; EG-DAG: BFE_INT 824 ; EG-DAG: BFE_INT 825 ; EG-DAG: ASHR 826 ; EG-DAG: ASHR 827 ; EG-DAG: BFE_INT 828 ; EG-DAG: BFE_INT 829 ; EG-DAG: ASHR 830 ; EG-DAG: ASHR 831 define amdgpu_kernel void @local_sextload_v16i16_to_v16i64(<16 x i64> addrspace(3)* %out, <16 x i16> addrspace(3)* %in) #0 { 832 %load = load <16 x i16>, <16 x i16> addrspace(3)* %in 833 %ext = sext <16 x i16> %load to <16 x i64> 834 store <16 x i64> %ext, <16 x i64> addrspace(3)* %out 835 ret void 836 } 837 838 ; FUNC-LABEL: {{^}}local_zextload_v32i16_to_v32i64: 839 ; GFX9-NOT: m0 840 ; SICIVI: s_mov_b32 m0 841 842 843 ; EG: LDS_READ_RET 844 ; EG: LDS_READ_RET 845 ; EG: LDS_READ_RET 846 ; EG: LDS_READ_RET 847 ; EG: LDS_READ_RET 848 ; EG: LDS_READ_RET 849 ; EG: LDS_READ_RET 850 ; EG: LDS_READ_RET 851 ; EG: LDS_READ_RET 852 ; EG: LDS_READ_RET 853 ; EG: LDS_READ_RET 854 ; EG: LDS_READ_RET 855 ; EG: LDS_READ_RET 856 ; EG: LDS_READ_RET 857 ; EG: LDS_READ_RET 858 ; EG: LDS_READ_RET 859 define amdgpu_kernel void @local_zextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 860 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 861 %ext = zext <32 x i16> %load to <32 x i64> 862 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 863 ret void 864 } 865 866 ; FUNC-LABEL: {{^}}local_sextload_v32i16_to_v32i64: 867 ; GFX9-NOT: m0 868 ; SICIVI: s_mov_b32 m0 869 870 871 ; EG: LDS_READ_RET 872 ; EG: LDS_READ_RET 873 ; EG: LDS_READ_RET 874 ; EG: LDS_READ_RET 875 ; EG: LDS_READ_RET 876 ; EG: LDS_READ_RET 877 ; EG: LDS_READ_RET 878 ; EG: LDS_READ_RET 879 ; EG: LDS_READ_RET 880 ; EG: LDS_READ_RET 881 ; EG: LDS_READ_RET 882 ; EG: LDS_READ_RET 883 ; EG: LDS_READ_RET 884 ; EG: LDS_READ_RET 885 ; EG: LDS_READ_RET 886 ; EG: LDS_READ_RET 887 ; EG-DAG: BFE_INT 888 ; EG-DAG: BFE_INT 889 ; EG-DAG: ASHR 890 ; EG-DAG: ASHR 891 ; EG-DAG: BFE_INT 892 ; EG-DAG: BFE_INT 893 ; EG-DAG: ASHR 894 ; EG-DAG: ASHR 895 ; EG-DAG: BFE_INT 896 ; EG-DAG: BFE_INT 897 ; EG-DAG: ASHR 898 ; EG-DAG: ASHR 899 ; EG-DAG: BFE_INT 900 ; EG-DAG: BFE_INT 901 ; EG-DAG: ASHR 902 ; EG-DAG: ASHR 903 ; EG-DAG: BFE_INT 904 ; EG-DAG: BFE_INT 905 ; EG-DAG: ASHR 906 ; EG-DAG: ASHR 907 ; EG-DAG: BFE_INT 908 ; EG-DAG: BFE_INT 909 ; EG-DAG: ASHR 910 ; EG-DAG: ASHR 911 ; EG-DAG: BFE_INT 912 ; EG-DAG: BFE_INT 913 ; EG-DAG: ASHR 914 ; EG-DAG: ASHR 915 ; EG-DAG: BFE_INT 916 ; EG-DAG: BFE_INT 917 ; EG-DAG: ASHR 918 ; EG-DAG: ASHR 919 define amdgpu_kernel void @local_sextload_v32i16_to_v32i64(<32 x i64> addrspace(3)* %out, <32 x i16> addrspace(3)* %in) #0 { 920 %load = load <32 x i16>, <32 x i16> addrspace(3)* %in 921 %ext = sext <32 x i16> %load to <32 x i64> 922 store <32 x i64> %ext, <32 x i64> addrspace(3)* %out 923 ret void 924 } 925 926 ; ; XFUNC-LABEL: {{^}}local_zextload_v64i16_to_v64i64: 927 ; define amdgpu_kernel void @local_zextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 928 ; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 929 ; %ext = zext <64 x i16> %load to <64 x i64> 930 ; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 931 ; ret void 932 ; } 933 934 ; ; XFUNC-LABEL: {{^}}local_sextload_v64i16_to_v64i64: 935 ; define amdgpu_kernel void @local_sextload_v64i16_to_v64i64(<64 x i64> addrspace(3)* %out, <64 x i16> addrspace(3)* %in) #0 { 936 ; %load = load <64 x i16>, <64 x i16> addrspace(3)* %in 937 ; %ext = sext <64 x i16> %load to <64 x i64> 938 ; store <64 x i64> %ext, <64 x i64> addrspace(3)* %out 939 ; ret void 940 ; } 941 942 ; Tests if ds_read/write_b128 gets generated for the 16 byte aligned load. 943 ; FUNC-LABEL: {{^}}local_v8i16_to_128: 944 945 ; SI-NOT: ds_read_b128 946 ; SI-NOT: ds_write_b128 947 948 ; CIVI: ds_read_b128 949 ; CIVI: ds_write_b128 950 951 ; EG: LDS_READ_RET 952 ; EG: LDS_READ_RET 953 ; EG: LDS_READ_RET 954 ; EG: LDS_READ_RET 955 define amdgpu_kernel void @local_v8i16_to_128(<8 x i16> addrspace(3)* %out, <8 x i16> addrspace(3)* %in) { 956 %ld = load <8 x i16>, <8 x i16> addrspace(3)* %in, align 16 957 store <8 x i16> %ld, <8 x i16> addrspace(3)* %out, align 16 958 ret void 959 } 960 961 attributes #0 = { nounwind } 962