; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,SI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,SI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,VI,FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; RUN: llc -amdgpu-scalarize-global-loads=false -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s


; FUNC-LABEL: {{^}}global_load_i8:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
define amdgpu_kernel void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
entry:
  %ld = load i8, i8 addrspace(1)* %in
  store i8 %ld, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v2i8:
; GCN-NOHSA: buffer_load_ushort v
; GCN-HSA: flat_load_ushort v

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v3i8:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v4i8:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
  store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v8i8:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <8 x i8>, <8 x i8> addrspace(1)* %in
  store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v16i8:
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <16 x i8>, <16 x i8> addrspace(1)* %in
  store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_i8_to_i32:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_i8_to_i32:
; GCN-NOHSA: buffer_load_sbyte
; GCN-HSA: flat_load_sbyte

; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %ld = load i8, i8 addrspace(1)* %in
  %ext = sext i8 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i32:

; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = sext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = zext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = sext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v3i8_to_v3i32:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
  %ext = zext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v3i8_to_v3i32:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; FIXME: Need to optimize this sequence to avoid extra shift on VI.

; t23: i16 = truncate t18
; t49: i16 = srl t23, Constant:i32<8>
; t57: i32 = any_extend t49
; t58: i32 = sign_extend_inreg t57, ValueType:ch:i8

; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}}
; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8
; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
  %ext = sext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i32:
; GCN-NOHSA: buffer_load_dword
; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = zext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i32:
; GCN-NOHSA: buffer_load_dword
; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = sext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i32:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = zext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i32:

; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = sext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i32:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = zext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i32:

; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = sext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i32:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = zext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i32:

; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1
; TODO: These should use DST, but for some there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = sext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
  %ext = zext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
  %ext = sext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_i8_to_i64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]

; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_i8_to_i64:
; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
define amdgpu_kernel void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = sext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = zext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7 ?
; EG: 31
define amdgpu_kernel void @global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = sext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = zext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = sext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = zext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = sext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = zext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = sext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = zext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = sext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = zext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = sext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
; define amdgpu_kernel void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = zext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
; define amdgpu_kernel void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = sext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; FUNC-LABEL: {{^}}global_zextload_i8_to_i16:
; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
; GCN-NOHSA: buffer_store_short v[[VAL]]

; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_i8_to_i16:
; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],

; GCN-NOHSA: buffer_store_short v[[VAL]]
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = sext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = zext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i16:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = sext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = zext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = sext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = zext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = sext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = zext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = sext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = zext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = sext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i16:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = zext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i16:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = sext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}

; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
; define amdgpu_kernel void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = zext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
; define amdgpu_kernel void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = sext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }

attributes #0 = { nounwind }