; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s


; FUNC-LABEL: {{^}}constant_load_i8:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
entry:
  %ld = load i8, i8 addrspace(4)* %in
  store i8 %ld, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v2i8:
; GCN-NOHSA: buffer_load_ushort v
; GCN-HSA: flat_load_ushort v

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <2 x i8>, <2 x i8> addrspace(4)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v3i8:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v4i8:
; GCN: s_load_dword s

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <4 x i8>, <4 x i8> addrspace(4)* %in
  store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v8i8:
; GCN: s_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <8 x i8>, <8 x i8> addrspace(4)* %in
  store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_load_v16i8:
; GCN: s_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <16 x i8>, <16 x i8> addrspace(4)* %in
  store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_i8_to_i32:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_i8_to_i32:
; GCN-NOHSA: buffer_load_sbyte
; GCN-HSA: flat_load_sbyte

; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %ld = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i32:

; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: This should use DST, but for some reason there are redundant MOVs
; EG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG: 8
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort

; GCN-HSA: flat_load_ushort

; GCN: v_bfe_i32
; GCN: v_bfe_i32

; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v3i8_to_v3i32:
; GCN: s_load_dword s

; GCN-DAG: s_bfe_u32
; GCN-DAG: s_bfe_u32
; GCN-DAG: s_and_b32

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  %ext = zext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v3i8_to_v3i32:
; GCN: s_load_dword s

; GCN-DAG: s_bfe_i32
; GCN-DAG: s_bfe_i32
; GCN-DAG: s_bfe_i32

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(4)* %in
  %ext = sext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i32:
; GCN: s_load_dword s
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i32:
; GCN: s_load_dword s
; GCN-DAG: s_sext_i32_i8
; GCN-DAG: s_ashr_i32

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_and_b32
; GCN-DAG: s_lshr_b32

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i32:
; GCN: s_load_dwordx2
; GCN-DAG: s_ashr_i32
; GCN-DAG: s_sext_i32_i8

; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i32:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i32:

; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = sext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i32:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = zext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i32:

; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = sext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
  %ext = zext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
  %ext = sext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_i8_to_i64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]

; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_i8_to_i64:
; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7?
; EG: 31
define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7?
; EG: 31
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = sext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = zext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = sext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = zext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = sext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; FUNC-LABEL: {{^}}constant_zextload_i8_to_i16:
; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
; GCN-NOHSA: buffer_store_short v[[VAL]]

; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = zext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_i8_to_i16:
; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],

; GCN-NOHSA: buffer_store_short v[[VAL]]
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
  %a = load i8, i8 addrspace(4)* %in
  %ext = sext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = zext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i16:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
  %ext = sext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = zext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
  %ext = sext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = zext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
  %ext = sext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = zext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal

define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
  %ext = sext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = zext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
  %ext = sext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i16:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = zext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i16:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
  %ext = sext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}

; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = zext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
;   %ext = sext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }

attributes #0 = { nounwind }