Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      4 ; RUN: llc -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
      5 
      6 
      7 ; FUNC-LABEL: {{^}}constant_load_i8:
      8 ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
      9 ; GCN-HSA: flat_load_ubyte
     10 
     11 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
     12 ; TODO: NOT AND
     13 define amdgpu_kernel void @constant_load_i8(i8 addrspace(1)* %out, i8 addrspace(4)* %in) #0 { ; copy one i8 from %in to %out
     14 entry:
     15   %byte = load i8, i8 addrspace(4)* %in ; read through the addrspace(4) pointer
     16   store i8 %byte, i8 addrspace(1)* %out ; write the value unchanged through the addrspace(1) pointer
     17   ret void
     18 }
     19 
     20 ; FUNC-LABEL: {{^}}constant_load_v2i8:
     21 ; GCN-NOHSA: buffer_load_ushort v
     22 ; GCN-HSA: flat_load_ushort v
     23 
     24 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
     25 define amdgpu_kernel void @constant_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 { ; copy a <2 x i8> from %in to %out
     26 entry:
     27   %vec = load <2 x i8>, <2 x i8> addrspace(4)* %in ; 2-byte vector read from addrspace(4)
     28   store <2 x i8> %vec, <2 x i8> addrspace(1)* %out ; stored unchanged to addrspace(1)
     29   ret void
     30 }
     31 
     32 ; FUNC-LABEL: {{^}}constant_load_v3i8:
     33 ; GCN: s_load_dword s
     34 
     35 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
     36 define amdgpu_kernel void @constant_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 { ; copy a <3 x i8> from %in to %out
     37 entry:
     38   %vec = load <3 x i8>, <3 x i8> addrspace(4)* %in ; three-element vector read from addrspace(4)
     39   store <3 x i8> %vec, <3 x i8> addrspace(1)* %out ; stored unchanged to addrspace(1)
     40   ret void
     41 }
     42 
     43 ; FUNC-LABEL: {{^}}constant_load_v4i8:
     44 ; GCN: s_load_dword s
     45 
     46 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
     47 define amdgpu_kernel void @constant_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 { ; copy a <4 x i8> from %in to %out
     48 entry:
     49   %vec = load <4 x i8>, <4 x i8> addrspace(4)* %in ; 4-byte vector read from addrspace(4)
     50   store <4 x i8> %vec, <4 x i8> addrspace(1)* %out ; stored unchanged to addrspace(1)
     51   ret void
     52 }
     53 
     54 ; FUNC-LABEL: {{^}}constant_load_v8i8:
     55 ; GCN: s_load_dwordx2
     56 
     57 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
     58 define amdgpu_kernel void @constant_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 { ; copy a <8 x i8> from %in to %out
     59 entry:
     60   %vec = load <8 x i8>, <8 x i8> addrspace(4)* %in ; 8-byte vector read from addrspace(4)
     61   store <8 x i8> %vec, <8 x i8> addrspace(1)* %out ; stored unchanged to addrspace(1)
     62   ret void
     63 }
     64 
     65 ; FUNC-LABEL: {{^}}constant_load_v16i8:
     66 ; GCN: s_load_dwordx4
     67 
     68 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
     69 define amdgpu_kernel void @constant_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 { ; copy a <16 x i8> from %in to %out
     70 entry:
     71   %vec = load <16 x i8>, <16 x i8> addrspace(4)* %in ; 16-byte vector read from addrspace(4)
     72   store <16 x i8> %vec, <16 x i8> addrspace(1)* %out ; stored unchanged to addrspace(1)
     73   ret void
     74 }
     75 
     76 ; FUNC-LABEL: {{^}}constant_zextload_i8_to_i32:
     77 ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
     78 ; GCN-HSA: flat_load_ubyte
     79 
     80 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
     81 define amdgpu_kernel void @constant_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 { ; load i8, zero-extend to i32, store
     82   %byte = load i8, i8 addrspace(4)* %in
     83   %wide = zext i8 %byte to i32 ; high bits are filled with zero
     84   store i32 %wide, i32 addrspace(1)* %out
     85   ret void
     86 }
     87 
     88 ; FUNC-LABEL: {{^}}constant_sextload_i8_to_i32:
     89 ; GCN-NOHSA: buffer_load_sbyte
     90 ; GCN-HSA: flat_load_sbyte
     91 
     92 ; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
     93 ; EG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
     94 ; EG: 8
     95 define amdgpu_kernel void @constant_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(4)* %in) #0 { ; load i8, sign-extend to i32, store
     96   %byte = load i8, i8 addrspace(4)* %in
     97   %wide = sext i8 %byte to i32 ; high bits replicate the sign bit of the i8
     98   store i32 %wide, i32 addrspace(1)* %out
     99   ret void
    100 }
    101 
    102 ; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i32:
    103 
    104 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    105 define amdgpu_kernel void @constant_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 { ; single-element vector form of the i8 to i32 zero-extending load
    106   %vec = load <1 x i8>, <1 x i8> addrspace(4)* %in
    107   %wide = zext <1 x i8> %vec to <1 x i32>
    108   store <1 x i32> %wide, <1 x i32> addrspace(1)* %out
    109   ret void
    110 }
    111 
    112 ; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i32:
    113 
    114 ; EG: VTX_READ_8 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
    115 ; EG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, [[DST]], 0.0, literal
    116 ; EG: 8
    117 define amdgpu_kernel void @constant_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 { ; single-element vector form of the i8 to i32 sign-extending load
    118   %vec = load <1 x i8>, <1 x i8> addrspace(4)* %in
    119   %wide = sext <1 x i8> %vec to <1 x i32>
    120   store <1 x i32> %wide, <1 x i32> addrspace(1)* %out
    121   ret void
    122 }
    123 
    124 ; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i32:
    125 ; GCN-NOHSA: buffer_load_ushort
    126 ; GCN-HSA: flat_load_ushort
    127 
    128 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    129 ; TODO: This should use DST, but for some there are redundant MOVs
    130 ; EG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    131 ; EG: 8
    132 define amdgpu_kernel void @constant_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 { ; load <2 x i8>, zero-extend each lane to i32, store
    133   %vec = load <2 x i8>, <2 x i8> addrspace(4)* %in
    134   %wide = zext <2 x i8> %vec to <2 x i32>
    135   store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
    136   ret void
    137 }
    138 
    139 ; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i32:
    140 ; GCN-NOHSA: buffer_load_ushort
    141 
    142 ; GCN-HSA: flat_load_ushort
    143 
    144 ; GCN: v_bfe_i32
    145 ; GCN: v_bfe_i32
    146 
    147 ; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
    148 ; TODO: These should use DST, but for some there are redundant MOVs
    149 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    150 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    151 ; EG-DAG: 8
    152 ; EG-DAG: 8
    153 define amdgpu_kernel void @constant_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 { ; load <2 x i8>, sign-extend each lane to i32, store
    154   %vec = load <2 x i8>, <2 x i8> addrspace(4)* %in
    155   %wide = sext <2 x i8> %vec to <2 x i32>
    156   store <2 x i32> %wide, <2 x i32> addrspace(1)* %out
    157   ret void
    158 }
    159 
    160 ; FUNC-LABEL: {{^}}constant_zextload_v3i8_to_v3i32:
    161 ; GCN: s_load_dword s
    162 
    163 ; GCN-DAG: s_bfe_u32
    164 ; GCN-DAG: s_bfe_u32
    165 ; GCN-DAG: s_and_b32
    166 
    167 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    168 ; TODO: These should use DST, but for some there are redundant MOVs
    169 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    170 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    171 ; EG-DAG: 8
    172 ; EG-DAG: 8
    173 define amdgpu_kernel void @constant_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 { ; load <3 x i8>, zero-extend each lane to i32, store
    174 entry:
    175   %vec = load <3 x i8>, <3 x i8> addrspace(4)* %in
    176   %wide = zext <3 x i8> %vec to <3 x i32>
    177   store <3 x i32> %wide, <3 x i32> addrspace(1)* %out
    178   ret void
    179 }
    180 
    181 ; FUNC-LABEL: {{^}}constant_sextload_v3i8_to_v3i32:
    182 ; GCN: s_load_dword s
    183 
    184 ; GCN-DAG: s_bfe_i32
    185 ; GCN-DAG: s_bfe_i32
    186 ; GCN-DAG: s_bfe_i32
    187 
    188 ; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
    189 ; TODO: These should use DST, but for some there are redundant MOVs
    190 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    191 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    192 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    193 ; EG-DAG: 8
    194 ; EG-DAG: 8
    195 ; EG-DAG: 8
    196 define amdgpu_kernel void @constant_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(4)* %in) #0 { ; load <3 x i8>, sign-extend each lane to i32, store
    197 entry:
    198   %vec = load <3 x i8>, <3 x i8> addrspace(4)* %in
    199   %wide = sext <3 x i8> %vec to <3 x i32>
    200   store <3 x i32> %wide, <3 x i32> addrspace(1)* %out
    201   ret void
    202 }
    203 
    204 ; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i32:
    205 ; GCN: s_load_dword s
    206 ; GCN-DAG: s_and_b32
    207 ; GCN-DAG: s_lshr_b32
    208 
    209 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    210 ; TODO: These should use DST, but for some there are redundant MOVs
    211 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    212 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    213 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    214 ; EG-DAG: 8
    215 ; EG-DAG: 8
    216 ; EG-DAG: 8
    217 define amdgpu_kernel void @constant_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 { ; load <4 x i8>, zero-extend each lane to i32, store
    218   %vec = load <4 x i8>, <4 x i8> addrspace(4)* %in
    219   %wide = zext <4 x i8> %vec to <4 x i32>
    220   store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
    221   ret void
    222 }
    223 
    224 ; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i32:
    225 ; GCN: s_load_dword s
    226 ; GCN-DAG: s_sext_i32_i8
    227 ; GCN-DAG: s_ashr_i32
    228 
    229 ; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
    230 ; TODO: These should use DST, but for some there are redundant MOVs
    231 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    232 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    233 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    234 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    235 ; EG-DAG: 8
    236 ; EG-DAG: 8
    237 ; EG-DAG: 8
    238 ; EG-DAG: 8
    239 define amdgpu_kernel void @constant_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 { ; load <4 x i8>, sign-extend each lane to i32, store
    240   %vec = load <4 x i8>, <4 x i8> addrspace(4)* %in
    241   %wide = sext <4 x i8> %vec to <4 x i32>
    242   store <4 x i32> %wide, <4 x i32> addrspace(1)* %out
    243   ret void
    244 }
    245 
    246 ; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i32:
    247 ; GCN: s_load_dwordx2
    248 ; GCN-DAG: s_and_b32
    249 ; GCN-DAG: s_lshr_b32
    250 
    251 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
    252 ; TODO: These should use DST, but for some there are redundant MOVs
    253 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    254 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    255 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    256 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    257 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    258 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    259 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    260 ; EG-DAG: 8
    261 ; EG-DAG: 8
    262 ; EG-DAG: 8
    263 ; EG-DAG: 8
    264 ; EG-DAG: 8
    265 ; EG-DAG: 8
    266 ; EG-DAG: 8
    267 define amdgpu_kernel void @constant_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 { ; load <8 x i8>, zero-extend each lane to i32, store
    268   %vec = load <8 x i8>, <8 x i8> addrspace(4)* %in
    269   %wide = zext <8 x i8> %vec to <8 x i32>
    270   store <8 x i32> %wide, <8 x i32> addrspace(1)* %out
    271   ret void
    272 }
    273 
    274 ; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i32:
    275 ; GCN: s_load_dwordx2
    276 ; GCN-DAG: s_ashr_i32
    277 ; GCN-DAG: s_sext_i32_i8
    278 
    279 ; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1
    280 ; TODO: These should use DST, but for some there are redundant MOVs
    281 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    282 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    283 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    284 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    285 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    286 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    287 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    288 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    289 ; EG-DAG: 8
    290 ; EG-DAG: 8
    291 ; EG-DAG: 8
    292 ; EG-DAG: 8
    293 ; EG-DAG: 8
    294 ; EG-DAG: 8
    295 ; EG-DAG: 8
    296 ; EG-DAG: 8
    297 define amdgpu_kernel void @constant_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 { ; load <8 x i8>, sign-extend each lane to i32, store
    298   %vec = load <8 x i8>, <8 x i8> addrspace(4)* %in
    299   %wide = sext <8 x i8> %vec to <8 x i32>
    300   store <8 x i32> %wide, <8 x i32> addrspace(1)* %out
    301   ret void
    302 }
    303 
    304 ; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i32:
    305 
    306 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    307 ; TODO: These should use DST, but for some there are redundant MOVs
    308 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    309 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    310 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    311 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    312 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    313 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    314 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    315 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    316 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    317 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    318 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    319 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    320 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    321 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    322 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, {{.*}}literal
    323 ; EG-DAG: 8
    324 ; EG-DAG: 8
    325 ; EG-DAG: 8
    326 ; EG-DAG: 8
    327 ; EG-DAG: 8
    328 ; EG-DAG: 8
    329 ; EG-DAG: 8
    330 ; EG-DAG: 8
    331 ; EG-DAG: 8
    332 ; EG-DAG: 8
    333 ; EG-DAG: 8
    334 ; EG-DAG: 8
    335 ; EG-DAG: 8
    336 ; EG-DAG: 8
    337 ; EG-DAG: 8
    338 define amdgpu_kernel void @constant_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 { ; load <16 x i8>, zero-extend each lane to i32, store
    339   %vec = load <16 x i8>, <16 x i8> addrspace(4)* %in
    340   %wide = zext <16 x i8> %vec to <16 x i32>
    341   store <16 x i32> %wide, <16 x i32> addrspace(1)* %out
    342   ret void
    343 }
    344 
    345 ; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i32:
    346 
    347 ; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
    348 ; TODO: These should use DST, but for some there are redundant MOVs
    349 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    350 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    351 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    352 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    353 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    354 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    355 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    356 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    357 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    358 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    359 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    360 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    361 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    362 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    363 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    364 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+\.[XYZW]}}, {{.*}}, 0.0, literal
    365 ; EG-DAG: 8
    366 ; EG-DAG: 8
    367 ; EG-DAG: 8
    368 ; EG-DAG: 8
    369 ; EG-DAG: 8
    370 ; EG-DAG: 8
    371 ; EG-DAG: 8
    372 ; EG-DAG: 8
    373 ; EG-DAG: 8
    374 ; EG-DAG: 8
    375 ; EG-DAG: 8
    376 ; EG-DAG: 8
    377 ; EG-DAG: 8
    378 ; EG-DAG: 8
    379 ; EG-DAG: 8
    380 ; EG-DAG: 8
    381 define amdgpu_kernel void @constant_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 { ; load <16 x i8>, sign-extend each lane to i32, store
    382   %vec = load <16 x i8>, <16 x i8> addrspace(4)* %in
    383   %wide = sext <16 x i8> %vec to <16 x i32>
    384   store <16 x i32> %wide, <16 x i32> addrspace(1)* %out
    385   ret void
    386 }
    387 
    388 ; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i32:
    389 
    390 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    391 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
    392 ; TODO: These should use DST, but for some there are redundant MOVs
    393 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    394 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    395 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    396 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    397 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    398 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    399 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    400 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    401 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    402 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    403 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    404 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    405 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    406 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    407 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    408 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    409 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    410 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    411 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    412 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    413 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    414 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    415 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    416 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    417 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    418 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    419 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    420 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    421 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    422 ; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
    423 ; EG-DAG: 8
    424 ; EG-DAG: 8
    425 ; EG-DAG: 8
    426 ; EG-DAG: 8
    427 ; EG-DAG: 8
    428 ; EG-DAG: 8
    429 ; EG-DAG: 8
    430 ; EG-DAG: 8
    431 ; EG-DAG: 8
    432 ; EG-DAG: 8
    433 ; EG-DAG: 8
    434 ; EG-DAG: 8
    435 ; EG-DAG: 8
    436 ; EG-DAG: 8
    437 ; EG-DAG: 8
    438 ; EG-DAG: 8
    439 ; EG-DAG: 8
    440 ; EG-DAG: 8
    441 ; EG-DAG: 8
    442 ; EG-DAG: 8
    443 ; EG-DAG: 8
    444 ; EG-DAG: 8
    445 ; EG-DAG: 8
    446 ; EG-DAG: 8
    447 ; EG-DAG: 8
    448 ; EG-DAG: 8
    449 ; EG-DAG: 8
    450 ; EG-DAG: 8
    451 ; EG-DAG: 8
    452 ; EG-DAG: 8
    453 define amdgpu_kernel void @constant_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 { ; load <32 x i8>, zero-extend each lane to i32, store
    454   %vec = load <32 x i8>, <32 x i8> addrspace(4)* %in
    455   %wide = zext <32 x i8> %vec to <32 x i32>
    456   store <32 x i32> %wide, <32 x i32> addrspace(1)* %out
    457   ret void
    458 }
    459 
    460 ; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i32:
    461 
    462 ; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
    463 ; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1
    464 ; TODO: These should use DST, but for some there are redundant MOVs
    465 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    466 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    467 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    468 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    469 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    470 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    471 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    472 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    473 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    474 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    475 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    476 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    477 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    478 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    479 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    480 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    481 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    482 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    483 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    484 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    485 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    486 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    487 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    488 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    489 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    490 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    491 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    492 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    493 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    494 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    495 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    496 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
    497 ; EG-DAG: 8
    498 ; EG-DAG: 8
    499 ; EG-DAG: 8
    500 ; EG-DAG: 8
    501 ; EG-DAG: 8
    502 ; EG-DAG: 8
    503 ; EG-DAG: 8
    504 ; EG-DAG: 8
    505 ; EG-DAG: 8
    506 ; EG-DAG: 8
    507 ; EG-DAG: 8
    508 ; EG-DAG: 8
    509 ; EG-DAG: 8
    510 ; EG-DAG: 8
    511 ; EG-DAG: 8
    512 ; EG-DAG: 8
    513 ; EG-DAG: 8
    514 ; EG-DAG: 8
    515 ; EG-DAG: 8
    516 ; EG-DAG: 8
    517 ; EG-DAG: 8
    518 ; EG-DAG: 8
    519 ; EG-DAG: 8
    520 ; EG-DAG: 8
    521 ; EG-DAG: 8
    522 ; EG-DAG: 8
    523 ; EG-DAG: 8
    524 ; EG-DAG: 8
    525 ; EG-DAG: 8
    526 ; EG-DAG: 8
    527 ; EG-DAG: 8
    528 ; EG-DAG: 8
    529 define amdgpu_kernel void @constant_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 { ; load <32 x i8>, sign-extend each lane to i32, store
    530   %vec = load <32 x i8>, <32 x i8> addrspace(4)* %in
    531   %wide = sext <32 x i8> %vec to <32 x i32>
    532   store <32 x i32> %wide, <32 x i32> addrspace(1)* %out
    533   ret void
    534 }
    535 
    536 ; FUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i32:
    537 
    538 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
    539 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
    540 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
    541 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
    542 define amdgpu_kernel void @constant_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 { ; load <64 x i8>, zero-extend each lane to i32, store
    543   %vec = load <64 x i8>, <64 x i8> addrspace(4)* %in
    544   %wide = zext <64 x i8> %vec to <64 x i32>
    545   store <64 x i32> %wide, <64 x i32> addrspace(1)* %out
    546   ret void
    547 }
    548 
    549 ; FUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i32:
    550 
    551 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
    552 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
    553 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
    554 ; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
    555 define amdgpu_kernel void @constant_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 { ; load <64 x i8>, sign-extend each lane to i32, store
    556   %vec = load <64 x i8>, <64 x i8> addrspace(4)* %in
    557   %wide = sext <64 x i8> %vec to <64 x i32>
    558   store <64 x i32> %wide, <64 x i32> addrspace(1)* %out
    559   ret void
    560 }
    561 
    562 ; FUNC-LABEL: {{^}}constant_zextload_i8_to_i64:
    563 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
    564 
    565 ; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
    566 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
    567 
    568 ; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
    569 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
    570 
    571 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    572 ; EG: MOV {{.*}}, 0.0
    573 define amdgpu_kernel void @constant_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 { ; load i8, zero-extend to i64, store
    574   %byte = load i8, i8 addrspace(4)* %in
    575   %wide = zext i8 %byte to i64 ; upper 56 bits are zero
    576   store i64 %wide, i64 addrspace(1)* %out
    577   ret void
    578 }
    579 
    580 ; FUNC-LABEL: {{^}}constant_sextload_i8_to_i64:
    581 ; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
    582 ; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
    583 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
    584 
    585 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
    586 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    587 
    588 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    589 ; EG: ASHR {{\**}} {{T[0-9]+\.[XYZW]}}, {{.*}}, literal
    590 ; TODO: Why not 7 ?
    591 ; EG: 31
    592 define amdgpu_kernel void @constant_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(4)* %in) #0 { ; load i8, sign-extend to i64, store
    593   %byte = load i8, i8 addrspace(4)* %in
    594   %wide = sext i8 %byte to i64 ; upper 56 bits replicate the sign bit of the i8
    595   store i64 %wide, i64 addrspace(1)* %out
    596   ret void
    597 }
    598 
    599 ; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i64:
    600 
    601 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    602 ; EG: MOV {{.*}}, 0.0
    603 define amdgpu_kernel void @constant_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 { ; single-element vector form of the i8 to i64 zero-extending load
    604   %vec = load <1 x i8>, <1 x i8> addrspace(4)* %in
    605   %wide = zext <1 x i8> %vec to <1 x i64>
    606   store <1 x i64> %wide, <1 x i64> addrspace(1)* %out
    607   ret void
    608 }
    609 
    610 ; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i64:
    611 
    612 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    613 ; EG: ASHR {{\**}} {{T[0-9]+\.[XYZW]}}, {{.*}}, literal
    614 ; TODO: Why not 7 ?
    615 ; EG: 31
    616 define amdgpu_kernel void @constant_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 { ; single-element vector form of the i8 to i64 sign-extending load
    617   %vec = load <1 x i8>, <1 x i8> addrspace(4)* %in
    618   %wide = sext <1 x i8> %vec to <1 x i64>
    619   store <1 x i64> %wide, <1 x i64> addrspace(1)* %out
    620   ret void
    621 }
    622 
    623 ; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i64:
    624 
    625 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    626 define amdgpu_kernel void @constant_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 { ; load <2 x i8>, zero-extend each lane to i64, store
    627   %vec = load <2 x i8>, <2 x i8> addrspace(4)* %in
    628   %wide = zext <2 x i8> %vec to <2 x i64>
    629   store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
    630   ret void
    631 }
    632 
    633 ; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i64:
    634 
    635 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    636 define amdgpu_kernel void @constant_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 { ; load <2 x i8>, sign-extend each lane to i64, store
    637   %vec = load <2 x i8>, <2 x i8> addrspace(4)* %in
    638   %wide = sext <2 x i8> %vec to <2 x i64>
    639   store <2 x i64> %wide, <2 x i64> addrspace(1)* %out
    640   ret void
    641 }
    642 
    643 ; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i64:
    644 
    645 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    646 define amdgpu_kernel void @constant_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 { ; load <4 x i8>, zero-extend each lane to i64, store
    647   %vec = load <4 x i8>, <4 x i8> addrspace(4)* %in
    648   %wide = zext <4 x i8> %vec to <4 x i64>
    649   store <4 x i64> %wide, <4 x i64> addrspace(1)* %out
    650   ret void
    651 }
    652 
    653 ; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i64:
    654 
    655 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    656 define amdgpu_kernel void @constant_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 { ; load <4 x i8>, sign-extend each lane to i64, store
    657   %vec = load <4 x i8>, <4 x i8> addrspace(4)* %in
    658   %wide = sext <4 x i8> %vec to <4 x i64>
    659   store <4 x i64> %wide, <4 x i64> addrspace(1)* %out
    660   ret void
    661 }
    662 
    663 ; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i64:
    664 
    665 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
    666 define amdgpu_kernel void @constant_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 { ; load <8 x i8>, zero-extend each lane to i64, store
    667   %vec = load <8 x i8>, <8 x i8> addrspace(4)* %in
    668   %wide = zext <8 x i8> %vec to <8 x i64>
    669   store <8 x i64> %wide, <8 x i64> addrspace(1)* %out
    670   ret void
    671 }
    672 
    673 ; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i64:
    674 
    675 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
    676 define amdgpu_kernel void @constant_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 { ; load <8 x i8>, sign-extend each lane to i64, store
    677   %vec = load <8 x i8>, <8 x i8> addrspace(4)* %in
    678   %wide = sext <8 x i8> %vec to <8 x i64>
    679   store <8 x i64> %wide, <8 x i64> addrspace(1)* %out
    680   ret void
    681 }
    682 
    683 ; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i64:
    684 
    685 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    686 define amdgpu_kernel void @constant_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
    687   %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
    688   %ext = zext <16 x i8> %load to <16 x i64>
    689   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
    690   ret void
    691 }
    692 
    693 ; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i64:
    694 
    695 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    696 define amdgpu_kernel void @constant_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
    697   %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
    698   %ext = sext <16 x i8> %load to <16 x i64>
    699   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
    700   ret void
    701 }
    702 
    703 ; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i64:
    704 
    705 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    706 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
    707 define amdgpu_kernel void @constant_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
    708   %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
    709   %ext = zext <32 x i8> %load to <32 x i64>
    710   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
    711   ret void
    712 }
    713 
    714 ; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i64:
    715 
    716 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    717 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
    718 define amdgpu_kernel void @constant_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
    719   %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
    720   %ext = sext <32 x i8> %load to <32 x i64>
    721   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
    722   ret void
    723 }
    724 
    725 ; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i64:
    726 ; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
    727 ;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
    728 ;   %ext = zext <64 x i8> %load to <64 x i64>
    729 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
    730 ;   ret void
    731 ; }
    732 
    733 ; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i64:
    734 ; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
    735 ;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
    736 ;   %ext = sext <64 x i8> %load to <64 x i64>
    737 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
    738 ;   ret void
    739 ; }
    740 
    741 ; FUNC-LABEL: {{^}}constant_zextload_i8_to_i16:
    742 ; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
    743 ; GCN-NOHSA: buffer_store_short v[[VAL]]
    744 
    745 ; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
    746 ; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
    747 define amdgpu_kernel void @constant_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
    748   %a = load i8, i8 addrspace(4)* %in
    749   %ext = zext i8 %a to i16
    750   store i16 %ext, i16 addrspace(1)* %out
    751   ret void
    752 }
    753 
    754 ; FUNC-LABEL: {{^}}constant_sextload_i8_to_i16:
    755 ; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
    756 ; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
    757 
    758 ; GCN-NOHSA: buffer_store_short v[[VAL]]
    759 ; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
    760 
    761 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    762 define amdgpu_kernel void @constant_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(4)* %in) #0 {
    763   %a = load i8, i8 addrspace(4)* %in
    764   %ext = sext i8 %a to i16
    765   store i16 %ext, i16 addrspace(1)* %out
    766   ret void
    767 }
    768 
    769 ; FUNC-LABEL: {{^}}constant_zextload_v1i8_to_v1i16:
    770 define amdgpu_kernel void @constant_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
    771   %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
    772   %ext = zext <1 x i8> %load to <1 x i16>
    773   store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
    774   ret void
    775 }
    776 
    777 ; FUNC-LABEL: {{^}}constant_sextload_v1i8_to_v1i16:
    778 
    779 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    780 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    781 define amdgpu_kernel void @constant_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(4)* %in) #0 {
    782   %load = load <1 x i8>, <1 x i8> addrspace(4)* %in
    783   %ext = sext <1 x i8> %load to <1 x i16>
    784   store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
    785   ret void
    786 }
    787 
    788 ; FUNC-LABEL: {{^}}constant_zextload_v2i8_to_v2i16:
    789 
    790 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    791 define amdgpu_kernel void @constant_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
    792   %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
    793   %ext = zext <2 x i8> %load to <2 x i16>
    794   store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
    795   ret void
    796 }
    797 
    798 ; FUNC-LABEL: {{^}}constant_sextload_v2i8_to_v2i16:
    799 
    800 ; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    801 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    802 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    803 define amdgpu_kernel void @constant_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(4)* %in) #0 {
    804   %load = load <2 x i8>, <2 x i8> addrspace(4)* %in
    805   %ext = sext <2 x i8> %load to <2 x i16>
    806   store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
    807   ret void
    808 }
    809 
    810 ; FUNC-LABEL: {{^}}constant_zextload_v4i8_to_v4i16:
    811 
    812 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    813 define amdgpu_kernel void @constant_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
    814   %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
    815   %ext = zext <4 x i8> %load to <4 x i16>
    816   store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
    817   ret void
    818 }
    819 
    820 ; FUNC-LABEL: {{^}}constant_sextload_v4i8_to_v4i16:
    821 
    822 ; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
    823 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    824 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    825 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    826 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    827 define amdgpu_kernel void @constant_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(4)* %in) #0 {
    828   %load = load <4 x i8>, <4 x i8> addrspace(4)* %in
    829   %ext = sext <4 x i8> %load to <4 x i16>
    830   store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
    831   ret void
    832 }
    833 
    834 ; FUNC-LABEL: {{^}}constant_zextload_v8i8_to_v8i16:
    835 
    836 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
    837 define amdgpu_kernel void @constant_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
    838   %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
    839   %ext = zext <8 x i8> %load to <8 x i16>
    840   store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
    841   ret void
    842 }
    843 
    844 ; FUNC-LABEL: {{^}}constant_sextload_v8i8_to_v8i16:
    845 
    846 ; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
    847 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    848 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    849 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    850 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    851 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    852 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    853 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    854 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    855 
    856 define amdgpu_kernel void @constant_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(4)* %in) #0 {
    857   %load = load <8 x i8>, <8 x i8> addrspace(4)* %in
    858   %ext = sext <8 x i8> %load to <8 x i16>
    859   store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
    860   ret void
    861 }
    862 
    863 ; FUNC-LABEL: {{^}}constant_zextload_v16i8_to_v16i16:
    864 
    865 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    866 define amdgpu_kernel void @constant_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
    867   %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
    868   %ext = zext <16 x i8> %load to <16 x i16>
    869   store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
    870   ret void
    871 }
    872 
    873 ; FUNC-LABEL: {{^}}constant_sextload_v16i8_to_v16i16:
    874 
    875 ; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    876 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    877 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    878 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    879 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    880 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    881 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    882 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    883 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    884 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    885 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    886 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    887 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    888 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    889 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    890 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    891 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    892 define amdgpu_kernel void @constant_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(4)* %in) #0 {
    893   %load = load <16 x i8>, <16 x i8> addrspace(4)* %in
    894   %ext = sext <16 x i8> %load to <16 x i16>
    895   store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
    896   ret void
    897 }
    898 
    899 ; FUNC-LABEL: {{^}}constant_zextload_v32i8_to_v32i16:
    900 
    901 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    902 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
    903 define amdgpu_kernel void @constant_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
    904   %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
    905   %ext = zext <32 x i8> %load to <32 x i16>
    906   store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
    907   ret void
    908 }
    909 
    910 ; FUNC-LABEL: {{^}}constant_sextload_v32i8_to_v32i16:
    911 
    912 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
    913 ; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
    914 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    915 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    916 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    917 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    918 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    919 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    920 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    921 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    922 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    923 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    924 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    925 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    926 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    927 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    928 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    929 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    930 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    931 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    932 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    933 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    934 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    935 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    936 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    937 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    938 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    939 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    940 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    941 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    942 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    943 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    944 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    945 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
    946 define amdgpu_kernel void @constant_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(4)* %in) #0 {
    947   %load = load <32 x i8>, <32 x i8> addrspace(4)* %in
    948   %ext = sext <32 x i8> %load to <32 x i16>
    949   store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
    950   ret void
    951 }
    952 
    953 ; XFUNC-LABEL: {{^}}constant_zextload_v64i8_to_v64i16:
    954 ; define amdgpu_kernel void @constant_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
    955 ;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
    956 ;   %ext = zext <64 x i8> %load to <64 x i16>
    957 ;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
    958 ;   ret void
    959 ; }
    960 
    961 ; XFUNC-LABEL: {{^}}constant_sextload_v64i8_to_v64i16:
    962 ; define amdgpu_kernel void @constant_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(4)* %in) #0 {
    963 ;   %load = load <64 x i8>, <64 x i8> addrspace(4)* %in
    964 ;   %ext = sext <64 x i8> %load to <64 x i16>
    965 ;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
    966 ;   ret void
    967 ; }
    968 
    969 attributes #0 = { nounwind } ; attribute group referenced as "#0" by the kernel definitions above
    970