; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,SI,FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-HSA,SI,FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=amdgcn -mcpu=tonga -mattr=-flat-for-global -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefixes=GCN,GCN-NOHSA,VI,FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=redwood -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s
; RUN:  llc -amdgpu-scalarize-global-loads=false  -march=r600 -mcpu=cayman -verify-machineinstrs < %s | FileCheck -allow-deprecated-dag-overlap -check-prefix=EG -check-prefix=FUNC %s


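; The RUN lines above compile this IR with llc for several targets and pipe the
; assembly into FileCheck. The non-HSA amdgcn runs (GCN-NOHSA) are expected to
; use buffer loads/stores, the amdhsa run (GCN-HSA) flat instructions, and the
; r600 runs (EG) VTX_READ fetches; the SI/VI prefixes cover generation-specific
; differences. Every kernel below follows the same shape: load from %in,
; optionally extend, and store to %out.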
; FUNC-LABEL: {{^}}global_load_i8:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: NOT AND
define amdgpu_kernel void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
entry:
  %ld = load i8, i8 addrspace(1)* %in
  store i8 %ld, i8 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v2i8:
; GCN-NOHSA: buffer_load_ushort v
; GCN-HSA: flat_load_ushort v

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
  store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v3i8:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
  store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v4i8:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
  store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v8i8:
; GCN-NOHSA: buffer_load_dwordx2
; GCN-HSA: flat_load_dwordx2

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <8 x i8>, <8 x i8> addrspace(1)* %in
  store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_load_v16i8:
; GCN-NOHSA: buffer_load_dwordx4

; GCN-HSA: flat_load_dwordx4

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <16 x i8>, <16 x i8> addrspace(1)* %in
  store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
  ret void
}

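; The kernels from here on exercise zero- and sign-extending loads. On GCN a
; scalar extension is expected to fold into the load (buffer_load_ubyte/_sbyte,
; or the flat forms on HSA); on EG the sign extension is expected to be a
; BFE_INT of the loaded value, with the literal 8 giving the field width.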
; FUNC-LABEL: {{^}}global_zextload_i8_to_i32:
; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
; GCN-HSA: flat_load_ubyte

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %a to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_i8_to_i32:
; GCN-NOHSA: buffer_load_sbyte
; GCN-HSA: flat_load_sbyte

; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %ld = load i8, i8 addrspace(1)* %in
  %ext = sext i8 %ld to i32
  store i32 %ext, i32 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = zext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i32:

; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]], 0, #1
; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
; EG: 8
define amdgpu_kernel void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = sext <1 x i8> %load to <1 x i32>
  store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, literal
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = zext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i32:
; GCN-NOHSA: buffer_load_ushort
; GCN-HSA: flat_load_ushort

; EG: VTX_READ_16 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = sext <2 x i8> %load to <2 x i32>
  store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v3i8_to_v3i32:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; SI-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; VI-DAG: v_lshrrev_b16_e32 v{{[0-9]+}}, 8, v{{[0-9]+}}
; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
  %ext = zext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v3i8_to_v3i32:
; GCN-NOHSA: buffer_load_dword v
; GCN-HSA: flat_load_dword v

; FIXME: Need to optimize this sequence to avoid an extra shift on VI.

; t23: i16 = truncate t18
; t49: i16 = srl t23, Constant:i32<8>
; t57: i32 = any_extend t49
; t58: i32 = sign_extend_inreg t57, ValueType:ch:i8

; SI-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
; VI-DAG: v_lshrrev_b16_e32 [[SHIFT:v[0-9]+]], 8, v{{[0-9]+}}
; VI-DAG: v_bfe_i32 v{{[0-9]+}}, [[SHIFT]], 0, 8
; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
entry:
  %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
  %ext = sext <3 x i8> %ld to <3 x i32>
  store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i32:
; GCN-NOHSA: buffer_load_dword
; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = zext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i32:
; GCN-NOHSA: buffer_load_dword
; GCN-HSA: flat_load_dword

; EG: VTX_READ_32 [[DST:T[0-9]+\.X]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = sext <4 x i8> %load to <4 x i32>
  store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i32:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = zext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i32:

; EG: VTX_READ_64 [[DST:T[0-9]+\.XY]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = sext <8 x i8> %load to <8 x i32>
  store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i32:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = zext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i32:

; EG: VTX_READ_128 [[DST:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]*.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = sext <16 x i8> %load to <16 x i32>
  store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i32:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: BFE_UINT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, {{.*}}literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = zext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i32:

; EG-DAG: VTX_READ_128 [[DST_LO:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 [[DST_HI:T[0-9]+\.XYZW]], T{{[0-9]+}}.X, 16, #1
; TODO: These should use DST, but for some reason there are redundant MOVs
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9]+.[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
; EG-DAG: 8
define amdgpu_kernel void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = sext <32 x i8> %load to <32 x i32>
  store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
  %ext = zext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i32:

; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 16, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 32, #1
; EG-DAG: VTX_READ_128 {{T[0-9]+\.XYZW}}, T{{[0-9]+}}.X, 48, #1
define amdgpu_kernel void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
  %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
  %ext = sext <64 x i8> %load to <64 x i32>
  store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
  ret void
}

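; The following kernels extend i8 to i64. For zext the high 32 bits of each
; result are simply a zero constant; for sext they are produced by an
; arithmetic shift right of the low half by 31, which is what the
; v_ashrrev_i32 / ASHR checks below expect.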
; FUNC-LABEL: {{^}}global_zextload_i8_to_i64:
; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}

; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]

; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_i8_to_i64:
; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]

; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7?
; EG: 31
define amdgpu_kernel void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = sext i8 %a to i64
  store i64 %ext, i64 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: MOV {{.*}}, 0.0
define amdgpu_kernel void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = zext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i64:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG: ASHR {{\**}} {{T[0-9]\.[XYZW]}}, {{.*}}, literal
; TODO: Why not 7?
; EG: 31
define amdgpu_kernel void @global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = sext <1 x i8> %load to <1 x i64>
  store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = zext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = sext <2 x i8> %load to <2 x i64>
  store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = zext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = sext <4 x i8> %load to <4 x i64>
  store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = zext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = sext <8 x i8> %load to <8 x i64>
  store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = zext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = sext <16 x i8> %load to <16 x i64>
  store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = zext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i64:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = sext <32 x i8> %load to <32 x i64>
  store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
  ret void
}

; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
; define amdgpu_kernel void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = zext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
; define amdgpu_kernel void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = sext <64 x i8> %load to <64 x i64>
;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
;   ret void
; }

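; The remaining active kernels extend i8 to i16. As with the v64i8-to-v64i64
; cases above, the v64i8-to-v64i16 variants near the end of the file are kept
; commented out under XFUNC-LABEL rather than checked.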
; FUNC-LABEL: {{^}}global_zextload_i8_to_i16:
; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
; GCN-NOHSA: buffer_store_short v[[VAL]]

; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = zext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_i8_to_i16:
; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],

; GCN-NOHSA: buffer_store_short v[[VAL]]
; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
  %a = load i8, i8 addrspace(1)* %in
  %ext = sext i8 %a to i16
  store i16 %ext, i16 addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = zext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i16:

; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
  %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
  %ext = sext <1 x i8> %load to <1 x i16>
  store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = zext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i16:

; EG: VTX_READ_16 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
  %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
  %ext = sext <2 x i8> %load to <2 x i16>
  store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = zext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i16:

; EG: VTX_READ_32 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
  %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
  %ext = sext <4 x i8> %load to <4 x i16>
  store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = zext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i16:

; EG: VTX_READ_64 T{{[0-9]+}}.XY, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
  %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
  %ext = sext <8 x i8> %load to <8 x i16>
  store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
define amdgpu_kernel void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = zext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i16:

; EG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
  %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
  %ext = sext <16 x i8> %load to <16 x i16>
  store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i16:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
define amdgpu_kernel void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = zext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}

; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i16:

; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 0, #1
; EG-DAG: VTX_READ_128 T{{[0-9]+}}.XYZW, T{{[0-9]+}}.X, 16, #1
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, {{.*}}, 0.0, literal
define amdgpu_kernel void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
  %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
  %ext = sext <32 x i8> %load to <32 x i16>
  store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
  ret void
}

; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
; define amdgpu_kernel void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = zext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }

; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
; define amdgpu_kernel void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
;   %ext = sext <64 x i8> %load to <64 x i16>
;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
;   ret void
; }

attributes #0 = { nounwind }