Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      2 ; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=kaveri -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-HSA -check-prefix=FUNC %s
      3 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=GCN-NOHSA -check-prefix=FUNC %s
      4 ; RUN: llc -march=r600 -mcpu=redwood < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      5 ; RUN: llc -march=r600 -mcpu=cayman < %s | FileCheck -check-prefix=EG -check-prefix=FUNC %s
      6 
      7 
      8 ; FUNC-LABEL: {{^}}global_load_i8:
        ; Loads a single i8 from %in and stores it unchanged to %out.
        ; Expected: buffer_load_ubyte for the non-HSA amdgcn runs, flat_load_ubyte for
        ; the amdhsa run, and a VTX_READ_8 on .X channels for the r600 runs.
      9 ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}}
     10 ; GCN-HSA: flat_load_ubyte
     11 
     12 ; EG: VTX_READ_8 T{{[0-9]+}}.X, T{{[0-9]+}}.X, 0
     13 define void @global_load_i8(i8 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
     14 entry:
     15   %ld = load i8, i8 addrspace(1)* %in
     16   store i8 %ld, i8 addrspace(1)* %out
     17   ret void
     18 }
     19 
     20 ; FUNC-LABEL: {{^}}global_load_v2i8:
        ; Loads <2 x i8> from %in and stores it unchanged to %out.
        ; Expected: one 16-bit load (buffer_load_ushort / flat_load_ushort) on amdgcn
        ; and a VTX_READ_16 on r600.
     21 ; GCN-NOHSA: buffer_load_ushort v
     22 ; GCN-HSA: flat_load_ushort v
     23 
     24 ; EG: VTX_READ_16
     25 define void @global_load_v2i8(<2 x i8> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
     26 entry:
     27   %ld = load <2 x i8>, <2 x i8> addrspace(1)* %in
     28   store <2 x i8> %ld, <2 x i8> addrspace(1)* %out
     29   ret void
     30 }
     31 
     32 ; FUNC-LABEL: {{^}}global_load_v3i8:
        ; Loads <3 x i8> from %in and stores it unchanged to %out.
        ; Expected: a dword-sized load (buffer_load_dword / flat_load_dword) on amdgcn
        ; and a VTX_READ_32 on r600.
     33 ; GCN-NOHSA: buffer_load_dword v
     34 ; GCN-HSA: flat_load_dword v
     35 
     36 ; EG-DAG: VTX_READ_32
     37 define void @global_load_v3i8(<3 x i8> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
     38 entry:
     39   %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
     40   store <3 x i8> %ld, <3 x i8> addrspace(1)* %out
     41   ret void
     42 }
     43 
     44 ; FUNC-LABEL: {{^}}global_load_v4i8:
        ; Loads <4 x i8> from %in and stores it unchanged to %out.
        ; Expected: one dword load (buffer_load_dword / flat_load_dword) on amdgcn and
        ; a VTX_READ_32 on r600.
     45 ; GCN-NOHSA: buffer_load_dword v
     46 ; GCN-HSA: flat_load_dword v
     47 
     48 ; EG: VTX_READ_32
     49 define void @global_load_v4i8(<4 x i8> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
     50 entry:
     51   %ld = load <4 x i8>, <4 x i8> addrspace(1)* %in
     52   store <4 x i8> %ld, <4 x i8> addrspace(1)* %out
     53   ret void
     54 }
     55 
     56 ; FUNC-LABEL: {{^}}global_load_v8i8:
        ; Loads <8 x i8> from %in and stores it unchanged to %out.
        ; Expected: one dwordx2 load (buffer_load_dwordx2 / flat_load_dwordx2) on
        ; amdgcn and a VTX_READ_64 on r600.
     57 ; GCN-NOHSA: buffer_load_dwordx2
     58 ; GCN-HSA: flat_load_dwordx2
     59 
     60 ; EG: VTX_READ_64
     61 define void @global_load_v8i8(<8 x i8> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
     62 entry:
     63   %ld = load <8 x i8>, <8 x i8> addrspace(1)* %in
     64   store <8 x i8> %ld, <8 x i8> addrspace(1)* %out
     65   ret void
     66 }
     67 
     68 ; FUNC-LABEL: {{^}}global_load_v16i8:
        ; Loads <16 x i8> from %in and stores it unchanged to %out.
        ; Expected: one dwordx4 load (buffer_load_dwordx4 / flat_load_dwordx4) on
        ; amdgcn and a VTX_READ_128 on r600.
     69 ; GCN-NOHSA: buffer_load_dwordx4
     70 
     71 ; GCN-HSA: flat_load_dwordx4
     72 
     73 ; EG: VTX_READ_128
     74 define void @global_load_v16i8(<16 x i8> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
     75 entry:
     76   %ld = load <16 x i8>, <16 x i8> addrspace(1)* %in
     77   store <16 x i8> %ld, <16 x i8> addrspace(1)* %out
     78   ret void
     79 }
     80 
     81 ; FUNC-LABEL: {{^}}global_zextload_i8_to_i32:
        ; Loads an i8 from %in, zero-extends it to i32 and stores the result to %out.
        ; Expected: an unsigned byte load (buffer_load_ubyte / flat_load_ubyte) on
        ; amdgcn and a VTX_READ_8 between .X channels on r600.
     82 ; GCN-NOHSA: buffer_load_ubyte v{{[0-9]+}},
     83 ; GCN-HSA: flat_load_ubyte
     84 
     85 ; EG: VTX_READ_8 T{{[0-9]+\.X, T[0-9]+\.X}}
     86 define void @global_zextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
     87   %a = load i8, i8 addrspace(1)* %in
     88   %ext = zext i8 %a to i32
     89   store i32 %ext, i32 addrspace(1)* %out
     90   ret void
     91 }
     92 
     93 ; FUNC-LABEL: {{^}}global_sextload_i8_to_i32:
        ; Loads an i8 from %in, sign-extends it to i32 and stores the result to %out.
        ; Expected on amdgcn: a signed byte load (buffer_load_sbyte / flat_load_sbyte).
        ; Expected on r600: a VTX_READ_8 into a captured register, then a BFE_INT that
        ; reads that register, followed by a line containing 8 (presumably the literal
        ; operand of the BFE_INT - confirm against actual llc output).
     94 ; GCN-NOHSA: buffer_load_sbyte
     95 ; GCN-HSA: flat_load_sbyte
     96 
     97 ; EG: VTX_READ_8 [[DST:T[0-9]\.[XYZW]]], [[DST]]
     98 ; EG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST]], 0.0, literal
     99 ; EG: 8
    100 define void @global_sextload_i8_to_i32(i32 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
    101   %ld = load i8, i8 addrspace(1)* %in
    102   %ext = sext i8 %ld to i32
    103   store i32 %ext, i32 addrspace(1)* %out
    104   ret void
    105 }
    106 
    107 ; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i32:
        ; Loads <1 x i8>, zero-extends it to <1 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    108 define void @global_zextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
    109   %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
    110   %ext = zext <1 x i8> %load to <1 x i32>
    111   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
    112   ret void
    113 }
    114 
    115 ; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i32:
        ; Loads <1 x i8>, sign-extends it to <1 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    116 define void @global_sextload_v1i8_to_v1i32(<1 x i32> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
    117   %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
    118   %ext = sext <1 x i8> %load to <1 x i32>
    119   store <1 x i32> %ext, <1 x i32> addrspace(1)* %out
    120   ret void
    121 }
    122 
    123 ; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i32:
        ; Loads <2 x i8>, zero-extends it to <2 x i32> and stores the result to %out.
        ; Expected: one 16-bit load (buffer_load_ushort / flat_load_ushort) on amdgcn
        ; and two VTX_READ_8 instructions on r600.
    124 ; GCN-NOHSA: buffer_load_ushort
    125 ; GCN-HSA: flat_load_ushort
    126 
    127 ; EG: VTX_READ_8
    128 ; EG: VTX_READ_8
    129 define void @global_zextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
    130   %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
    131   %ext = zext <2 x i8> %load to <2 x i32>
    132   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
    133   ret void
    134 }
    135 
    136 ; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i32:
        ; Loads <2 x i8>, sign-extends it to <2 x i32> and stores the result to %out.
        ; Expected on amdgcn: one 16-bit load (buffer_load_ushort / flat_load_ushort).
        ; Expected on r600, in any order: two VTX_READ_8 instructions into captured
        ; registers, one BFE_INT reading each captured register, and two lines
        ; containing 8.
    137 ; GCN-NOHSA: buffer_load_ushort
    138 ; GCN-HSA: flat_load_ushort
    139 
    140 ; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
    141 ; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
    142 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
    143 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
    144 ; EG-DAG: 8
    145 ; EG-DAG: 8
    146 define void @global_sextload_v2i8_to_v2i32(<2 x i32> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
    147   %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
    148   %ext = sext <2 x i8> %load to <2 x i32>
    149   store <2 x i32> %ext, <2 x i32> addrspace(1)* %out
    150   ret void
    151 }
    152 
    153 ; FUNC-LABEL: {{^}}global_zextload_v3i8_to_v3i32:
        ; Loads <3 x i8>, zero-extends it to <3 x i32> and stores the result to %out.
        ; Expected on amdgcn: one dword load, then (in any order) two v_bfe_u32 with
        ; trailing operands "8, 8" and "16, 8" plus a v_and_b32 with 0xff - presumably
        ; one extraction per byte lane. No r600 patterns are checked for this case.
    154 ; GCN-NOHSA: buffer_load_dword v
    155 ; GCN-HSA: flat_load_dword v
    156 
    157 ; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
    158 ; GCN-DAG: v_bfe_u32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
    159 ; GCN-DAG: v_and_b32_e32 v{{[0-9]+}}, 0xff,
    160 define void @global_zextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
    161 entry:
    162   %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
    163   %ext = zext <3 x i8> %ld to <3 x i32>
    164   store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
    165   ret void
    166 }
    167 
    168 ; FUNC-LABEL: {{^}}global_sextload_v3i8_to_v3i32:
        ; Loads <3 x i8>, sign-extends it to <3 x i32> and stores the result to %out.
        ; Expected on amdgcn: one dword load, then (in any order) three v_bfe_i32 with
        ; trailing operands "8, 8", "0, 8" and "16, 8" - presumably one signed
        ; extraction per byte lane. No r600 patterns are checked for this case.
    169 ; GCN-NOHSA: buffer_load_dword v
    170 ; GCN-HSA: flat_load_dword v
    171 
    172 ; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 8, 8
    173 ; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 0, 8
    174 ; GCN-DAG: v_bfe_i32 v{{[0-9]+}}, v{{[0-9]+}}, 16, 8
    175 define void @global_sextload_v3i8_to_v3i32(<3 x i32> addrspace(1)* %out, <3 x i8> addrspace(1)* %in) #0 {
    176 entry:
    177   %ld = load <3 x i8>, <3 x i8> addrspace(1)* %in
    178   %ext = sext <3 x i8> %ld to <3 x i32>
    179   store <3 x i32> %ext, <3 x i32> addrspace(1)* %out
    180   ret void
    181 }
    182 
    183 ; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i32:
        ; Loads <4 x i8>, zero-extends it to <4 x i32> and stores the result to %out.
        ; Expected: one dword load (buffer_load_dword / flat_load_dword) on amdgcn and
        ; four VTX_READ_8 instructions on r600.
    184 ; GCN-NOHSA: buffer_load_dword
    185 ; GCN-HSA: flat_load_dword
    186 
    187 ; EG: VTX_READ_8
    188 ; EG: VTX_READ_8
    189 ; EG: VTX_READ_8
    190 ; EG: VTX_READ_8
    191 define void @global_zextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
    192   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
    193   %ext = zext <4 x i8> %load to <4 x i32>
    194   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
    195   ret void
    196 }
    197 
    198 ; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i32:
        ; Loads <4 x i8>, sign-extends it to <4 x i32> and stores the result to %out.
        ; Expected on amdgcn: one dword load (buffer_load_dword / flat_load_dword).
        ; Expected on r600, in any order: four VTX_READ_8 instructions into captured
        ; registers, one BFE_INT reading each captured register, and four lines
        ; containing 8.
    199 ; GCN-NOHSA: buffer_load_dword
    200 ; GCN-HSA: flat_load_dword
    201 
    202 ; EG-DAG: VTX_READ_8 [[DST_X:T[0-9]\.[XYZW]]], [[DST_X]]
    203 ; EG-DAG: VTX_READ_8 [[DST_Y:T[0-9]\.[XYZW]]], [[DST_Y]]
    204 ; EG-DAG: VTX_READ_8 [[DST_Z:T[0-9]\.[XYZW]]], [[DST_Z]]
    205 ; EG-DAG: VTX_READ_8 [[DST_W:T[0-9]\.[XYZW]]], [[DST_W]]
    206 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_X]], 0.0, literal
    207 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Y]], 0.0, literal
    208 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_Z]], 0.0, literal
    209 ; EG-DAG: BFE_INT {{[* ]*}}T{{[0-9].[XYZW]}}, [[DST_W]], 0.0, literal
    210 ; EG-DAG: 8
    211 ; EG-DAG: 8
    212 ; EG-DAG: 8
    213 ; EG-DAG: 8
    214 define void @global_sextload_v4i8_to_v4i32(<4 x i32> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
    215   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
    216   %ext = sext <4 x i8> %load to <4 x i32>
    217   store <4 x i32> %ext, <4 x i32> addrspace(1)* %out
    218   ret void
    219 }
    220 
    221 ; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i32:
        ; Loads <8 x i8>, zero-extends it to <8 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    222 define void @global_zextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
    223   %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
    224   %ext = zext <8 x i8> %load to <8 x i32>
    225   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
    226   ret void
    227 }
    228 
    229 ; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i32:
        ; Loads <8 x i8>, sign-extends it to <8 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    230 define void @global_sextload_v8i8_to_v8i32(<8 x i32> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
    231   %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
    232   %ext = sext <8 x i8> %load to <8 x i32>
    233   store <8 x i32> %ext, <8 x i32> addrspace(1)* %out
    234   ret void
    235 }
    236 
    237 ; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i32:
        ; Loads <16 x i8>, zero-extends it to <16 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    238 define void @global_zextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
    239   %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
    240   %ext = zext <16 x i8> %load to <16 x i32>
    241   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
    242   ret void
    243 }
    244 
    245 ; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i32:
        ; Loads <16 x i8>, sign-extends it to <16 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    246 define void @global_sextload_v16i8_to_v16i32(<16 x i32> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
    247   %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
    248   %ext = sext <16 x i8> %load to <16 x i32>
    249   store <16 x i32> %ext, <16 x i32> addrspace(1)* %out
    250   ret void
    251 }
    252 
    253 ; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i32:
        ; Loads <32 x i8>, zero-extends it to <32 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    254 define void @global_zextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
    255   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
    256   %ext = zext <32 x i8> %load to <32 x i32>
    257   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
    258   ret void
    259 }
    260 
    261 ; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i32:
        ; Loads <32 x i8>, sign-extends it to <32 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    262 define void @global_sextload_v32i8_to_v32i32(<32 x i32> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
    263   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
    264   %ext = sext <32 x i8> %load to <32 x i32>
    265   store <32 x i32> %ext, <32 x i32> addrspace(1)* %out
    266   ret void
    267 }
    268 
    269 ; FUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i32:
        ; Loads <64 x i8>, zero-extends it to <64 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    270 define void @global_zextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
    271   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
    272   %ext = zext <64 x i8> %load to <64 x i32>
    273   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
    274   ret void
    275 }
    276 
    277 ; FUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i32:
        ; Loads <64 x i8>, sign-extends it to <64 x i32> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    278 define void @global_sextload_v64i8_to_v64i32(<64 x i32> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
    279   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
    280   %ext = sext <64 x i8> %load to <64 x i32>
    281   store <64 x i32> %ext, <64 x i32> addrspace(1)* %out
    282   ret void
    283 }
    284 
    285 ; FUNC-LABEL: {{^}}global_zextload_i8_to_i64:
        ; Loads an i8 from %in, zero-extends it to i64 and stores the result to %out.
        ; Expected on amdgcn: a v_mov_b32 of 0 whose destination is captured as HI, an
        ; unsigned byte load whose destination is captured as LO, and a dwordx2 store
        ; of the LO:HI register pair. No r600 patterns are checked for this case.
    286 ; GCN-DAG: v_mov_b32_e32 v[[HI:[0-9]+]], 0{{$}}
    287 
    288 ; GCN-NOHSA-DAG: buffer_load_ubyte v[[LO:[0-9]+]],
    289 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]]
    290 
    291 ; GCN-HSA-DAG: flat_load_ubyte v[[LO:[0-9]+]],
    292 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]]
    293 define void @global_zextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
    294   %a = load i8, i8 addrspace(1)* %in
    295   %ext = zext i8 %a to i64
    296   store i64 %ext, i64 addrspace(1)* %out
    297   ret void
    298 }
    299 
    300 ; FUNC-LABEL: {{^}}global_sextload_i8_to_i64:
        ; Loads an i8 from %in, sign-extends it to i64 and stores the result to %out.
        ; Expected on amdgcn: a signed byte load whose destination is captured as LO,
        ; a v_ashrrev_i32 by 31 of LO whose destination is captured as HI, and a
        ; dwordx2 store of the LO:HI register pair. No r600 patterns are checked.
    301 ; GCN-NOHSA: buffer_load_sbyte v[[LO:[0-9]+]],
    302 ; GCN-HSA: flat_load_sbyte v[[LO:[0-9]+]],
    303 ; GCN: v_ashrrev_i32_e32 v[[HI:[0-9]+]], 31, v[[LO]]
    304 
    305 ; GCN-NOHSA: buffer_store_dwordx2 v{{\[}}[[LO]]:[[HI]]{{\]}}
    306 ; GCN-HSA: flat_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, v{{\[}}[[LO]]:[[HI]]{{\]}}
    307 define void @global_sextload_i8_to_i64(i64 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
    308   %a = load i8, i8 addrspace(1)* %in
    309   %ext = sext i8 %a to i64
    310   store i64 %ext, i64 addrspace(1)* %out
    311   ret void
    312 }
    313 
    314 ; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i64:
        ; Loads <1 x i8>, zero-extends it to <1 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    315 define void @global_zextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
    316   %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
    317   %ext = zext <1 x i8> %load to <1 x i64>
    318   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
    319   ret void
    320 }
    321 
    322 ; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i64:
        ; Loads <1 x i8>, sign-extends it to <1 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    323 define void @global_sextload_v1i8_to_v1i64(<1 x i64> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
    324   %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
    325   %ext = sext <1 x i8> %load to <1 x i64>
    326   store <1 x i64> %ext, <1 x i64> addrspace(1)* %out
    327   ret void
    328 }
    329 
    330 ; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i64:
        ; Loads <2 x i8>, zero-extends it to <2 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    331 define void @global_zextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
    332   %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
    333   %ext = zext <2 x i8> %load to <2 x i64>
    334   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
    335   ret void
    336 }
    337 
    338 ; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i64:
        ; Loads <2 x i8>, sign-extends it to <2 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    339 define void @global_sextload_v2i8_to_v2i64(<2 x i64> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
    340   %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
    341   %ext = sext <2 x i8> %load to <2 x i64>
    342   store <2 x i64> %ext, <2 x i64> addrspace(1)* %out
    343   ret void
    344 }
    345 
    346 ; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i64:
        ; Loads <4 x i8>, zero-extends it to <4 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    347 define void @global_zextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
    348   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
    349   %ext = zext <4 x i8> %load to <4 x i64>
    350   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
    351   ret void
    352 }
    353 
    354 ; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i64:
        ; Loads <4 x i8>, sign-extends it to <4 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    355 define void @global_sextload_v4i8_to_v4i64(<4 x i64> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
    356   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
    357   %ext = sext <4 x i8> %load to <4 x i64>
    358   store <4 x i64> %ext, <4 x i64> addrspace(1)* %out
    359   ret void
    360 }
    361 
    362 ; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i64:
        ; Loads <8 x i8>, zero-extends it to <8 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    363 define void @global_zextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
    364   %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
    365   %ext = zext <8 x i8> %load to <8 x i64>
    366   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
    367   ret void
    368 }
    369 
    370 ; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i64:
        ; Loads <8 x i8>, sign-extends it to <8 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    371 define void @global_sextload_v8i8_to_v8i64(<8 x i64> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
    372   %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
    373   %ext = sext <8 x i8> %load to <8 x i64>
    374   store <8 x i64> %ext, <8 x i64> addrspace(1)* %out
    375   ret void
    376 }
    377 
    378 ; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i64:
        ; Loads <16 x i8>, zero-extends it to <16 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    379 define void @global_zextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
    380   %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
    381   %ext = zext <16 x i8> %load to <16 x i64>
    382   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
    383   ret void
    384 }
    385 
    386 ; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i64:
        ; Loads <16 x i8>, sign-extends it to <16 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    387 define void @global_sextload_v16i8_to_v16i64(<16 x i64> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
    388   %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
    389   %ext = sext <16 x i8> %load to <16 x i64>
    390   store <16 x i64> %ext, <16 x i64> addrspace(1)* %out
    391   ret void
    392 }
    393 
    394 ; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i64:
        ; Loads <32 x i8>, zero-extends it to <32 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    395 define void @global_zextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
    396   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
    397   %ext = zext <32 x i8> %load to <32 x i64>
    398   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
    399   ret void
    400 }
    401 
    402 ; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i64:
        ; Loads <32 x i8>, sign-extends it to <32 x i64> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    403 define void @global_sextload_v32i8_to_v32i64(<32 x i64> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
    404   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
    405   %ext = sext <32 x i8> %load to <32 x i64>
    406   store <32 x i64> %ext, <32 x i64> addrspace(1)* %out
    407   ret void
    408 }
    409 
    410 ; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i64:
    411 ; define void @global_zextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
    412 ;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
    413 ;   %ext = zext <64 x i8> %load to <64 x i64>
    414 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
    415 ;   ret void
    416 ; }
    417 
    418 ; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i64:
    419 ; define void @global_sextload_v64i8_to_v64i64(<64 x i64> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
    420 ;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
    421 ;   %ext = sext <64 x i8> %load to <64 x i64>
    422 ;   store <64 x i64> %ext, <64 x i64> addrspace(1)* %out
    423 ;   ret void
    424 ; }
    425 
    426 ; FUNC-LABEL: {{^}}global_zextload_i8_to_i16:
        ; Loads an i8 from %in, zero-extends it to i16 and stores the result to %out.
        ; Expected on amdgcn: an unsigned byte load whose destination register (VAL) is
        ; the same register written by the following short store. No r600 patterns are
        ; checked for this case.
    427 ; GCN-NOHSA: buffer_load_ubyte v[[VAL:[0-9]+]],
    428 ; GCN-NOHSA: buffer_store_short v[[VAL]]
    429 
    430 ; GCN-HSA: flat_load_ubyte v[[VAL:[0-9]+]],
    431 ; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
    432 define void @global_zextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
    433   %a = load i8, i8 addrspace(1)* %in
    434   %ext = zext i8 %a to i16
    435   store i16 %ext, i16 addrspace(1)* %out
    436   ret void
    437 }
    438 
    439 ; FUNC-LABEL: {{^}}global_sextload_i8_to_i16:
        ; Loads an i8 from %in, sign-extends it to i16 and stores the result to %out.
        ; Expected on amdgcn: a signed byte load whose destination register (VAL) is
        ; the same register written by the following short store. No r600 patterns are
        ; checked for this case.
    440 ; GCN-NOHSA: buffer_load_sbyte v[[VAL:[0-9]+]],
    441 ; GCN-HSA: flat_load_sbyte v[[VAL:[0-9]+]],
    442 
    443 ; GCN-NOHSA: buffer_store_short v[[VAL]]
    444 ; GCN-HSA: flat_store_short v{{\[[0-9]+:[0-9]+\]}}, v[[VAL]]
    445 define void @global_sextload_i8_to_i16(i16 addrspace(1)* %out, i8 addrspace(1)* %in) #0 {
    446   %a = load i8, i8 addrspace(1)* %in
    447   %ext = sext i8 %a to i16
    448   store i16 %ext, i16 addrspace(1)* %out
    449   ret void
    450 }
    451 
    452 ; FUNC-LABEL: {{^}}global_zextload_v1i8_to_v1i16:
        ; Loads <1 x i8>, zero-extends it to <1 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    453 define void @global_zextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
    454   %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
    455   %ext = zext <1 x i8> %load to <1 x i16>
    456   store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
    457   ret void
    458 }
    459 
    460 ; FUNC-LABEL: {{^}}global_sextload_v1i8_to_v1i16:
        ; Loads <1 x i8>, sign-extends it to <1 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    461 define void @global_sextload_v1i8_to_v1i16(<1 x i16> addrspace(1)* %out, <1 x i8> addrspace(1)* %in) #0 {
    462   %load = load <1 x i8>, <1 x i8> addrspace(1)* %in
    463   %ext = sext <1 x i8> %load to <1 x i16>
    464   store <1 x i16> %ext, <1 x i16> addrspace(1)* %out
    465   ret void
    466 }
    467 
    468 ; FUNC-LABEL: {{^}}global_zextload_v2i8_to_v2i16:
        ; Loads <2 x i8>, zero-extends it to <2 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    469 define void @global_zextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
    470   %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
    471   %ext = zext <2 x i8> %load to <2 x i16>
    472   store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
    473   ret void
    474 }
    475 
    476 ; FUNC-LABEL: {{^}}global_sextload_v2i8_to_v2i16:
        ; Loads <2 x i8>, sign-extends it to <2 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    477 define void @global_sextload_v2i8_to_v2i16(<2 x i16> addrspace(1)* %out, <2 x i8> addrspace(1)* %in) #0 {
    478   %load = load <2 x i8>, <2 x i8> addrspace(1)* %in
    479   %ext = sext <2 x i8> %load to <2 x i16>
    480   store <2 x i16> %ext, <2 x i16> addrspace(1)* %out
    481   ret void
    482 }
    483 
    484 ; FUNC-LABEL: {{^}}global_zextload_v4i8_to_v4i16:
        ; Loads <4 x i8>, zero-extends it to <4 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    485 define void @global_zextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
    486   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
    487   %ext = zext <4 x i8> %load to <4 x i16>
    488   store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
    489   ret void
    490 }
    491 
    492 ; FUNC-LABEL: {{^}}global_sextload_v4i8_to_v4i16:
        ; Loads <4 x i8>, sign-extends it to <4 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    493 define void @global_sextload_v4i8_to_v4i16(<4 x i16> addrspace(1)* %out, <4 x i8> addrspace(1)* %in) #0 {
    494   %load = load <4 x i8>, <4 x i8> addrspace(1)* %in
    495   %ext = sext <4 x i8> %load to <4 x i16>
    496   store <4 x i16> %ext, <4 x i16> addrspace(1)* %out
    497   ret void
    498 }
    499 
    500 ; FUNC-LABEL: {{^}}global_zextload_v8i8_to_v8i16:
        ; Loads <8 x i8>, zero-extends it to <8 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    501 define void @global_zextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
    502   %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
    503   %ext = zext <8 x i8> %load to <8 x i16>
    504   store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
    505   ret void
    506 }
    507 
    508 ; FUNC-LABEL: {{^}}global_sextload_v8i8_to_v8i16:
        ; Loads <8 x i8>, sign-extends it to <8 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    509 define void @global_sextload_v8i8_to_v8i16(<8 x i16> addrspace(1)* %out, <8 x i8> addrspace(1)* %in) #0 {
    510   %load = load <8 x i8>, <8 x i8> addrspace(1)* %in
    511   %ext = sext <8 x i8> %load to <8 x i16>
    512   store <8 x i16> %ext, <8 x i16> addrspace(1)* %out
    513   ret void
    514 }
    515 
    516 ; FUNC-LABEL: {{^}}global_zextload_v16i8_to_v16i16:
        ; Loads <16 x i8>, zero-extends it to <16 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    517 define void @global_zextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
    518   %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
    519   %ext = zext <16 x i8> %load to <16 x i16>
    520   store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
    521   ret void
    522 }
    523 
    524 ; FUNC-LABEL: {{^}}global_sextload_v16i8_to_v16i16:
        ; Loads <16 x i8>, sign-extends it to <16 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    525 define void @global_sextload_v16i8_to_v16i16(<16 x i16> addrspace(1)* %out, <16 x i8> addrspace(1)* %in) #0 {
    526   %load = load <16 x i8>, <16 x i8> addrspace(1)* %in
    527   %ext = sext <16 x i8> %load to <16 x i16>
    528   store <16 x i16> %ext, <16 x i16> addrspace(1)* %out
    529   ret void
    530 }
    531 
    532 ; FUNC-LABEL: {{^}}global_zextload_v32i8_to_v32i16:
        ; Loads <32 x i8>, zero-extends it to <32 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    533 define void @global_zextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
    534   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
    535   %ext = zext <32 x i8> %load to <32 x i16>
    536   store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
    537   ret void
    538 }
    539 
    540 ; FUNC-LABEL: {{^}}global_sextload_v32i8_to_v32i16:
        ; Loads <32 x i8>, sign-extends it to <32 x i16> and stores the result to %out.
        ; Only the function label is checked; no instruction patterns are pinned.
    541 define void @global_sextload_v32i8_to_v32i16(<32 x i16> addrspace(1)* %out, <32 x i8> addrspace(1)* %in) #0 {
    542   %load = load <32 x i8>, <32 x i8> addrspace(1)* %in
    543   %ext = sext <32 x i8> %load to <32 x i16>
    544   store <32 x i16> %ext, <32 x i16> addrspace(1)* %out
    545   ret void
    546 }
    547 
    548 ; XFUNC-LABEL: {{^}}global_zextload_v64i8_to_v64i16:
    549 ; define void @global_zextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
    550 ;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
    551 ;   %ext = zext <64 x i8> %load to <64 x i16>
    552 ;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
    553 ;   ret void
    554 ; }
    555 
    556 ; XFUNC-LABEL: {{^}}global_sextload_v64i8_to_v64i16:
    557 ; define void @global_sextload_v64i8_to_v64i16(<64 x i16> addrspace(1)* %out, <64 x i8> addrspace(1)* %in) #0 {
    558 ;   %load = load <64 x i8>, <64 x i8> addrspace(1)* %in
    559 ;   %ext = sext <64 x i8> %load to <64 x i16>
    560 ;   store <64 x i16> %ext, <64 x i16> addrspace(1)* %out
    561 ;   ret void
    562 ; }
    563 
    564 attributes #0 = { nounwind } ; attribute group referenced as #0 by the function definitions above
    565