Home | History | Annotate | Download | only in AMDGPU
      1 ; RUN: llc -march=amdgcn -mcpu=SI -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=SI %s
      2 ; RUN: llc -march=amdgcn -mcpu=tonga -verify-machineinstrs < %s | FileCheck -check-prefix=GCN -check-prefix=VI %s
      3 
      4 ; half args should be promoted to float
      5 
      6 ; GCN-LABEL: {{^}}load_f16_arg:
      7 ; GCN: s_load_dword [[ARG:s[0-9]+]]
      8 ; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[ARG]]
      9 ; GCN: buffer_store_short [[CVT]]
     10 define void @load_f16_arg(half addrspace(1)* %out, half %arg) #0 {
          ; Stores the by-value half argument unchanged through the addrspace(1) pointer %out.
     11   store half %arg, half addrspace(1)* %out
     12   ret void
     13 }
     14 
     15 ; GCN-LABEL: {{^}}load_v2f16_arg:
     16 ; GCN-DAG: buffer_load_ushort [[V0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:44
     17 ; GCN-DAG: buffer_load_ushort [[V1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:46
     18 ; GCN-DAG: buffer_store_short [[V0]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
     19 ; GCN-DAG: buffer_store_short [[V1]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
     20 ; GCN: s_endpgm
     21 define void @load_v2f16_arg(<2 x half> addrspace(1)* %out, <2 x half> %arg) #0 {
          ; Stores the by-value <2 x half> argument unchanged to %out; no conversion is involved.
     22   store <2 x half> %arg, <2 x half> addrspace(1)* %out
     23   ret void
     24 }
     25 
     26 ; GCN-LABEL: {{^}}load_v3f16_arg:
     27 ; GCN: buffer_load_ushort
     28 ; GCN: buffer_load_ushort
     29 ; GCN: buffer_load_ushort
     30 ; GCN-NOT: buffer_load
     31 ; GCN-DAG: buffer_store_dword
     32 ; GCN-DAG: buffer_store_short
     33 ; GCN-NOT: buffer_store
     34 ; GCN: s_endpgm
     35 define void @load_v3f16_arg(<3 x half> addrspace(1)* %out, <3 x half> %arg) #0 {
          ; Stores the by-value <3 x half> argument (an odd element count) unchanged to %out.
     36   store <3 x half> %arg, <3 x half> addrspace(1)* %out
     37   ret void
     38 }
     39 
     40 ; GCN-LABEL: {{^}}load_v4f16_arg:
     41 ; GCN: buffer_load_ushort
     42 ; GCN: buffer_load_ushort
     43 ; GCN: buffer_load_ushort
     44 ; GCN: buffer_load_ushort
     45 ; GCN: buffer_store_short
     46 ; GCN: buffer_store_short
     47 ; GCN: buffer_store_short
     48 ; GCN: buffer_store_short
     49 ; GCN: s_endpgm
     50 define void @load_v4f16_arg(<4 x half> addrspace(1)* %out, <4 x half> %arg) #0 {
          ; Stores the by-value <4 x half> argument unchanged to %out.
     51   store <4 x half> %arg, <4 x half> addrspace(1)* %out
     52   ret void
     53 }
     54 
     55 ; GCN-LABEL: {{^}}load_v8f16_arg:
     56 define void @load_v8f16_arg(<8 x half> addrspace(1)* %out, <8 x half> %arg) #0 {
          ; Stores the by-value <8 x half> argument unchanged to %out.
          ; Only the function label is matched for this case; no instruction checks accompany it.
     57   store <8 x half> %arg, <8 x half> addrspace(1)* %out
     58   ret void
     59 }
     60 
     61 ; GCN-LABEL: {{^}}extload_v2f16_arg:
     62 define void @extload_v2f16_arg(<2 x float> addrspace(1)* %out, <2 x half> %in) #0 {
          ; Widens the by-value <2 x half> argument to <2 x float> with fpext and stores it to %out.
          ; Only the function label is matched for this case.
     63   %fpext = fpext <2 x half> %in to <2 x float>
     64   store <2 x float> %fpext, <2 x float> addrspace(1)* %out
     65   ret void
     66 }
     67 
     68 ; GCN-LABEL: {{^}}extload_f16_to_f32_arg:
     69 define void @extload_f16_to_f32_arg(float addrspace(1)* %out, half %arg) #0 {
          ; Extends the scalar half argument to float and stores the result to %out.
          ; Only the function label is matched for this case.
     70   %ext = fpext half %arg to float
     71   store float %ext, float addrspace(1)* %out
     72   ret void
     73 }
     74 
     75 ; GCN-LABEL: {{^}}extload_v2f16_to_v2f32_arg:
     76 define void @extload_v2f16_to_v2f32_arg(<2 x float> addrspace(1)* %out, <2 x half> %arg) #0 {
          ; Extends the <2 x half> argument to <2 x float> and stores it to %out.
          ; Same IR shape as @extload_v2f16_arg apart from value names; only the label is matched.
     77   %ext = fpext <2 x half> %arg to <2 x float>
     78   store <2 x float> %ext, <2 x float> addrspace(1)* %out
     79   ret void
     80 }
     81 
     82 ; GCN-LABEL: {{^}}extload_v3f16_to_v3f32_arg:
     83 ; GCN: buffer_load_ushort
     84 ; GCN: buffer_load_ushort
     85 ; GCN: buffer_load_ushort
     86 ; GCN-NOT: buffer_load
     87 ; GCN: v_cvt_f32_f16_e32
     88 ; GCN: v_cvt_f32_f16_e32
     89 ; GCN: v_cvt_f32_f16_e32
     90 ; GCN-NOT: v_cvt_f32_f16
     91 ; GCN-DAG: buffer_store_dword
     92 ; GCN-DAG: buffer_store_dwordx2
     93 ; GCN: s_endpgm
     94 define void @extload_v3f16_to_v3f32_arg(<3 x float> addrspace(1)* %out, <3 x half> %arg) #0 {
          ; Extends the <3 x half> argument to <3 x float>; the checks above expect exactly three
          ; half loads and three half-to-float conversions, with no extras.
     95   %ext = fpext <3 x half> %arg to <3 x float>
     96   store <3 x float> %ext, <3 x float> addrspace(1)* %out
     97   ret void
     98 }
     99 
    100 ; GCN-LABEL: {{^}}extload_v4f16_to_v4f32_arg:
    101 define void @extload_v4f16_to_v4f32_arg(<4 x float> addrspace(1)* %out, <4 x half> %arg) #0 {
          ; Extends the <4 x half> argument to <4 x float> and stores it to %out.
          ; Only the function label is matched for this case.
    102   %ext = fpext <4 x half> %arg to <4 x float>
    103   store <4 x float> %ext, <4 x float> addrspace(1)* %out
    104   ret void
    105 }
    106 
    107 ; GCN-LABEL: {{^}}extload_v8f16_to_v8f32_arg:
    108 ; GCN: buffer_load_ushort
    109 ; GCN: buffer_load_ushort
    110 ; GCN: buffer_load_ushort
    111 ; GCN: buffer_load_ushort
    112 ; GCN: buffer_load_ushort
    113 ; GCN: buffer_load_ushort
    114 ; GCN: buffer_load_ushort
    115 ; GCN: buffer_load_ushort
    116 
    117 ; GCN: v_cvt_f32_f16_e32
    118 ; GCN: v_cvt_f32_f16_e32
    119 ; GCN: v_cvt_f32_f16_e32
    120 ; GCN: v_cvt_f32_f16_e32
    121 ; GCN: v_cvt_f32_f16_e32
    122 ; GCN: v_cvt_f32_f16_e32
    123 ; GCN: v_cvt_f32_f16_e32
    124 ; GCN: v_cvt_f32_f16_e32
    125 
    126 ; GCN: buffer_store_dwordx4
    127 ; GCN: buffer_store_dwordx4
    128 define void @extload_v8f16_to_v8f32_arg(<8 x float> addrspace(1)* %out, <8 x half> %arg) #0 {
          ; Extends the <8 x half> argument to <8 x float>; the checks above expect eight half
          ; loads, eight conversions and two 4-dword stores.
    129   %ext = fpext <8 x half> %arg to <8 x float>
    130   store <8 x float> %ext, <8 x float> addrspace(1)* %out
    131   ret void
    132 }
    133 
    134 ; GCN-LABEL: {{^}}extload_f16_to_f64_arg:
    135 ; SI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0xb{{$}}
    136 ; VI: s_load_dword [[ARG:s[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0x2c{{$}}
    137 ; GCN: v_cvt_f64_f32_e32 [[RESULT:v\[[0-9]+:[0-9]+\]]], [[ARG]]
    138 ; GCN: buffer_store_dwordx2 [[RESULT]]
    139 define void @extload_f16_to_f64_arg(double addrspace(1)* %out, half %arg) #0 {
          ; Extends the scalar half argument straight to double with a single fpext.
          ; The checks above expect the argument to arrive in a scalar register and to need only
          ; a float-to-double conversion (no half-to-float step is matched).
    140   %ext = fpext half %arg to double
    141   store double %ext, double addrspace(1)* %out
    142   ret void
    143 }
    144 
    145 ; GCN-LABEL: {{^}}extload_v2f16_to_v2f64_arg:
    146 ; GCN-DAG: buffer_load_ushort v
    147 ; GCN-DAG: buffer_load_ushort v
    148 ; GCN-DAG: v_cvt_f32_f16_e32
    149 ; GCN-DAG: v_cvt_f32_f16_e32
    150 ; GCN-DAG: v_cvt_f64_f32_e32
    151 ; GCN-DAG: v_cvt_f64_f32_e32
    152 ; GCN: s_endpgm
    153 define void @extload_v2f16_to_v2f64_arg(<2 x double> addrspace(1)* %out, <2 x half> %arg) #0 {
          ; Extends the <2 x half> argument to <2 x double>; the checks above expect a
          ; half-to-float and a float-to-double conversion per element.
    154   %ext = fpext <2 x half> %arg to <2 x double>
    155   store <2 x double> %ext, <2 x double> addrspace(1)* %out
    156   ret void
    157 }
    158 
    159 ; GCN-LABEL: {{^}}extload_v3f16_to_v3f64_arg:
    160 ; GCN-DAG: buffer_load_ushort v
    161 ; GCN-DAG: buffer_load_ushort v
    162 ; GCN-DAG: buffer_load_ushort v
    163 ; GCN-DAG: v_cvt_f32_f16_e32
    164 ; GCN-DAG: v_cvt_f32_f16_e32
    165 ; GCN-DAG: v_cvt_f32_f16_e32
    166 ; GCN-DAG: v_cvt_f64_f32_e32
    167 ; GCN-DAG: v_cvt_f64_f32_e32
    168 ; GCN-DAG: v_cvt_f64_f32_e32
    169 ; GCN: s_endpgm
    170 define void @extload_v3f16_to_v3f64_arg(<3 x double> addrspace(1)* %out, <3 x half> %arg) #0 {
          ; Extends the <3 x half> argument to <3 x double>; three loads and three of each
          ; conversion are expected by the checks above.
    171   %ext = fpext <3 x half> %arg to <3 x double>
    172   store <3 x double> %ext, <3 x double> addrspace(1)* %out
    173   ret void
    174 }
    175 
    176 ; GCN-LABEL: {{^}}extload_v4f16_to_v4f64_arg:
    177 ; GCN-DAG: buffer_load_ushort v
    178 ; GCN-DAG: buffer_load_ushort v
    179 ; GCN-DAG: buffer_load_ushort v
    180 ; GCN-DAG: buffer_load_ushort v
    181 ; GCN-DAG: v_cvt_f32_f16_e32
    182 ; GCN-DAG: v_cvt_f32_f16_e32
    183 ; GCN-DAG: v_cvt_f32_f16_e32
    184 ; GCN-DAG: v_cvt_f32_f16_e32
    185 ; GCN-DAG: v_cvt_f64_f32_e32
    186 ; GCN-DAG: v_cvt_f64_f32_e32
    187 ; GCN-DAG: v_cvt_f64_f32_e32
    188 ; GCN-DAG: v_cvt_f64_f32_e32
    189 ; GCN: s_endpgm
    190 define void @extload_v4f16_to_v4f64_arg(<4 x double> addrspace(1)* %out, <4 x half> %arg) #0 {
          ; Extends the <4 x half> argument to <4 x double>; four loads and four of each
          ; conversion are expected by the checks above.
    191   %ext = fpext <4 x half> %arg to <4 x double>
    192   store <4 x double> %ext, <4 x double> addrspace(1)* %out
    193   ret void
    194 }
    195 
    196 ; GCN-LABEL: {{^}}extload_v8f16_to_v8f64_arg:
    197 ; GCN-DAG: buffer_load_ushort v
    198 ; GCN-DAG: buffer_load_ushort v
    199 ; GCN-DAG: buffer_load_ushort v
    200 ; GCN-DAG: buffer_load_ushort v
    201 
    202 ; GCN-DAG: buffer_load_ushort v
    203 ; GCN-DAG: buffer_load_ushort v
    204 ; GCN-DAG: buffer_load_ushort v
    205 ; GCN-DAG: buffer_load_ushort v
    206 
    207 ; GCN-DAG: v_cvt_f32_f16_e32
    208 ; GCN-DAG: v_cvt_f32_f16_e32
    209 ; GCN-DAG: v_cvt_f32_f16_e32
    210 ; GCN-DAG: v_cvt_f32_f16_e32
    211 
    212 ; GCN-DAG: v_cvt_f32_f16_e32
    213 ; GCN-DAG: v_cvt_f32_f16_e32
    214 ; GCN-DAG: v_cvt_f32_f16_e32
    215 ; GCN-DAG: v_cvt_f32_f16_e32
    216 
    217 ; GCN-DAG: v_cvt_f64_f32_e32
    218 ; GCN-DAG: v_cvt_f64_f32_e32
    219 ; GCN-DAG: v_cvt_f64_f32_e32
    220 ; GCN-DAG: v_cvt_f64_f32_e32
    221 
    222 ; GCN-DAG: v_cvt_f64_f32_e32
    223 ; GCN-DAG: v_cvt_f64_f32_e32
    224 ; GCN-DAG: v_cvt_f64_f32_e32
    225 ; GCN-DAG: v_cvt_f64_f32_e32
    226 
    227 ; GCN: s_endpgm
    228 define void @extload_v8f16_to_v8f64_arg(<8 x double> addrspace(1)* %out, <8 x half> %arg) #0 {
          ; Extends the <8 x half> argument to <8 x double>; eight loads and eight of each
          ; conversion are expected by the checks above, in any order.
    229   %ext = fpext <8 x half> %arg to <8 x double>
    230   store <8 x double> %ext, <8 x double> addrspace(1)* %out
    231   ret void
    232 }
    233 
    234 ; GCN-LABEL: {{^}}global_load_store_f16:
    235 ; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
    236 ; GCN: buffer_store_short [[TMP]]
    237 define void @global_load_store_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
          ; Copies one half from %in to %out (both addrspace(1)) with no conversion; the checks
          ; above require the stored register to be the one that was loaded.
    238   %val = load half, half addrspace(1)* %in
    239   store half %val, half addrspace(1)* %out
    240   ret void
    241 }
    242 
    243 ; GCN-LABEL: {{^}}global_load_store_v2f16:
    244 ; GCN: buffer_load_dword [[TMP:v[0-9]+]]
    245 ; GCN: buffer_store_dword [[TMP]]
    246 define void @global_load_store_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
          ; Copies a <2 x half> from %in to %out; the checks above expect a single dword load
          ; and a dword store of the same register.
    247   %val = load <2 x half>, <2 x half> addrspace(1)* %in
    248   store <2 x half> %val, <2 x half> addrspace(1)* %out
    249   ret void
    250 }
    251 
    252 ; GCN-LABEL: {{^}}global_load_store_v4f16:
    253 ; GCN: buffer_load_dwordx2 [[TMP:v\[[0-9]+:[0-9]+\]]]
    254 ; GCN: buffer_store_dwordx2 [[TMP]]
    255 define void @global_load_store_v4f16(<4 x half> addrspace(1)* %in, <4 x half> addrspace(1)* %out) #0 {
          ; Copies a <4 x half> from %in to %out as one two-dword load and store.
          ; Note the parameter order: %in comes first here, whereas the other
          ; global_load_store_* functions in this file take %out first.
    256   %val = load <4 x half>, <4 x half> addrspace(1)* %in
    257   store <4 x half> %val, <4 x half> addrspace(1)* %out
    258   ret void
    259 }
    260 
    261 ; GCN-LABEL: {{^}}global_load_store_v8f16:
    262 ; GCN: buffer_load_dwordx4 [[TMP:v\[[0-9]+:[0-9]+\]]]
    263 ; GCN: buffer_store_dwordx4 [[TMP]]
    264 ; GCN: s_endpgm
    265 define void @global_load_store_v8f16(<8 x half> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
          ; Copies a <8 x half> from %in to %out as one four-dword load and store.
          ; The store check reuses the register tuple captured by the load check (a use of the
          ; FileCheck variable, not a second definition), matching the f16/v2f16/v4f16 cases, so
          ; a copy routed through a different register range is caught.
    266   %val = load <8 x half>, <8 x half> addrspace(1)* %in
    267   store <8 x half> %val, <8 x half> addrspace(1)* %out
    268   ret void
    269 }
    270 
    271 ; GCN-LABEL: {{^}}global_extload_f16_to_f32:
    272 ; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
    273 ; GCN: v_cvt_f32_f16_e32 [[CVT:v[0-9]+]], [[LOAD]]
    274 ; GCN: buffer_store_dword [[CVT]]
    275 define void @global_extload_f16_to_f32(float addrspace(1)* %out, half addrspace(1)* %in) #0 {
          ; Loads a half from %in, extends it to float and stores it to %out; the checks above
          ; chain load -> convert -> store through the same registers.
    276   %val = load half, half addrspace(1)* %in
    277   %cvt = fpext half %val to float
    278   store float %cvt, float addrspace(1)* %out
    279   ret void
    280 }
    281 
    282 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f32:
    283 ; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    284 ; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
    285 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
    286 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
    287 ; GCN-DAG: buffer_store_dwordx2 v{{\[}}[[CVT0]]:[[CVT1]]{{\]}}
    288 ; GCN: s_endpgm
    289 define void @global_extload_v2f16_to_v2f32(<2 x float> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
          ; Loads <2 x half>, extends each element to float and stores <2 x float>.
          ; The checks above expect two 16-bit loads (offsets 0 and 2) feeding a single
          ; two-dword store built from the two conversion results.
    290   %val = load <2 x half>, <2 x half> addrspace(1)* %in
    291   %cvt = fpext <2 x half> %val to <2 x float>
    292   store <2 x float> %cvt, <2 x float> addrspace(1)* %out
    293   ret void
    294 }
    295 
    296 ; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f32:
    297 define void @global_extload_v3f16_to_v3f32(<3 x float> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
          ; Loads <3 x half>, extends to <3 x float> and stores the result.
          ; Only the function label is matched for this case.
    298   %val = load <3 x half>, <3 x half> addrspace(1)* %in
    299   %cvt = fpext <3 x half> %val to <3 x float>
    300   store <3 x float> %cvt, <3 x float> addrspace(1)* %out
    301   ret void
    302 }
    303 
    304 ; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f32:
    305 define void @global_extload_v4f16_to_v4f32(<4 x float> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
          ; Loads <4 x half>, extends to <4 x float> and stores the result.
          ; Only the function label is matched for this case.
    306   %val = load <4 x half>, <4 x half> addrspace(1)* %in
    307   %cvt = fpext <4 x half> %val to <4 x float>
    308   store <4 x float> %cvt, <4 x float> addrspace(1)* %out
    309   ret void
    310 }
    311 
    312 ; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f32:
    313 define void @global_extload_v8f16_to_v8f32(<8 x float> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
          ; Loads <8 x half>, extends to <8 x float> and stores the result.
          ; Only the function label is matched for this case.
    314   %val = load <8 x half>, <8 x half> addrspace(1)* %in
    315   %cvt = fpext <8 x half> %val to <8 x float>
    316   store <8 x float> %cvt, <8 x float> addrspace(1)* %out
    317   ret void
    318 }
    319 
    320 ; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f32:
    321 ; GCN: buffer_load_ushort
    322 ; GCN: buffer_load_ushort
    323 ; GCN: buffer_load_ushort
    324 ; GCN: buffer_load_ushort
    325 ; GCN: buffer_load_ushort
    326 ; GCN: buffer_load_ushort
    327 ; GCN: buffer_load_ushort
    328 ; GCN: buffer_load_ushort
    329 ; GCN: buffer_load_ushort
    330 ; GCN: buffer_load_ushort
    331 ; GCN: buffer_load_ushort
    332 ; GCN: buffer_load_ushort
    333 ; GCN: buffer_load_ushort
    334 ; GCN: buffer_load_ushort
    335 ; GCN: buffer_load_ushort
    336 ; GCN: buffer_load_ushort
    337 
    338 ; GCN: v_cvt_f32_f16_e32
    339 ; GCN: v_cvt_f32_f16_e32
    340 ; GCN: v_cvt_f32_f16_e32
    341 ; GCN: v_cvt_f32_f16_e32
    342 ; GCN: v_cvt_f32_f16_e32
    343 ; GCN: v_cvt_f32_f16_e32
    344 ; GCN: v_cvt_f32_f16_e32
    345 ; GCN: v_cvt_f32_f16_e32
    346 ; GCN: v_cvt_f32_f16_e32
    347 ; GCN: v_cvt_f32_f16_e32
    348 ; GCN: v_cvt_f32_f16_e32
    349 ; GCN: v_cvt_f32_f16_e32
    350 ; GCN: v_cvt_f32_f16_e32
    351 ; GCN: v_cvt_f32_f16_e32
    352 ; GCN: v_cvt_f32_f16_e32
    353 ; GCN: v_cvt_f32_f16_e32
    354 
    355 ; GCN: buffer_store_dwordx4
    356 ; GCN: buffer_store_dwordx4
    357 ; GCN: buffer_store_dwordx4
    358 ; GCN: buffer_store_dwordx4
    359 
    360 ; GCN: s_endpgm
    361 define void @global_extload_v16f16_to_v16f32(<16 x float> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
          ; Loads <16 x half>, extends to <16 x float> and stores the result; the checks above
          ; expect sixteen 16-bit loads, sixteen conversions and four 4-dword stores.
    362   %val = load <16 x half>, <16 x half> addrspace(1)* %in
    363   %cvt = fpext <16 x half> %val to <16 x float>
    364   store <16 x float> %cvt, <16 x float> addrspace(1)* %out
    365   ret void
    366 }
    367 
    368 ; GCN-LABEL: {{^}}global_extload_f16_to_f64:
    369 ; GCN: buffer_load_ushort [[LOAD:v[0-9]+]]
    370 ; GCN: v_cvt_f32_f16_e32 [[CVT0:v[0-9]+]], [[LOAD]]
    371 ; GCN: v_cvt_f64_f32_e32 [[CVT1:v\[[0-9]+:[0-9]+\]]], [[CVT0]]
    372 ; GCN: buffer_store_dwordx2 [[CVT1]]
    373 define void @global_extload_f16_to_f64(double addrspace(1)* %out, half addrspace(1)* %in) #0 {
          ; Loads a half and extends it to double with one fpext; the checks above expect this
          ; to be done in two steps (half -> float, then float -> double).
    374   %val = load half, half addrspace(1)* %in
    375   %cvt = fpext half %val to double
    376   store double %cvt, double addrspace(1)* %out
    377   ret void
    378 }
    379 
    380 ; GCN-LABEL: {{^}}global_extload_v2f16_to_v2f64:
    381 ; GCN-DAG: buffer_load_ushort [[LOAD0:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    382 ; GCN-DAG: buffer_load_ushort [[LOAD1:v[0-9]+]], s{{\[[0-9]+:[0-9]+\]}}, 0 offset:2{{$}}
    383 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT0:[0-9]+]], [[LOAD0]]
    384 ; GCN-DAG: v_cvt_f32_f16_e32 v[[CVT1:[0-9]+]], [[LOAD1]]
    385 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT2_LO:[0-9]+]]:[[CVT2_HI:[0-9]+]]{{\]}}, v[[CVT0]]
    386 ; GCN-DAG: v_cvt_f64_f32_e32 v{{\[}}[[CVT3_LO:[0-9]+]]:[[CVT3_HI:[0-9]+]]{{\]}}, v[[CVT1]]
    387 ; GCN-DAG: buffer_store_dwordx4 v{{\[}}[[CVT2_LO]]:[[CVT3_HI]]{{\]}}
    388 ; GCN: s_endpgm
    389 define void @global_extload_v2f16_to_v2f64(<2 x double> addrspace(1)* %out, <2 x half> addrspace(1)* %in) #0 {
          ; Loads <2 x half> and extends to <2 x double>; the checks above expect the two
          ; double results to sit in adjacent register pairs so one 4-dword store covers both.
    390   %val = load <2 x half>, <2 x half> addrspace(1)* %in
    391   %cvt = fpext <2 x half> %val to <2 x double>
    392   store <2 x double> %cvt, <2 x double> addrspace(1)* %out
    393   ret void
    394 }
    395 
    396 ; GCN-LABEL: {{^}}global_extload_v3f16_to_v3f64:
    397 
    398 ; GCN: buffer_load_dwordx2 [[LOAD:v\[[0-9]+:[0-9]+\]]]
    399 ; SI: v_lshr_b64 v{{\[[0-9]+:[0-9]+\]}}, [[LOAD]], 32
    400 ; VI: v_lshrrev_b64 v{{\[[0-9]+:[0-9]+\]}}, 32, [[LOAD]]
    401 ; GCN: v_lshrrev_b32_e32 {{v[0-9]+}}, 16, {{v[0-9]+}}
    402 
    403 ; GCN: v_cvt_f32_f16_e32
    404 ; GCN: v_cvt_f32_f16_e32
    405 ; GCN: v_cvt_f32_f16_e32
    406 ; GCN-NOT: v_cvt_f32_f16_e32
    407 
    408 ; GCN: v_cvt_f64_f32_e32
    409 ; GCN: v_cvt_f64_f32_e32
    410 ; GCN: v_cvt_f64_f32_e32
    411 ; GCN-NOT: v_cvt_f64_f32_e32
    412 
    413 ; GCN-DAG: buffer_store_dwordx4 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0{{$}}
    414 ; GCN-DAG: buffer_store_dwordx2 v{{\[[0-9]+:[0-9]+\]}}, s{{\[[0-9]+:[0-9]+\]}}, 0 offset:16
    415 ; GCN: s_endpgm
    416 define void @global_extload_v3f16_to_v3f64(<3 x double> addrspace(1)* %out, <3 x half> addrspace(1)* %in) #0 {
          ; Loads <3 x half> and extends to <3 x double>.
          ; The checks above expect one two-dword load whose elements are separated with shifts,
          ; exactly three of each conversion, and a 4-dword plus a 2-dword store (offset 16).
    417   %val = load <3 x half>, <3 x half> addrspace(1)* %in
    418   %cvt = fpext <3 x half> %val to <3 x double>
    419   store <3 x double> %cvt, <3 x double> addrspace(1)* %out
    420   ret void
    421 }
    422 
    423 ; GCN-LABEL: {{^}}global_extload_v4f16_to_v4f64:
    424 define void @global_extload_v4f16_to_v4f64(<4 x double> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
          ; Loads <4 x half>, extends to <4 x double> and stores the result.
          ; Only the function label is matched for this case.
    425   %val = load <4 x half>, <4 x half> addrspace(1)* %in
    426   %cvt = fpext <4 x half> %val to <4 x double>
    427   store <4 x double> %cvt, <4 x double> addrspace(1)* %out
    428   ret void
    429 }
    430 
    431 ; GCN-LABEL: {{^}}global_extload_v8f16_to_v8f64:
    432 define void @global_extload_v8f16_to_v8f64(<8 x double> addrspace(1)* %out, <8 x half> addrspace(1)* %in) #0 {
          ; Loads <8 x half>, extends to <8 x double> and stores the result.
          ; Only the function label is matched for this case.
    433   %val = load <8 x half>, <8 x half> addrspace(1)* %in
    434   %cvt = fpext <8 x half> %val to <8 x double>
    435   store <8 x double> %cvt, <8 x double> addrspace(1)* %out
    436   ret void
    437 }
    438 
    439 ; GCN-LABEL: {{^}}global_extload_v16f16_to_v16f64:
    440 define void @global_extload_v16f16_to_v16f64(<16 x double> addrspace(1)* %out, <16 x half> addrspace(1)* %in) #0 {
          ; Loads <16 x half>, extends to <16 x double> and stores the result.
          ; Only the function label is matched for this case.
    441   %val = load <16 x half>, <16 x half> addrspace(1)* %in
    442   %cvt = fpext <16 x half> %val to <16 x double>
    443   store <16 x double> %cvt, <16 x double> addrspace(1)* %out
    444   ret void
    445 }
    446 
    447 ; GCN-LABEL: {{^}}global_truncstore_f32_to_f16:
    448 ; GCN: buffer_load_dword [[LOAD:v[0-9]+]]
    449 ; GCN: v_cvt_f16_f32_e32 [[CVT:v[0-9]+]], [[LOAD]]
    450 ; GCN: buffer_store_short [[CVT]]
    451 define void @global_truncstore_f32_to_f16(half addrspace(1)* %out, float addrspace(1)* %in) #0 {
          ; Loads a float, truncates it to half and stores 16 bits; the checks above chain
          ; load -> convert -> store through the same registers.
    452   %val = load float, float addrspace(1)* %in
    453   %cvt = fptrunc float %val to half
    454   store half %cvt, half addrspace(1)* %out
    455   ret void
    456 }
    457 
    458 ; GCN-LABEL: {{^}}global_truncstore_v2f32_to_v2f16:
    459 ; GCN: buffer_load_dwordx2 v{{\[}}[[LO:[0-9]+]]:[[HI:[0-9]+]]{{\]}}
    460 ; GCN-DAG: v_cvt_f16_f32_e32 [[CVT0:v[0-9]+]], v[[LO]]
    461 ; GCN-DAG: v_cvt_f16_f32_e32 [[CVT1:v[0-9]+]], v[[HI]]
    462 ; GCN-DAG: buffer_store_short [[CVT0]]
    463 ; GCN-DAG: buffer_store_short [[CVT1]]
    464 ; GCN: s_endpgm
    465 define void @global_truncstore_v2f32_to_v2f16(<2 x half> addrspace(1)* %out, <2 x float> addrspace(1)* %in) #0 {
          ; Loads <2 x float>, truncates to <2 x half> and stores it; the checks above expect
          ; each half of the loaded register pair to be converted and stored separately.
    466   %val = load <2 x float>, <2 x float> addrspace(1)* %in
    467   %cvt = fptrunc <2 x float> %val to <2 x half>
    468   store <2 x half> %cvt, <2 x half> addrspace(1)* %out
    469   ret void
    470 }
    471 
    472 ; GCN-LABEL: {{^}}global_truncstore_v3f32_to_v3f16:
    473 ; GCN: buffer_load_dwordx4
    474 ; GCN: v_cvt_f16_f32_e32
    475 ; GCN: v_cvt_f16_f32_e32
    476 ; GCN: v_cvt_f16_f32_e32
    477 ; GCN-NOT: v_cvt_f16_f32_e32
    478 ; GCN: buffer_store_short
    479 ; GCN: buffer_store_dword
    480 ; GCN: s_endpgm
    481 define void @global_truncstore_v3f32_to_v3f16(<3 x half> addrspace(1)* %out, <3 x float> addrspace(1)* %in) #0 {
          ; Loads <3 x float>, truncates to <3 x half> and stores it; the checks above expect a
          ; 4-dword load, exactly three conversions, then a 16-bit store and a dword store.
    482   %val = load <3 x float>, <3 x float> addrspace(1)* %in
    483   %cvt = fptrunc <3 x float> %val to <3 x half>
    484   store <3 x half> %cvt, <3 x half> addrspace(1)* %out
    485   ret void
    486 }
    487 
    488 ; GCN-LABEL: {{^}}global_truncstore_v4f32_to_v4f16:
    489 ; GCN: buffer_load_dwordx4
    490 ; GCN: v_cvt_f16_f32_e32
    491 ; GCN: v_cvt_f16_f32_e32
    492 ; GCN: v_cvt_f16_f32_e32
    493 ; GCN: v_cvt_f16_f32_e32
    494 ; GCN: buffer_store_short
    495 ; GCN: buffer_store_short
    496 ; GCN: buffer_store_short
    497 ; GCN: buffer_store_short
    498 ; GCN: s_endpgm
    499 define void @global_truncstore_v4f32_to_v4f16(<4 x half> addrspace(1)* %out, <4 x float> addrspace(1)* %in) #0 {
          ; Loads <4 x float>, truncates to <4 x half> and stores it; four conversions and four
          ; 16-bit stores are expected by the checks above.
    500   %val = load <4 x float>, <4 x float> addrspace(1)* %in
    501   %cvt = fptrunc <4 x float> %val to <4 x half>
    502   store <4 x half> %cvt, <4 x half> addrspace(1)* %out
    503   ret void
    504 }
    505 
    506 ; GCN-LABEL: {{^}}global_truncstore_v8f32_to_v8f16:
    507 ; GCN: buffer_load_dwordx4
    508 ; GCN: buffer_load_dwordx4
    509 ; GCN: v_cvt_f16_f32_e32
    510 ; GCN: v_cvt_f16_f32_e32
    511 ; GCN: v_cvt_f16_f32_e32
    512 ; GCN: v_cvt_f16_f32_e32
    513 ; GCN: v_cvt_f16_f32_e32
    514 ; GCN: v_cvt_f16_f32_e32
    515 ; GCN: v_cvt_f16_f32_e32
    516 ; GCN: v_cvt_f16_f32_e32
    517 ; GCN: buffer_store_short
    518 ; GCN: buffer_store_short
    519 ; GCN: buffer_store_short
    520 ; GCN: buffer_store_short
    521 ; GCN: buffer_store_short
    522 ; GCN: buffer_store_short
    523 ; GCN: buffer_store_short
    524 ; GCN: buffer_store_short
    525 ; GCN: s_endpgm
    526 define void @global_truncstore_v8f32_to_v8f16(<8 x half> addrspace(1)* %out, <8 x float> addrspace(1)* %in) #0 {
          ; Loads <8 x float>, truncates to <8 x half> and stores it; two 4-dword loads, eight
          ; conversions and eight 16-bit stores are expected by the checks above.
    527   %val = load <8 x float>, <8 x float> addrspace(1)* %in
    528   %cvt = fptrunc <8 x float> %val to <8 x half>
    529   store <8 x half> %cvt, <8 x half> addrspace(1)* %out
    530   ret void
    531 }
    532 
    533 ; GCN-LABEL: {{^}}global_truncstore_v16f32_to_v16f16:
    534 ; GCN: buffer_load_dwordx4
    535 ; GCN: buffer_load_dwordx4
    536 ; GCN: buffer_load_dwordx4
    537 ; GCN: buffer_load_dwordx4
    538 ; GCN-DAG: v_cvt_f16_f32_e32
    539 ; GCN-DAG: v_cvt_f16_f32_e32
    540 ; GCN-DAG: v_cvt_f16_f32_e32
    541 ; GCN-DAG: v_cvt_f16_f32_e32
    542 ; GCN-DAG: v_cvt_f16_f32_e32
    543 ; GCN-DAG: v_cvt_f16_f32_e32
    544 ; GCN-DAG: v_cvt_f16_f32_e32
    545 ; GCN-DAG: v_cvt_f16_f32_e32
    546 ; GCN-DAG: v_cvt_f16_f32_e32
    547 ; GCN-DAG: v_cvt_f16_f32_e32
    548 ; GCN-DAG: v_cvt_f16_f32_e32
    549 ; GCN-DAG: v_cvt_f16_f32_e32
    550 ; GCN-DAG: v_cvt_f16_f32_e32
    551 ; GCN-DAG: v_cvt_f16_f32_e32
    552 ; GCN-DAG: v_cvt_f16_f32_e32
    553 ; GCN-DAG: v_cvt_f16_f32_e32
    554 ; GCN-DAG: buffer_store_short
    555 ; GCN-DAG: buffer_store_short
    556 ; GCN-DAG: buffer_store_short
    557 ; GCN-DAG: buffer_store_short
    558 ; GCN-DAG: buffer_store_short
    559 ; GCN-DAG: buffer_store_short
    560 ; GCN-DAG: buffer_store_short
    561 ; GCN-DAG: buffer_store_short
    562 ; GCN-DAG: buffer_store_short
    563 ; GCN-DAG: buffer_store_short
    564 ; GCN-DAG: buffer_store_short
    565 ; GCN-DAG: buffer_store_short
    566 ; GCN-DAG: buffer_store_short
    567 ; GCN-DAG: buffer_store_short
    568 ; GCN-DAG: buffer_store_short
    569 ; GCN-DAG: buffer_store_short
    570 ; GCN: s_endpgm
    571 define void @global_truncstore_v16f32_to_v16f16(<16 x half> addrspace(1)* %out, <16 x float> addrspace(1)* %in) #0 {
          ; Loads <16 x float>, truncates to <16 x half> and stores it; four 4-dword loads, then
          ; sixteen conversions and sixteen 16-bit stores (in any order) are expected above.
    572   %val = load <16 x float>, <16 x float> addrspace(1)* %in
    573   %cvt = fptrunc <16 x float> %val to <16 x half>
    574   store <16 x half> %cvt, <16 x half> addrspace(1)* %out
    575   ret void
    576 }
    577 
    578 ; FIXME: Unsafe math should fold conversions away
    579 ; GCN-LABEL: {{^}}fadd_f16:
    580 ; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
    581 ; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
    582 ; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
    583 ; SI-DAG: v_cvt_f32_f16_e32 v{{[0-9]+}},
    584 ; SI: v_add_f32
    585 ; GCN: s_endpgm
    586 define void @fadd_f16(half addrspace(1)* %out, half %a, half %b) #0 {
           ; Adds two by-value half arguments and stores the half result.
           ; Only the SI run has instruction checks here: conversions to float followed by a
           ; float add. The VI run is checked only for the label and the program end.
    587    %add = fadd half %a, %b
    588    store half %add, half addrspace(1)* %out, align 4
    589    ret void
    590 }
    591 
    592 ; GCN-LABEL: {{^}}fadd_v2f16:
    593 ; SI: v_add_f32
    594 ; SI: v_add_f32
    595 ; GCN: s_endpgm
    596 define void @fadd_v2f16(<2 x half> addrspace(1)* %out, <2 x half> %a, <2 x half> %b) #0 {
          ; Element-wise add of two by-value <2 x half> arguments; the SI checks above expect
          ; one float add per element.
    597   %add = fadd <2 x half> %a, %b
    598   store <2 x half> %add, <2 x half> addrspace(1)* %out, align 8
    599   ret void
    600 }
    601 
    602 ; GCN-LABEL: {{^}}fadd_v4f16:
    603 ; SI: v_add_f32
    604 ; SI: v_add_f32
    605 ; SI: v_add_f32
    606 ; SI: v_add_f32
    607 ; GCN: s_endpgm
    608 define void @fadd_v4f16(<4 x half> addrspace(1)* %out, <4 x half> addrspace(1)* %in) #0 {
          ; Unlike the other fadd_* cases in this file, both operands are loaded from memory:
          ; %a is the <4 x half> at %in and %b is the next <4 x half> element after it.
    609   %b_ptr = getelementptr <4 x half>, <4 x half> addrspace(1)* %in, i32 1
    610   %a = load <4 x half>, <4 x half> addrspace(1)* %in, align 16
    611   %b = load <4 x half>, <4 x half> addrspace(1)* %b_ptr, align 16
    612   %result = fadd <4 x half> %a, %b
    613   store <4 x half> %result, <4 x half> addrspace(1)* %out, align 16
    614   ret void
    615 }
    616 
    617 ; GCN-LABEL: {{^}}fadd_v8f16:
    618 ; SI: v_add_f32
    619 ; SI: v_add_f32
    620 ; SI: v_add_f32
    621 ; SI: v_add_f32
    622 ; SI: v_add_f32
    623 ; SI: v_add_f32
    624 ; SI: v_add_f32
    625 ; SI: v_add_f32
    626 ; GCN: s_endpgm
    627 define void @fadd_v8f16(<8 x half> addrspace(1)* %out, <8 x half> %a, <8 x half> %b) #0 {
          ; Element-wise add of two by-value <8 x half> arguments; the SI checks above expect
          ; eight float adds.
    628   %add = fadd <8 x half> %a, %b
    629   store <8 x half> %add, <8 x half> addrspace(1)* %out, align 32
    630   ret void
    631 }
    632 
    633 ; GCN-LABEL: {{^}}fsub_f16:
    634 ; GCN: v_subrev_f32_e32
    635 ; GCN: s_endpgm
    636 define void @fsub_f16(half addrspace(1)* %out, half addrspace(1)* %in) #0 {
          ; Loads two consecutive halves (%a at %in, %b at the next element) and stores %a - %b.
          ; The checks above expect the subtraction to be emitted as a float operation.
    637   %b_ptr = getelementptr half, half addrspace(1)* %in, i32 1
    638   %a = load half, half addrspace(1)* %in
    639   %b = load half, half addrspace(1)* %b_ptr
    640   %sub = fsub half %a, %b
    641   store half %sub, half addrspace(1)* %out
    642   ret void
    643 }
    644 
    645 ; GCN-LABEL: {{^}}test_bitcast_from_half:
    646 ; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
    647 ; GCN: buffer_store_short [[TMP]]
    648 define void @test_bitcast_from_half(half addrspace(1)* %in, i16 addrspace(1)* %out) #0 {
          ; Reinterprets a loaded half as i16 via bitcast (bit pattern preserved, no numeric
          ; conversion); the checks above expect a plain 16-bit load and store of one register.
          ; Note the parameter order: %in comes first in this function.
    649   %val = load half, half addrspace(1)* %in
    650   %val_int = bitcast half %val to i16
    651   store i16 %val_int, i16 addrspace(1)* %out
    652   ret void
    653 }
    654 
    655 ; GCN-LABEL: {{^}}test_bitcast_to_half:
    656 ; GCN: buffer_load_ushort [[TMP:v[0-9]+]]
    657 ; GCN: buffer_store_short [[TMP]]
    658 define void @test_bitcast_to_half(half addrspace(1)* %out, i16 addrspace(1)* %in) #0 {
          ; Reinterprets a loaded i16 as half via bitcast and stores it; the checks above
          ; expect a plain 16-bit load and store of one register, with no conversion.
    659   %val = load i16, i16 addrspace(1)* %in
    660   %val_fp = bitcast i16 %val to half
    661   store half %val_fp, half addrspace(1)* %out
    662   ret void
    663 }
    664 
    665 attributes #0 = { nounwind }
        ; Attribute group #0 is the one referenced by the function definitions above.
    666