Home | History | Annotate | Download | only in NVPTX
      1 ; ## Full FP16 support enabled by default.
      2 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
      3 ; RUN:          -O0 -disable-post-ra -disable-fp-elim -verify-machineinstrs \
      4 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-F16 %s
      5 ; ## FP16 support explicitly disabled.
      6 ; RUN: llc < %s -mtriple=nvptx64-nvidia-cuda -mcpu=sm_53 -asm-verbose=false \
      7 ; RUN:          -O0 -disable-post-ra -disable-fp-elim --nvptx-no-f16-math \
      8 ; RUN:           -verify-machineinstrs \
      9 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
     10 ; ## FP16 is not supported by hardware.
     11 ; RUN: llc < %s -O0 -mtriple=nvptx64-nvidia-cuda -mcpu=sm_52 -asm-verbose=false \
     12 ; RUN:          -disable-post-ra -disable-fp-elim -verify-machineinstrs \
     13 ; RUN: | FileCheck -check-prefixes CHECK,CHECK-NOF16 %s
     14 
     15 target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128"
        ; Layout string fields: little-endian (e), Mach-O style name mangling
        ; (m:o), i64 aligned to 64 bits, i128 aligned to 128 bits, native integer
        ; widths 32 and 64 (n32:64), natural stack alignment 128 bits (S128).
        ; NOTE(review): m:o looks unusual next to the nvptx64 triple on the RUN
        ; lines -- confirm it is intentional.
     16 
     17 ; CHECK-LABEL: test_ret_const(
     18 ; CHECK:      mov.b16         [[R:%h[0-9]+]], 0x3C00;
     19 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
     20 ; CHECK-NEXT: ret;
        ; Returns the half constant 1.0. Every run configuration expects the value
        ; to be placed in a 16-bit register as the bit pattern 0x3C00 and then
        ; stored to the return slot.
     21 define half @test_ret_const() #0 {
     22   ret half 1.0
     23 }
     24 
     25 ; CHECK-LABEL: test_fadd(
     26 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fadd_param_0];
     27 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_param_1];
     28 ; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]];
     29 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
     30 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
     31 ; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
     32 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
     33 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
     34 ; CHECK-NEXT: ret;
        ; Scalar half addition. The F16 run expects a single add.rn.f16; the
        ; no-F16 runs expect both operands widened to f32, an add.rn.f32, and the
        ; sum converted back to f16 before it is returned.
     35 define half @test_fadd(half %a, half %b) #0 {
     36   %r = fadd half %a, %b
     37   ret half %r
     38 }
     39 
     40 ; CHECK-LABEL: test_fadd_v1f16(
     41 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fadd_v1f16_param_0];
     42 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_v1f16_param_1];
     43 ; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]];
     44 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
     45 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
     46 ; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
     47 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
     48 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
     49 ; CHECK-NEXT: ret;
        ; Addition on single-element vectors (<1 x half>). The expected PTX is the
        ; same scalar sequence as for plain half operands: b16 parameter loads, one
        ; add (f16 or widened f32), and a b16 return store.
     50 define <1 x half> @test_fadd_v1f16(<1 x half> %a, <1 x half> %b) #0 {
     51   %r = fadd <1 x half> %a, %b
     52   ret <1 x half> %r
     53 }
     54 
     55 ; Check that we can lower fadd with immediate arguments.
     56 ; CHECK-LABEL: test_fadd_imm_0(
     57 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_imm_0_param_0];
     58 ; CHECK-F16-DAG:    mov.b16        [[A:%h[0-9]+]], 0x3C00;
     59 ; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[B]], [[A]];
     60 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
     61 ; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
     62 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
     63 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
     64 ; CHECK-NEXT: ret;
        ; The constant 1.0 is the left-hand operand in the IR. The F16 run expects
        ; it in a register (mov.b16 0x3C00) used as the second add operand; the
        ; no-F16 runs expect it folded into the add as the f32 literal 0f3F800000.
     65 define half @test_fadd_imm_0(half %b) #0 {
     66   %r = fadd half 1.0, %b
     67   ret half %r
     68 }
     69 
     70 ; CHECK-LABEL: test_fadd_imm_1(
     71 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fadd_imm_1_param_0];
     72 ; CHECK-F16-DAG:    mov.b16        [[A:%h[0-9]+]], 0x3C00;
     73 ; CHECK-F16-NEXT:   add.rn.f16     [[R:%h[0-9]+]], [[B]], [[A]];
     74 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
     75 ; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], 0f3F800000;
     76 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
     77 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
     78 ; CHECK-NEXT: ret;
        ; The constant 1.0 is the right-hand operand in the IR. The expected PTX
        ; keeps the parameter first and the constant second in the add, as a
        ; register in the F16 run and as an f32 literal in the no-F16 runs.
     79 define half @test_fadd_imm_1(half %a) #0 {
     80   %r = fadd half %a, 1.0
     81   ret half %r
     82 }
     83 
     84 ; CHECK-LABEL: test_fsub(
     85 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fsub_param_0];
     86 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fsub_param_1];
     87 ; CHECK-F16-NEXT:   sub.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]];
     88 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
     89 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
     90 ; CHECK-NOF16-NEXT: sub.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
     91 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
     92 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
     93 ; CHECK-NEXT: ret;
        ; Scalar half subtraction (%a - %b). The F16 run expects sub.rn.f16; the
        ; no-F16 runs expect the widen / sub.rn.f32 / narrow sequence with the
        ; operands kept in the same order.
     94 define half @test_fsub(half %a, half %b) #0 {
     95   %r = fsub half %a, %b
     96   ret half %r
     97 }
     98 
     99 ; CHECK-LABEL: test_fneg(
    100 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fneg_param_0];
    101 ; CHECK-F16-NEXT:   mov.b16        [[Z:%h[0-9]+]], 0x0000
    102 ; CHECK-F16-NEXT:   sub.rn.f16     [[R:%h[0-9]+]], [[Z]], [[A]];
    103 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
    104 ; CHECK-NOF16-DAG:  mov.f32        [[Z:%f[0-9]+]], 0f00000000;
    105 ; CHECK-NOF16-NEXT: sub.rn.f32     [[R32:%f[0-9]+]], [[Z]], [[A32]];
    106 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
    107 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    108 ; CHECK-NEXT: ret;
        ; Negation written as "fsub 0.0, %a". The expected PTX is a real
        ; subtraction from a zero held in a register (0x0000 as f16, or 0f00000000
        ; as f32 in the no-F16 runs), not a dedicated negate instruction.
    109 define half @test_fneg(half %a) #0 {
    110   %r = fsub half 0.0, %a
    111   ret half %r
    112 }
    113 
    114 ; CHECK-LABEL: test_fmul(
    115 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fmul_param_0];
    116 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fmul_param_1];
    117 ; CHECK-F16-NEXT: mul.rn.f16      [[R:%h[0-9]+]], [[A]], [[B]];
    118 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
    119 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
    120 ; CHECK-NOF16-NEXT: mul.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]];
    121 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
    122 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    123 ; CHECK-NEXT: ret;
        ; Scalar half multiplication. The F16 run expects mul.rn.f16; the no-F16
        ; runs expect the operands widened to f32, mul.rn.f32, and the product
        ; converted back to f16.
    124 define half @test_fmul(half %a, half %b) #0 {
    125   %r = fmul half %a, %b
    126   ret half %r
    127 }
    128 
    129 ; CHECK-LABEL: test_fdiv(
    130 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fdiv_param_0];
    131 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fdiv_param_1];
    132 ; CHECK-DAG:  cvt.f32.f16     [[F0:%f[0-9]+]], [[A]];
    133 ; CHECK-DAG:  cvt.f32.f16     [[F1:%f[0-9]+]], [[B]];
    134 ; CHECK-NEXT: div.rn.f32      [[FR:%f[0-9]+]], [[F0]], [[F1]];
    135 ; CHECK-NEXT: cvt.rn.f16.f32  [[R:%h[0-9]+]], [[FR]];
    136 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    137 ; CHECK-NEXT: ret;
        ; Scalar half division. The patterns use the common prefix only, so every
        ; run configuration (including the one with native f16 math) expects the
        ; division to be performed in f32 and the quotient narrowed back to f16.
    138 define half @test_fdiv(half %a, half %b) #0 {
    139   %r = fdiv half %a, %b
    140   ret half %r
    141 }
    142 
    143 ; CHECK-LABEL: test_frem(
    144 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_frem_param_0];
    145 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_frem_param_1];
    146 ; CHECK-DAG:  cvt.f32.f16     [[FA:%f[0-9]+]], [[A]];
    147 ; CHECK-DAG:  cvt.f32.f16     [[FB:%f[0-9]+]], [[B]];
    148 ; CHECK-NEXT: div.rn.f32      [[D:%f[0-9]+]], [[FA]], [[FB]];
    149 ; CHECK-NEXT: cvt.rmi.f32.f32 [[DI:%f[0-9]+]], [[D]];
    150 ; CHECK-NEXT: mul.f32         [[RI:%f[0-9]+]], [[DI]], [[FB]];
    151 ; CHECK-NEXT: sub.f32         [[RF:%f[0-9]+]], [[FA]], [[RI]];
    152 ; CHECK-NEXT: cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
    153 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    154 ; CHECK-NEXT: ret;
        ; Half remainder. All configurations expect an f32 expansion: divide,
        ; round the quotient to an integral value with cvt.rmi.f32.f32, multiply
        ; it by the divisor, subtract that from the dividend, then narrow the
        ; result to f16.
    155 define half @test_frem(half %a, half %b) #0 {
    156   %r = frem half %a, %b
    157   ret half %r
    158 }
    159 
    160 ; CHECK-LABEL: test_store(
    161 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_store_param_0];
    162 ; CHECK-DAG:  ld.param.u64    %[[PTR:rd[0-9]+]], [test_store_param_1];
    163 ; CHECK-NEXT: st.b16          [%[[PTR]]], [[A]];
    164 ; CHECK-NEXT: ret;
        ; Stores a half argument through a pointer argument. The expected PTX is
        ; a single 16-bit store (st.b16) of the loaded parameter to the 64-bit
        ; address.
    165 define void @test_store(half %a, half* %b) #0 {
    166   store half %a, half* %b
    167   ret void
    168 }
    169 
    170 ; CHECK-LABEL: test_load(
    171 ; CHECK:      ld.param.u64    %[[PTR:rd[0-9]+]], [test_load_param_0];
    172 ; CHECK-NEXT: ld.b16          [[R:%h[0-9]+]], [%[[PTR]]];
    173 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    174 ; CHECK-NEXT: ret;
        ; Loads a half through a pointer argument and returns it. The expected PTX
        ; is a single 16-bit load (ld.b16) followed by the b16 return store.
    175 define half @test_load(half* %a) #0 {
    176   %r = load half, half* %a
    177   ret half %r
    178 }
    179 
    180 ; CHECK-LABEL: .visible .func test_halfp0a1(
    181 ; CHECK-DAG: ld.param.u64 %[[FROM:rd?[0-9]+]], [test_halfp0a1_param_0];
    182 ; CHECK-DAG: ld.param.u64 %[[TO:rd?[0-9]+]], [test_halfp0a1_param_1];
    183 ; CHECK-DAG: ld.u8        [[B0:%r[sd]?[0-9]+]], [%[[FROM]]]
    184 ; CHECK-DAG: st.u8        [%[[TO]]], [[B0]]
    185 ; CHECK-DAG: ld.u8        [[B1:%r[sd]?[0-9]+]], [%[[FROM]]+1]
    186 ; CHECK-DAG: st.u8        [%[[TO]]+1], [[B1]]
    187 ; CHECK: ret
        ; Copies a half between two pointers with "align 1" on both the load and
        ; the store. The expected PTX splits the 2-byte access into two single-byte
        ; ld.u8 / st.u8 pairs at offsets 0 and 1. Unlike the other functions here,
        ; this one carries no "#0" attribute group.
    188 define void @test_halfp0a1(half * noalias readonly %from, half * %to) {
    189   %1 = load half, half * %from , align 1
    190   store half %1, half * %to , align 1
    191   ret void
    192 }
    193 
    194 declare half @test_callee(half %a, half %b) #0 ; external callee used by test_call, test_call_flipped and test_tailcall_flipped
    195 
    196 ; CHECK-LABEL: test_call(
    197 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_call_param_0];
    198 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_call_param_1];
    199 ; CHECK:      {
    200 ; CHECK-DAG:  .param .b32 param0;
    201 ; CHECK-DAG:  .param .b32 param1;
    202 ; CHECK-DAG:  st.param.b16    [param0+0], [[A]];
    203 ; CHECK-DAG:  st.param.b16    [param1+0], [[B]];
    204 ; CHECK-DAG:  .param .b32 retval0;
    205 ; CHECK:      call.uni (retval0),
    206 ; CHECK-NEXT:        test_callee,
    207 ; CHECK:      );
    208 ; CHECK-NEXT: ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
    209 ; CHECK-NEXT: }
    210 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    211 ; CHECK-NEXT: ret;
        ; Forwards both half arguments, in order, to test_callee and returns its
        ; result. The expected PTX declares each outgoing argument and the return
        ; value as a .param .b32 slot but writes and reads them with 16-bit
        ; st.param.b16 / ld.param.b16.
    212 define half @test_call(half %a, half %b) #0 {
    213   %r = call half @test_callee(half %a, half %b)
    214   ret half %r
    215 }
    216 
    217 ; CHECK-LABEL: test_call_flipped(
    218 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_call_flipped_param_0];
    219 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_call_flipped_param_1];
    220 ; CHECK:      {
    221 ; CHECK-DAG:  .param .b32 param0;
    222 ; CHECK-DAG:  .param .b32 param1;
    223 ; CHECK-DAG:  st.param.b16    [param0+0], [[B]];
    224 ; CHECK-DAG:  st.param.b16    [param1+0], [[A]];
    225 ; CHECK-DAG:  .param .b32 retval0;
    226 ; CHECK:      call.uni (retval0),
    227 ; CHECK-NEXT:        test_callee,
    228 ; CHECK:      );
    229 ; CHECK-NEXT: ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
    230 ; CHECK-NEXT: }
    231 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    232 ; CHECK-NEXT: ret;
        ; Calls test_callee with the two arguments swapped. The expected PTX
        ; stores the second incoming parameter (B) into param0 and the first (A)
        ; into param1.
    233 define half @test_call_flipped(half %a, half %b) #0 {
    234   %r = call half @test_callee(half %b, half %a)
    235   ret half %r
    236 }
    237 
    238 ; CHECK-LABEL: test_tailcall_flipped(
    239 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_tailcall_flipped_param_0];
    240 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_tailcall_flipped_param_1];
    241 ; CHECK:      {
    242 ; CHECK-DAG:  .param .b32 param0;
    243 ; CHECK-DAG:  .param .b32 param1;
    244 ; CHECK-DAG:  st.param.b16    [param0+0], [[B]];
    245 ; CHECK-DAG:  st.param.b16    [param1+0], [[A]];
    246 ; CHECK-DAG:  .param .b32 retval0;
    247 ; CHECK:      call.uni (retval0),
    248 ; CHECK-NEXT:        test_callee,
    249 ; CHECK:      );
    250 ; CHECK-NEXT: ld.param.b16    [[R:%h[0-9]+]], [retval0+0];
    251 ; CHECK-NEXT: }
    252 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    253 ; CHECK-NEXT: ret;
        ; Same swapped-argument call, but marked "tail call" in the IR. The
        ; expected PTX is still an ordinary call.uni sequence followed by a
        ; return store and ret, i.e. the tail marker does not change the output.
    254 define half @test_tailcall_flipped(half %a, half %b) #0 {
    255   %r = tail call half @test_callee(half %b, half %a)
    256   ret half %r
    257 }
    258 
    259 ; CHECK-LABEL: test_select(
    260 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_select_param_0];
    261 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_select_param_1];
    262 ; CHECK-DAG:  setp.eq.b16     [[PRED:%p[0-9]+]], %rs{{.*}}, 1;
    263 ; CHECK-NEXT: selp.b16        [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
    264 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    265 ; CHECK-NEXT: ret;
        ; Selects between two halves on an "i1 zeroext" argument. The expected PTX
        ; builds the predicate by comparing a 16-bit integer register against 1
        ; and picks the value with selp.b16.
    266 define half @test_select(half %a, half %b, i1 zeroext %c) #0 {
    267   %r = select i1 %c, half %a, half %b
    268   ret half %r
    269 }
    270 
    271 ; CHECK-LABEL: test_select_cc(
    272 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_select_cc_param_0];
    273 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_select_cc_param_1];
    274 ; CHECK-DAG:  ld.param.b16    [[C:%h[0-9]+]], [test_select_cc_param_2];
    275 ; CHECK-DAG:  ld.param.b16    [[D:%h[0-9]+]], [test_select_cc_param_3];
    276 ; CHECK-F16:  setp.neu.f16    [[PRED:%p[0-9]+]], [[C]], [[D]]
    277 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
    278 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
    279 ; CHECK-NOF16: setp.neu.f32    [[PRED:%p[0-9]+]], [[CF]], [[DF]]
    280 ; CHECK:      selp.b16        [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
    281 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    282 ; CHECK-NEXT: ret;
        ; Selects between halves %a and %b on "fcmp une" of halves %c and %d. The
        ; compare is expected as setp.neu.f16 in the F16 run and as setp.neu.f32
        ; on widened operands in the no-F16 runs; the select is selp.b16 in both.
    283 define half @test_select_cc(half %a, half %b, half %c, half %d) #0 {
    284   %cc = fcmp une half %c, %d
    285   %r = select i1 %cc, half %a, half %b
    286   ret half %r
    287 }
    288 
    289 ; CHECK-LABEL: test_select_cc_f32_f16(
    290 ; CHECK-DAG:  ld.param.f32    [[A:%f[0-9]+]], [test_select_cc_f32_f16_param_0];
    291 ; CHECK-DAG:  ld.param.f32    [[B:%f[0-9]+]], [test_select_cc_f32_f16_param_1];
    292 ; CHECK-DAG:  ld.param.b16    [[C:%h[0-9]+]], [test_select_cc_f32_f16_param_2];
    293 ; CHECK-DAG:  ld.param.b16    [[D:%h[0-9]+]], [test_select_cc_f32_f16_param_3];
    294 ; CHECK-F16:  setp.neu.f16    [[PRED:%p[0-9]+]], [[C]], [[D]]
    295 ; CHECK-NOF16-DAG: cvt.f32.f16 [[DF:%f[0-9]+]], [[D]];
    296 ; CHECK-NOF16-DAG: cvt.f32.f16 [[CF:%f[0-9]+]], [[C]];
    297 ; CHECK-NOF16: setp.neu.f32    [[PRED:%p[0-9]+]], [[CF]], [[DF]]
    298 ; CHECK-NEXT: selp.f32        [[R:%f[0-9]+]], [[A]], [[B]], [[PRED]];
    299 ; CHECK-NEXT: st.param.f32    [func_retval0+0], [[R]];
    300 ; CHECK-NEXT: ret;
        ; Mixed-type case: the condition is "fcmp une" on halves while the
        ; selected values are floats. The compare follows the f16 / widened-f32
        ; split; the select itself is expected as selp.f32 on the float inputs.
    301 define float @test_select_cc_f32_f16(float %a, float %b, half %c, half %d) #0 {
    302   %cc = fcmp une half %c, %d
    303   %r = select i1 %cc, float %a, float %b
    304   ret float %r
    305 }
    306 
    307 ; CHECK-LABEL: test_select_cc_f16_f32(
    308 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_select_cc_f16_f32_param_0];
    309 ; CHECK-DAG:  ld.param.f32    [[C:%f[0-9]+]], [test_select_cc_f16_f32_param_2];
    310 ; CHECK-DAG:  ld.param.f32    [[D:%f[0-9]+]], [test_select_cc_f16_f32_param_3];
    311 ; CHECK-DAG:  setp.neu.f32    [[PRED:%p[0-9]+]], [[C]], [[D]]
    312 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_select_cc_f16_f32_param_1];
    313 ; CHECK-NEXT: selp.b16        [[R:%h[0-9]+]], [[A]], [[B]], [[PRED]];
    314 ; CHECK-NEXT: st.param.b16    [func_retval0+0], [[R]];
    315 ; CHECK-NEXT: ret;
        ; Mixed-type case: the condition is "fcmp une" on floats while the
        ; selected values are halves. All configurations expect setp.neu.f32
        ; directly on the float parameters (no conversions) and a selp.b16.
    316 define half @test_select_cc_f16_f32(half %a, half %b, float %c, float %d) #0 {
    317   %cc = fcmp une float %c, %d
    318   %r = select i1 %cc, half %a, half %b
    319   ret half %r
    320 }
    321 
    322 ; CHECK-LABEL: test_fcmp_une(
    323 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_une_param_0];
    324 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_une_param_1];
    325 ; CHECK-F16:  setp.neu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
    326 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    327 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    328 ; CHECK-NOF16: setp.neu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    329 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    330 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    331 ; CHECK-NEXT: ret;
        ; "fcmp une" (unordered or not equal) on halves, returned as i1. Expected
        ; as setp.neu on f16, or on widened f32 operands in the no-F16 runs; the
        ; predicate is turned into 1/0 with selp.u32 and returned as b32.
    332 define i1 @test_fcmp_une(half %a, half %b) #0 {
    333   %r = fcmp une half %a, %b
    334   ret i1 %r
    335 }
    336 
    337 ; CHECK-LABEL: test_fcmp_ueq(
    338 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ueq_param_0];
    339 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ueq_param_1];
    340 ; CHECK-F16:  setp.equ.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
    341 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    342 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    343 ; CHECK-NOF16: setp.equ.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    344 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    345 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    346 ; CHECK-NEXT: ret;
        ; "fcmp ueq" (unordered or equal) on halves, returned as i1. Expected as
        ; setp.equ on f16, or on widened f32 operands in the no-F16 runs, followed
        ; by selp.u32 1/0 and a b32 return store.
    347 define i1 @test_fcmp_ueq(half %a, half %b) #0 {
    348   %r = fcmp ueq half %a, %b
    349   ret i1 %r
    350 }
    351 
    352 ; CHECK-LABEL: test_fcmp_ugt(
    353 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ugt_param_0];
    354 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ugt_param_1];
    355 ; CHECK-F16:  setp.gtu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
    356 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    357 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    358 ; CHECK-NOF16: setp.gtu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    359 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    360 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    361 ; CHECK-NEXT: ret;
        ; "fcmp ugt" (unordered or greater than) on halves, returned as i1.
        ; Expected as setp.gtu on f16, or on widened f32 operands in the no-F16
        ; runs, followed by selp.u32 1/0 and a b32 return store.
    362 define i1 @test_fcmp_ugt(half %a, half %b) #0 {
    363   %r = fcmp ugt half %a, %b
    364   ret i1 %r
    365 }
    366 
    367 ; CHECK-LABEL: test_fcmp_uge(
    368 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_uge_param_0];
    369 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_uge_param_1];
    370 ; CHECK-F16:  setp.geu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
    371 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    372 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    373 ; CHECK-NOF16: setp.geu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    374 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    375 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    376 ; CHECK-NEXT: ret;
        ; "fcmp uge" (unordered or greater/equal) on halves, returned as i1.
        ; Expected as setp.geu on f16, or on widened f32 operands in the no-F16
        ; runs, followed by selp.u32 1/0 and a b32 return store.
    377 define i1 @test_fcmp_uge(half %a, half %b) #0 {
    378   %r = fcmp uge half %a, %b
    379   ret i1 %r
    380 }
    381 
    382 ; CHECK-LABEL: test_fcmp_ult(
    383 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ult_param_0];
    384 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ult_param_1];
    385 ; CHECK-F16:  setp.ltu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
    386 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    387 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    388 ; CHECK-NOF16: setp.ltu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    389 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    390 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    391 ; CHECK-NEXT: ret;
        ; "fcmp ult" (unordered or less than) on halves, returned as i1. Expected
        ; as setp.ltu on f16, or on widened f32 operands in the no-F16 runs,
        ; followed by selp.u32 1/0 and a b32 return store.
    392 define i1 @test_fcmp_ult(half %a, half %b) #0 {
    393   %r = fcmp ult half %a, %b
    394   ret i1 %r
    395 }
    396 
    397 ; CHECK-LABEL: test_fcmp_ule(
    398 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ule_param_0];
    399 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ule_param_1];
    400 ; CHECK-F16:  setp.leu.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
    401 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    402 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    403 ; CHECK-NOF16: setp.leu.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    404 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    405 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    406 ; CHECK-NEXT: ret;
        ; "fcmp ule" (unordered or less/equal) on halves, returned as i1. Expected
        ; as setp.leu on f16, or on widened f32 operands in the no-F16 runs,
        ; followed by selp.u32 1/0 and a b32 return store.
    407 define i1 @test_fcmp_ule(half %a, half %b) #0 {
    408   %r = fcmp ule half %a, %b
    409   ret i1 %r
    410 }
    411 
    412 
    413 ; CHECK-LABEL: test_fcmp_uno(
    414 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_uno_param_0];
    415 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_uno_param_1];
    416 ; CHECK-F16:  setp.nan.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
    417 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    418 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    419 ; CHECK-NOF16: setp.nan.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    420 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    421 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    422 ; CHECK-NEXT: ret;
        ; "fcmp uno" (unordered: either operand is NaN) on halves, returned as i1.
        ; Expected as setp.nan on f16, or on widened f32 operands in the no-F16
        ; runs, followed by selp.u32 1/0 and a b32 return store.
    423 define i1 @test_fcmp_uno(half %a, half %b) #0 {
    424   %r = fcmp uno half %a, %b
    425   ret i1 %r
    426 }
    427 
    428 ; CHECK-LABEL: test_fcmp_one(
    429 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_one_param_0];
    430 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_one_param_1];
    431 ; CHECK-F16:  setp.ne.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
    432 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    433 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    434 ; CHECK-NOF16: setp.ne.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    435 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    436 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    437 ; CHECK-NEXT: ret;
        ; "fcmp one" (ordered and not equal) on halves, returned as i1. Expected
        ; as setp.ne on f16, or on widened f32 operands in the no-F16 runs,
        ; followed by selp.u32 1/0 and a b32 return store.
    438 define i1 @test_fcmp_one(half %a, half %b) #0 {
    439   %r = fcmp one half %a, %b
    440   ret i1 %r
    441 }
    442 
    443 ; CHECK-LABEL: test_fcmp_oeq(
    444 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_oeq_param_0];
    445 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_oeq_param_1];
    446 ; CHECK-F16:  setp.eq.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
    447 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    448 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    449 ; CHECK-NOF16: setp.eq.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    450 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    451 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    452 ; CHECK-NEXT: ret;
        ; "fcmp oeq" (ordered and equal) on halves, returned as i1. Expected as
        ; setp.eq on f16, or on widened f32 operands in the no-F16 runs, followed
        ; by selp.u32 1/0 and a b32 return store.
    453 define i1 @test_fcmp_oeq(half %a, half %b) #0 {
    454   %r = fcmp oeq half %a, %b
    455   ret i1 %r
    456 }
    457 
    458 ; CHECK-LABEL: test_fcmp_ogt(
    459 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ogt_param_0];
    460 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ogt_param_1];
    461 ; CHECK-F16:  setp.gt.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
    462 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    463 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    464 ; CHECK-NOF16: setp.gt.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    465 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    466 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    467 ; CHECK-NEXT: ret;
        ; "fcmp ogt" (ordered and greater than) on halves, returned as i1.
        ; Expected as setp.gt on f16, or on widened f32 operands in the no-F16
        ; runs, followed by selp.u32 1/0 and a b32 return store.
    468 define i1 @test_fcmp_ogt(half %a, half %b) #0 {
    469   %r = fcmp ogt half %a, %b
    470   ret i1 %r
    471 }
    472 
    473 ; CHECK-LABEL: test_fcmp_oge(
    474 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_oge_param_0];
    475 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_oge_param_1];
    476 ; CHECK-F16:  setp.ge.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
    477 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    478 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    479 ; CHECK-NOF16: setp.ge.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    480 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    481 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    482 ; CHECK-NEXT: ret;
        ; "fcmp oge" (ordered and greater/equal) on halves, returned as i1.
        ; Expected as setp.ge on f16, or on widened f32 operands in the no-F16
        ; runs, followed by selp.u32 1/0 and a b32 return store.
    483 define i1 @test_fcmp_oge(half %a, half %b) #0 {
    484   %r = fcmp oge half %a, %b
    485   ret i1 %r
    486 }
    487 
    488 ; CHECK-LABEL: test_fcmp_olt(
    489 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_olt_param_0];
    490 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_olt_param_1];
    491 ; CHECK-F16:  setp.lt.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
    492 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    493 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    494 ; CHECK-NOF16: setp.lt.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    495 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    496 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    497 ; CHECK-NEXT: ret;
        ; "fcmp olt" (ordered and less than) on halves, returned as i1. Expected
        ; as setp.lt on f16, or on widened f32 operands in the no-F16 runs,
        ; followed by selp.u32 1/0 and a b32 return store. The label directive
        ; above anchors these patterns to this function's own output.
    498 define i1 @test_fcmp_olt(half %a, half %b) #0 {
    499   %r = fcmp olt half %a, %b
    500   ret i1 %r
    501 }
    502 
    503 ; CHECK-LABEL: test_fcmp_ole(
    504 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ole_param_0];
    505 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ole_param_1];
    506 ; CHECK-F16:  setp.le.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
    507 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    508 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    509 ; CHECK-NOF16: setp.le.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    510 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    511 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    512 ; CHECK-NEXT: ret;
        ; "fcmp ole" (ordered and less/equal) on halves, returned as i1. Expected
        ; as setp.le on f16, or on widened f32 operands in the no-F16 runs,
        ; followed by selp.u32 1/0 and a b32 return store. The label directive
        ; above anchors these patterns to this function's own output.
    513 define i1 @test_fcmp_ole(half %a, half %b) #0 {
    514   %r = fcmp ole half %a, %b
    515   ret i1 %r
    516 }
    517 
    518 ; CHECK-LABEL: test_fcmp_ord(
    519 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fcmp_ord_param_0];
    520 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fcmp_ord_param_1];
    521 ; CHECK-F16:  setp.num.f16    [[PRED:%p[0-9]+]], [[A]], [[B]]
    522 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    523 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    524 ; CHECK-NOF16: setp.num.f32   [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    525 ; CHECK-NEXT: selp.u32        [[R:%r[0-9]+]], 1, 0, [[PRED]];
    526 ; CHECK-NEXT: st.param.b32    [func_retval0+0], [[R]];
    527 ; CHECK-NEXT: ret;
        ; "fcmp ord" (ordered: neither operand is NaN) on halves, returned as i1.
        ; Expected as setp.num on f16, or on widened f32 operands in the no-F16
        ; runs, followed by selp.u32 1/0 and a b32 return store.
    528 define i1 @test_fcmp_ord(half %a, half %b) #0 {
    529   %r = fcmp ord half %a, %b
    530   ret i1 %r
    531 }
    532 
    533 ; CHECK-LABEL: test_br_cc(
    534 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_br_cc_param_0];
    535 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_br_cc_param_1];
    536 ; CHECK-DAG:  ld.param.u64    %[[C:rd[0-9]+]], [test_br_cc_param_2];
    537 ; CHECK-DAG:  ld.param.u64    %[[D:rd[0-9]+]], [test_br_cc_param_3];
    538 ; CHECK-F16:  setp.lt.f16     [[PRED:%p[0-9]+]], [[A]], [[B]]
    539 ; CHECK-NOF16-DAG: cvt.f32.f16 [[AF:%f[0-9]+]], [[A]];
    540 ; CHECK-NOF16-DAG: cvt.f32.f16 [[BF:%f[0-9]+]], [[B]];
    541 ; CHECK-NOF16: setp.lt.f32    [[PRED:%p[0-9]+]], [[AF]], [[BF]]
    542 ; CHECK-NEXT: @[[PRED]] bra   [[LABEL:LBB.*]];
    543 ; CHECK:      st.u32  [%[[C]]],
    544 ; CHECK:      [[LABEL]]:
    545 ; CHECK:      st.u32  [%[[D]]],
    546 ; CHECK:      ret;
        ; Conditional branch on "fcmp uge" of two halves. Note the IR predicate is
        ; uge while the expected compare is setp.lt: the patterns expect the
        ; predicated bra to jump to the block that stores through parameter 3
        ; (%p2, the "else" block), with the store through parameter 2 (%p1, the
        ; "then" block) on the fall-through path -- presumably the branch is
        ; emitted on the inverted condition.
    547 define void @test_br_cc(half %a, half %b, i32* %p1, i32* %p2) #0 {
    548   %c = fcmp uge half %a, %b
    549   br i1 %c, label %then, label %else
    550 then:
    551   store i32 0, i32* %p1
    552   ret void
    553 else:
    554   store i32 0, i32* %p2
    555   ret void
    556 }
    557 
    558 ; CHECK-LABEL: test_phi(
    559 ; CHECK:      ld.param.u64    %[[P1:rd[0-9]+]], [test_phi_param_0];
    560 ; CHECK:      ld.b16  {{%h[0-9]+}}, [%[[P1]]];
    561 ; CHECK: [[LOOP:LBB[0-9_]+]]:
    562 ; CHECK:      mov.b16 [[R:%h[0-9]+]], [[AB:%h[0-9]+]];
    563 ; CHECK:      ld.b16  [[AB:%h[0-9]+]], [%[[P1]]];
    564 ; CHECK:      {
    565 ; CHECK:      st.param.b64    [param0+0], %[[P1]];
    566 ; CHECK:      call.uni (retval0),
    567 ; CHECK-NEXT: test_dummy
    568 ; CHECK:      }
    569 ; CHECK:      setp.eq.b32     [[PRED:%p[0-9]+]], %r{{[0-9]+}}, 1;
    570 ; CHECK:      @[[PRED]] bra   [[LOOP]];
    571 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    572 ; CHECK:      ret;
        ; Loop-carried phi of half values: %r is %a on entry and the %b loaded in
        ; the previous iteration on every back-edge; the loop repeats while
        ; test_dummy returns true and the function returns %r. The expected PTX
        ; has the phi as a mov.b16 at the top of the loop and returns the register
        ; written by that mov.
        ; NOTE(review): the AB variable is defined by both the mov pattern and the
        ; following ld.b16 pattern, so the two are not forced to name the same
        ; register -- confirm whether that is intended.
    573 define half @test_phi(half* %p1) #0 {
    574 entry:
    575   %a = load half, half* %p1
    576   br label %loop
    577 loop:
    578   %r = phi half [%a, %entry], [%b, %loop]
    579   %b = load half, half* %p1
    580   %c = call i1 @test_dummy(half* %p1)
    581   br i1 %c, label %loop, label %return
    582 return:
    583   ret half %r
    584 }
        ; External function whose i1 result drives the loop in test_phi.
    585 declare i1 @test_dummy(half* %p1) #0
    586 
    587 ; CHECK-LABEL: test_fptosi_i32(
    588 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fptosi_i32_param_0];
    589 ; CHECK:      cvt.rzi.s32.f16 [[R:%r[0-9]+]], [[A]];
    590 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    591 ; CHECK:      ret;
        ; half -> signed i32 conversion. All configurations expect a single
        ; cvt.rzi.s32.f16 straight from the f16 register (no f32 intermediate).
    592 define i32 @test_fptosi_i32(half %a) #0 {
    593   %r = fptosi half %a to i32
    594   ret i32 %r
    595 }
    596 
    597 ; CHECK-LABEL: test_fptosi_i64(
    598 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fptosi_i64_param_0];
    599 ; CHECK:      cvt.rzi.s64.f16 [[R:%rd[0-9]+]], [[A]];
    600 ; CHECK:      st.param.b64    [func_retval0+0], [[R]];
    601 ; CHECK:      ret;
        ; half -> signed i64 conversion. All configurations expect a single
        ; cvt.rzi.s64.f16 into a 64-bit register and a b64 return store.
    602 define i64 @test_fptosi_i64(half %a) #0 {
    603   %r = fptosi half %a to i64
    604   ret i64 %r
    605 }
    606 
    607 ; CHECK-LABEL: test_fptoui_i32(
    608 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fptoui_i32_param_0];
    609 ; CHECK:      cvt.rzi.u32.f16 [[R:%r[0-9]+]], [[A]];
    610 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    611 ; CHECK:      ret;
        ; half -> unsigned i32 conversion. All configurations expect a single
        ; cvt.rzi.u32.f16 straight from the f16 register.
    612 define i32 @test_fptoui_i32(half %a) #0 {
    613   %r = fptoui half %a to i32
    614   ret i32 %r
    615 }
    616 
    617 ; CHECK-LABEL: test_fptoui_i64(
    618 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fptoui_i64_param_0];
    619 ; CHECK:      cvt.rzi.u64.f16 [[R:%rd[0-9]+]], [[A]];
    620 ; CHECK:      st.param.b64    [func_retval0+0], [[R]];
    621 ; CHECK:      ret;
        ; half -> unsigned i64 conversion. All configurations expect a single
        ; cvt.rzi.u64.f16 into a 64-bit register and a b64 return store.
    622 define i64 @test_fptoui_i64(half %a) #0 {
    623   %r = fptoui half %a to i64
    624   ret i64 %r
    625 }
    626 
    627 ; CHECK-LABEL: test_uitofp_i32(
    628 ; CHECK:      ld.param.u32    [[A:%r[0-9]+]], [test_uitofp_i32_param_0];
    629 ; CHECK:      cvt.rn.f16.u32  [[R:%h[0-9]+]], [[A]];
    630 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    631 ; CHECK:      ret;
        ; Unsigned i32 -> half conversion. All configurations expect a single
        ; cvt.rn.f16.u32 and a b16 return store.
    632 define half @test_uitofp_i32(i32 %a) #0 {
    633   %r = uitofp i32 %a to half
    634   ret half %r
    635 }
    636 
    637 ; CHECK-LABEL: test_uitofp_i64(
    638 ; CHECK:      ld.param.u64    [[A:%rd[0-9]+]], [test_uitofp_i64_param_0];
    639 ; CHECK:      cvt.rn.f16.u64  [[R:%h[0-9]+]], [[A]];
    640 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    641 ; CHECK:      ret;
        ; Unsigned i64 -> half conversion. All configurations expect a single
        ; cvt.rn.f16.u64 and a b16 return store.
    642 define half @test_uitofp_i64(i64 %a) #0 {
    643   %r = uitofp i64 %a to half
    644   ret half %r
    645 }
    646 
    647 ; CHECK-LABEL: test_sitofp_i32(
    648 ; CHECK:      ld.param.u32    [[A:%r[0-9]+]], [test_sitofp_i32_param_0];
    649 ; CHECK:      cvt.rn.f16.s32  [[R:%h[0-9]+]], [[A]];
    650 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    651 ; CHECK:      ret;
        ; Signed i32 -> half conversion. The parameter is still loaded with
        ; ld.param.u32; the signedness shows up only in the cvt.rn.f16.s32.
    652 define half @test_sitofp_i32(i32 %a) #0 {
    653   %r = sitofp i32 %a to half
    654   ret half %r
    655 }
    656 
    657 ; CHECK-LABEL: test_sitofp_i64(
    658 ; CHECK:      ld.param.u64    [[A:%rd[0-9]+]], [test_sitofp_i64_param_0];
    659 ; CHECK:      cvt.rn.f16.s64  [[R:%h[0-9]+]], [[A]];
    660 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    661 ; CHECK:      ret;
        ; Signed i64 -> half conversion. The parameter is still loaded with
        ; ld.param.u64; the signedness shows up only in the cvt.rn.f16.s64.
    662 define half @test_sitofp_i64(i64 %a) #0 {
    663   %r = sitofp i64 %a to half
    664   ret half %r
    665 }
    666 
    667 ; CHECK-LABEL: test_uitofp_i32_fadd(
    668 ; CHECK-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_uitofp_i32_fadd_param_0];
    669 ; CHECK-DAG:  cvt.rn.f16.u32  [[C:%h[0-9]+]], [[A]];
    670 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_uitofp_i32_fadd_param_1];
    671 ; CHECK-F16:       add.rn.f16      [[R:%h[0-9]+]], [[B]], [[C]];
    672 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
    673 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
    674 ; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], [[C32]];
    675 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
    676 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    677 ; CHECK:      ret;
    678 define half @test_uitofp_i32_fadd(i32 %a, half %b) #0 {  ; unsigned int-to-half conversion feeding a half add
    679   %c = uitofp i32 %a to half  ; converted value becomes the second add operand
    680   %r = fadd half %b, %c  ; operand order (%b, %c) is mirrored by the add patterns above (native f16 or f32 fallback)
    681   ret half %r
    682 }
    683 
    684 ; CHECK-LABEL: test_sitofp_i32_fadd(
    685 ; CHECK-DAG:  ld.param.u32    [[A:%r[0-9]+]], [test_sitofp_i32_fadd_param_0];
    686 ; CHECK-DAG:  cvt.rn.f16.s32  [[C:%h[0-9]+]], [[A]];
    687 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_sitofp_i32_fadd_param_1];
    688 ; CHECK-F16:       add.rn.f16      [[R:%h[0-9]+]], [[B]], [[C]];
    689 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
    690 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
    691 ; CHECK-NOF16-NEXT: add.rn.f32     [[R32:%f[0-9]+]], [[B32]], [[C32]];
    692 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
    693 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    694 ; CHECK:      ret;
    695 define half @test_sitofp_i32_fadd(i32 %a, half %b) #0 {  ; signed twin of test_uitofp_i32_fadd; the f32 fallback path is verified as well
    696   %c = sitofp i32 %a to half  ; converted value becomes the second add operand
    697   %r = fadd half %b, %c  ; operand order (%b, %c) is mirrored by the add patterns above
    698   ret half %r
    699 }
    700 
    701 ; CHECK-LABEL: test_fptrunc_float(
    702 ; CHECK:      ld.param.f32    [[A:%f[0-9]+]], [test_fptrunc_float_param_0];
    703 ; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[A]];
    704 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    705 ; CHECK:      ret;
    706 define half @test_fptrunc_float(float %a) #0 {  ; float to half truncation test
    707   %r = fptrunc float %a to half  ; expected PTX above: one cvt.rn.f16.f32
    708   ret half %r
    709 }
    710 
    711 ; CHECK-LABEL: test_fptrunc_double(
    712 ; CHECK:      ld.param.f64    [[A:%fd[0-9]+]], [test_fptrunc_double_param_0];
    713 ; CHECK:      cvt.rn.f16.f64  [[R:%h[0-9]+]], [[A]];
    714 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    715 ; CHECK:      ret;
    716 define half @test_fptrunc_double(double %a) #0 {  ; double to half truncation test
    717   %r = fptrunc double %a to half  ; expected PTX above: one cvt.rn.f16.f64
    718   ret half %r
    719 }
    720 
    721 ; CHECK-LABEL: test_fpext_float(
    722 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fpext_float_param_0];
    723 ; CHECK:      cvt.f32.f16     [[R:%f[0-9]+]], [[A]];
    724 ; CHECK:      st.param.f32    [func_retval0+0], [[R]];
    725 ; CHECK:      ret;
    726 define float @test_fpext_float(half %a) #0 {  ; half to float extension test
    727   %r = fpext half %a to float  ; expected PTX above: one cvt.f32.f16, result stored as f32
    728   ret float %r
    729 }
    730 
    731 ; CHECK-LABEL: test_fpext_double(
    732 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fpext_double_param_0];
    733 ; CHECK:      cvt.f64.f16     [[R:%fd[0-9]+]], [[A]];
    734 ; CHECK:      st.param.f64    [func_retval0+0], [[R]];
    735 ; CHECK:      ret;
    736 define double @test_fpext_double(half %a) #0 {  ; half to double extension test
    737   %r = fpext half %a to double  ; expected PTX above: one cvt.f64.f16, result stored as f64
    738   ret double %r
    739 }
    740 
    741 
    742 ; CHECK-LABEL: test_bitcast_halftoi16(
    743 ; CHECK:      ld.param.b16    [[AH:%h[0-9]+]], [test_bitcast_halftoi16_param_0];
    744 ; CHECK:      mov.b16         [[AS:%rs[0-9]+]], [[AH]]
    745 ; CHECK:      cvt.u32.u16     [[R:%r[0-9]+]], [[AS]]
    746 ; CHECK:      st.param.b32    [func_retval0+0], [[R]];
    747 ; CHECK:      ret;
    748 define i16 @test_bitcast_halftoi16(half %a) #0 {  ; reinterprets the bits of a half as an i16
    749   %r = bitcast half %a to i16  ; expected PTX above: mov.b16 into a 16-bit integer register, then cvt.u32.u16 for the b32 return slot
    750   ret i16 %r
    751 }
    752 
    753 ; CHECK-LABEL: test_bitcast_i16tohalf(
    754 ; CHECK:      ld.param.u16    [[AS:%rs[0-9]+]], [test_bitcast_i16tohalf_param_0];
    755 ; CHECK:      mov.b16         [[AH:%h[0-9]+]], [[AS]]
    756 ; CHECK:      st.param.b16    [func_retval0+0], [[AH]];
    757 ; CHECK:      ret;
    758 define half @test_bitcast_i16tohalf(i16 %a) #0 {  ; reinterprets the bits of an i16 as a half
    759   %r = bitcast i16 %a to half  ; expected PTX above: a plain mov.b16 between register classes
    760   ret half %r
    761 }
    762 
    763 
    764 declare half @llvm.sqrt.f16(half %a) #0  ; f16 intrinsic declarations for the tests that follow
    765 declare half @llvm.powi.f16(half %a, i32 %b) #0  ; its test below is commented out (requires libcall)
    766 declare half @llvm.sin.f16(half %a) #0
    767 declare half @llvm.cos.f16(half %a) #0
    768 declare half @llvm.pow.f16(half %a, half %b) #0  ; its test below is commented out (requires libcall)
    769 declare half @llvm.exp.f16(half %a) #0  ; its test below is commented out (requires libcall)
    770 declare half @llvm.exp2.f16(half %a) #0  ; its test below is commented out (requires libcall)
    771 declare half @llvm.log.f16(half %a) #0  ; its test below is commented out (requires libcall)
    772 declare half @llvm.log10.f16(half %a) #0  ; its test below is commented out (requires libcall)
    773 declare half @llvm.log2.f16(half %a) #0  ; its test below is commented out (requires libcall)
    774 declare half @llvm.fma.f16(half %a, half %b, half %c) #0
    775 declare half @llvm.fabs.f16(half %a) #0
    776 declare half @llvm.minnum.f16(half %a, half %b) #0
    777 declare half @llvm.maxnum.f16(half %a, half %b) #0
    778 declare half @llvm.copysign.f16(half %a, half %b) #0
    779 declare half @llvm.floor.f16(half %a) #0
    780 declare half @llvm.ceil.f16(half %a) #0
    781 declare half @llvm.trunc.f16(half %a) #0
    782 declare half @llvm.rint.f16(half %a) #0
    783 declare half @llvm.nearbyint.f16(half %a) #0
    784 declare half @llvm.round.f16(half %a) #0
    785 declare half @llvm.fmuladd.f16(half %a, half %b, half %c) #0
    786 
    787 ; CHECK-LABEL: test_sqrt(
    788 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_sqrt_param_0];
    789 ; CHECK:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
    790 ; CHECK:      sqrt.rn.f32     [[RF:%f[0-9]+]], [[AF]];
    791 ; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
    792 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    793 ; CHECK:      ret;
    794 define half @test_sqrt(half %a) #0 {  ; half square root test
    795   %r = call half @llvm.sqrt.f16(half %a)  ; expected PTX above for every run line: extend to f32, sqrt.rn.f32, round back to f16
    796   ret half %r
    797 }
    798 
    799 ;;; Can't do this yet: requires libcall.
    800 ; XCHECK-LABEL: test_powi(
    801 ;define half @test_powi(half %a, i32 %b) #0 {
    802 ;  %r = call half @llvm.powi.f16(half %a, i32 %b)
    803 ;  ret half %r
    804 ;}
    805 
    806 ; CHECK-LABEL: test_sin(
    807 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_sin_param_0];
    808 ; CHECK:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
    809 ; CHECK:      sin.approx.f32  [[RF:%f[0-9]+]], [[AF]];
    810 ; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
    811 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    812 ; CHECK:      ret;
    813 define half @test_sin(half %a) #0 #1 {  ; half sine test; additionally carries attribute group #1 ("unsafe-fp-math")
    814   %r = call half @llvm.sin.f16(half %a)  ; expected PTX above: extend to f32, sin.approx.f32, round back; presumably #1 is what permits the approx form - TODO confirm
    815   ret half %r
    816 }
    817 
    818 ; CHECK-LABEL: test_cos(
    819 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_cos_param_0];
    820 ; CHECK:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
    821 ; CHECK:      cos.approx.f32  [[RF:%f[0-9]+]], [[AF]];
    822 ; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
    823 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    824 ; CHECK:      ret;
    825 define half @test_cos(half %a) #0 #1 {  ; half cosine test; additionally carries attribute group #1 ("unsafe-fp-math")
    826   %r = call half @llvm.cos.f16(half %a)  ; expected PTX above: extend to f32, cos.approx.f32, round back; presumably #1 is what permits the approx form - TODO confirm
    827   ret half %r
    828 }
    829 
    830 ;;; Can't do this yet: requires libcall.
    831 ; XCHECK-LABEL: test_pow(
    832 ;define half @test_pow(half %a, half %b) #0 {
    833 ;  %r = call half @llvm.pow.f16(half %a, half %b)
    834 ;  ret half %r
    835 ;}
    836 
    837 ;;; Can't do this yet: requires libcall.
    838 ; XCHECK-LABEL: test_exp(
    839 ;define half @test_exp(half %a) #0 {
    840 ;  %r = call half @llvm.exp.f16(half %a)
    841 ;  ret half %r
    842 ;}
    843 
    844 ;;; Can't do this yet: requires libcall.
    845 ; XCHECK-LABEL: test_exp2(
    846 ;define half @test_exp2(half %a) #0 {
    847 ;  %r = call half @llvm.exp2.f16(half %a)
    848 ;  ret half %r
    849 ;}
    850 
    851 ;;; Can't do this yet: requires libcall.
    852 ; XCHECK-LABEL: test_log(
    853 ;define half @test_log(half %a) #0 {
    854 ;  %r = call half @llvm.log.f16(half %a)
    855 ;  ret half %r
    856 ;}
    857 
    858 ;;; Can't do this yet: requires libcall.
    859 ; XCHECK-LABEL: test_log10(
    860 ;define half @test_log10(half %a) #0 {
    861 ;  %r = call half @llvm.log10.f16(half %a)
    862 ;  ret half %r
    863 ;}
    864 
    865 ;;; Can't do this yet: requires libcall.
    866 ; XCHECK-LABEL: test_log2(
    867 ;define half @test_log2(half %a) #0 {
    868 ;  %r = call half @llvm.log2.f16(half %a)
    869 ;  ret half %r
    870 ;}
    871 
    872 ; CHECK-LABEL: test_fma(
    873 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fma_param_0];
    874 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fma_param_1];
    875 ; CHECK-DAG:  ld.param.b16    [[C:%h[0-9]+]], [test_fma_param_2];
    876 ; CHECK-F16:      fma.rn.f16      [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
    877 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
    878 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
    879 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
    880 ; CHECK-NOF16-NEXT: fma.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
    881 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
    882 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    883 ; CHECK:      ret;
    884 define half @test_fma(half %a, half %b, half %c) #0 {  ; fused multiply-add on three half operands
    885   %r = call half @llvm.fma.f16(half %a, half %b, half %c)  ; expected PTX above: fma.rn.f16 with f16 math, otherwise fma.rn.f32 between conversions
    886   ret half %r
    887 }
    888 
    889 ; CHECK-LABEL: test_fabs(
    890 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_fabs_param_0];
    891 ; CHECK:      cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
    892 ; CHECK:      abs.f32         [[RF:%f[0-9]+]], [[AF]];
    893 ; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
    894 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    895 ; CHECK:      ret;
    896 define half @test_fabs(half %a) #0 {  ; half absolute-value test
    897   %r = call half @llvm.fabs.f16(half %a)  ; expected PTX above for every run line: extend to f32, abs.f32, round back to f16
    898   ret half %r
    899 }
    900 
    901 ; CHECK-LABEL: test_minnum(
    902 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_minnum_param_0];
    903 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_minnum_param_1];
    904 ; CHECK-DAG:  cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
    905 ; CHECK-DAG:  cvt.f32.f16     [[BF:%f[0-9]+]], [[B]];
    906 ; CHECK:      min.f32         [[RF:%f[0-9]+]], [[AF]], [[BF]];
    907 ; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
    908 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    909 ; CHECK:      ret;
    910 define half @test_minnum(half %a, half %b) #0 {  ; half minnum test
    911   %r = call half @llvm.minnum.f16(half %a, half %b)  ; expected PTX above for every run line: both operands extended to f32, min.f32, round back
    912   ret half %r
    913 }
    914 
    915 ; CHECK-LABEL: test_maxnum(
    916 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_maxnum_param_0];
    917 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_maxnum_param_1];
    918 ; CHECK-DAG:  cvt.f32.f16     [[AF:%f[0-9]+]], [[A]];
    919 ; CHECK-DAG:  cvt.f32.f16     [[BF:%f[0-9]+]], [[B]];
    920 ; CHECK:      max.f32         [[RF:%f[0-9]+]], [[AF]], [[BF]];
    921 ; CHECK:      cvt.rn.f16.f32  [[R:%h[0-9]+]], [[RF]];
    922 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    923 ; CHECK:      ret;
    924 define half @test_maxnum(half %a, half %b) #0 {  ; half maxnum test
    925   %r = call half @llvm.maxnum.f16(half %a, half %b)  ; expected PTX above for every run line: both operands extended to f32, max.f32, round back
    926   ret half %r
    927 }
    928 
    929 ; CHECK-LABEL: test_copysign(
    930 ; CHECK-DAG:  ld.param.b16    [[AH:%h[0-9]+]], [test_copysign_param_0];
    931 ; CHECK-DAG:  ld.param.b16    [[BH:%h[0-9]+]], [test_copysign_param_1];
    932 ; CHECK-DAG:  mov.b16         [[AS:%rs[0-9]+]], [[AH]];
    933 ; CHECK-DAG:  mov.b16         [[BS:%rs[0-9]+]], [[BH]];
    934 ; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[AS]], 32767;
    935 ; CHECK-DAG:  and.b16         [[BX:%rs[0-9]+]], [[BS]], -32768;
    936 ; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX]];
    937 ; CHECK:      mov.b16         [[R:%h[0-9]+]], [[RX]];
    938 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    939 ; CHECK:      ret;
    940 define half @test_copysign(half %a, half %b) #0 {  ; copysign with both operands of type half
    941   %r = call half @llvm.copysign.f16(half %a, half %b)  ; expected PTX above: 16-bit integer ops - and 32767 on %a, and -32768 on %b, then or
    942   ret half %r
    943 }
    944 
    945 ; CHECK-LABEL: test_copysign_f32(
    946 ; CHECK-DAG:  ld.param.b16    [[AH:%h[0-9]+]], [test_copysign_f32_param_0];
    947 ; CHECK-DAG:  ld.param.f32    [[BF:%f[0-9]+]], [test_copysign_f32_param_1];
    948 ; CHECK-DAG:  mov.b16         [[A:%rs[0-9]+]], [[AH]];
    949 ; CHECK-DAG:  mov.b32         [[B:%r[0-9]+]], [[BF]];
    950 ; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[A]], 32767;
    951 ; CHECK-DAG:  and.b32         [[BX0:%r[0-9]+]], [[B]], -2147483648;
    952 ; CHECK-DAG:  shr.u32         [[BX1:%r[0-9]+]], [[BX0]], 16;
    953 ; CHECK-DAG:  cvt.u16.u32     [[BX2:%rs[0-9]+]], [[BX1]];
    954 ; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
    955 ; CHECK:      mov.b16         [[R:%h[0-9]+]], [[RX]];
    956 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    957 ; CHECK:      ret;
    958 define half @test_copysign_f32(half %a, float %b) #0 {  ; copysign whose sign operand is a truncated float
    959   %tb = fptrunc float %b to half  ; the expected PTX above reads the sign from the f32 bit pattern (and.b32 -2147483648, shr.u32 16)
    960   %r = call half @llvm.copysign.f16(half %a, half %tb)  ; magnitude bits of %a (and 32767) or-ed with the shifted sign bit
    961   ret half %r
    962 }
    963 
    964 ; CHECK-LABEL: test_copysign_f64(
    965 ; CHECK-DAG:  ld.param.b16    [[AH:%h[0-9]+]], [test_copysign_f64_param_0];
    966 ; CHECK-DAG:  ld.param.f64    [[BD:%fd[0-9]+]], [test_copysign_f64_param_1];
    967 ; CHECK-DAG:  mov.b16         [[A:%rs[0-9]+]], [[AH]];
    968 ; CHECK-DAG:  mov.b64         [[B:%rd[0-9]+]], [[BD]];
    969 ; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[A]], 32767;
    970 ; CHECK-DAG:  and.b64         [[BX0:%rd[0-9]+]], [[B]], -9223372036854775808;
    971 ; CHECK-DAG:  shr.u64         [[BX1:%rd[0-9]+]], [[BX0]], 48;
    972 ; CHECK-DAG:  cvt.u16.u64     [[BX2:%rs[0-9]+]], [[BX1]];
    973 ; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX2]];
    974 ; CHECK:      mov.b16         [[R:%h[0-9]+]], [[RX]];
    975 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
    976 ; CHECK:      ret;
    977 define half @test_copysign_f64(half %a, double %b) #0 {  ; copysign whose sign operand is a truncated double
    978   %tb = fptrunc double %b to half  ; the expected PTX above reads the sign from the f64 bit pattern (and.b64 -9223372036854775808, shr.u64 48)
    979   %r = call half @llvm.copysign.f16(half %a, half %tb)  ; magnitude bits of %a (and 32767) or-ed with the shifted sign bit
    980   ret half %r
    981 }
    982 
    983 ; CHECK-LABEL: test_copysign_extended(
    984 ; CHECK-DAG:  ld.param.b16    [[AH:%h[0-9]+]], [test_copysign_extended_param_0];
    985 ; CHECK-DAG:  ld.param.b16    [[BH:%h[0-9]+]], [test_copysign_extended_param_1];
    986 ; CHECK-DAG:  mov.b16         [[AS:%rs[0-9]+]], [[AH]];
    987 ; CHECK-DAG:  mov.b16         [[BS:%rs[0-9]+]], [[BH]];
    988 ; CHECK-DAG:  and.b16         [[AX:%rs[0-9]+]], [[AS]], 32767;
    989 ; CHECK-DAG:  and.b16         [[BX:%rs[0-9]+]], [[BS]], -32768;
    990 ; CHECK:      or.b16          [[RX:%rs[0-9]+]], [[AX]], [[BX]];
    991 ; CHECK:      mov.b16         [[R:%h[0-9]+]], [[RX]];
    992 ; CHECK:      cvt.f32.f16     [[XR:%f[0-9]+]], [[R]];
    993 ; CHECK:      st.param.f32    [func_retval0+0], [[XR]];
    994 ; CHECK:      ret;
    995 define float @test_copysign_extended(half %a, half %b) #0 {  ; half copysign whose result is widened to float
    996   %r = call half @llvm.copysign.f16(half %a, half %b)  ; expected PTX above: same 16-bit and/and/or sequence as test_copysign
    997   %xr = fpext half %r to float  ; expected PTX above: cvt.f32.f16 applied to the combined half value
    998   ret float %xr
    999 }
   1000 
   1001 ; CHECK-LABEL: test_floor(
   1002 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_floor_param_0];
   1003 ; CHECK:      cvt.rmi.f16.f16 [[R:%h[0-9]+]], [[A]];
   1004 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
   1005 ; CHECK:      ret;
   1006 define half @test_floor(half %a) #0 {  ; half floor test
   1007   %r = call half @llvm.floor.f16(half %a)  ; expected PTX above: cvt.rmi.f16.f16
   1008   ret half %r
   1009 }
   1010 
   1011 ; CHECK-LABEL: test_ceil(
   1012 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_ceil_param_0];
   1013 ; CHECK:      cvt.rpi.f16.f16 [[R:%h[0-9]+]], [[A]];
   1014 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
   1015 ; CHECK:      ret;
   1016 define half @test_ceil(half %a) #0 {  ; half ceil test
   1017   %r = call half @llvm.ceil.f16(half %a)  ; expected PTX above: cvt.rpi.f16.f16
   1018   ret half %r
   1019 }
   1020 
   1021 ; CHECK-LABEL: test_trunc(
   1022 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_trunc_param_0];
   1023 ; CHECK:      cvt.rzi.f16.f16 [[R:%h[0-9]+]], [[A]];
   1024 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
   1025 ; CHECK:      ret;
   1026 define half @test_trunc(half %a) #0 {  ; half trunc test
   1027   %r = call half @llvm.trunc.f16(half %a)  ; expected PTX above: cvt.rzi.f16.f16
   1028   ret half %r
   1029 }
   1030 
   1031 ; CHECK-LABEL: test_rint(
   1032 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_rint_param_0];
   1033 ; CHECK:      cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
   1034 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
   1035 ; CHECK:      ret;
   1036 define half @test_rint(half %a) #0 {  ; half rint test
   1037   %r = call half @llvm.rint.f16(half %a)  ; expected PTX above: cvt.rni.f16.f16
   1038   ret half %r
   1039 }
   1040 
   1041 ; CHECK-LABEL: test_nearbyint(
   1042 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_nearbyint_param_0];
   1043 ; CHECK:      cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
   1044 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
   1045 ; CHECK:      ret;
   1046 define half @test_nearbyint(half %a) #0 {  ; half nearbyint test
   1047   %r = call half @llvm.nearbyint.f16(half %a)  ; expected PTX above: cvt.rni.f16.f16, the same pattern test_rint expects
   1048   ret half %r
   1049 }
   1050 
   1051 ; CHECK-LABEL: test_round(
   1052 ; CHECK:      ld.param.b16    [[A:%h[0-9]+]], [test_round_param_0];
   1053 ; CHECK:      cvt.rni.f16.f16 [[R:%h[0-9]+]], [[A]];
   1054 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
   1055 ; CHECK:      ret;
   1056 define half @test_round(half %a) #0 {  ; half round test
   1057   %r = call half @llvm.round.f16(half %a)  ; NOTE(review): expected PTX above is cvt.rni (ties to even) while llvm.round rounds ties away from zero - confirm this lowering is intended
   1058   ret half %r
   1059 }
   1060 
   1061 ; CHECK-LABEL: test_fmuladd(
   1062 ; CHECK-DAG:  ld.param.b16    [[A:%h[0-9]+]], [test_fmuladd_param_0];
   1063 ; CHECK-DAG:  ld.param.b16    [[B:%h[0-9]+]], [test_fmuladd_param_1];
   1064 ; CHECK-DAG:  ld.param.b16    [[C:%h[0-9]+]], [test_fmuladd_param_2];
   1065 ; CHECK-F16:        fma.rn.f16     [[R:%h[0-9]+]], [[A]], [[B]], [[C]];
   1066 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[A32:%f[0-9]+]], [[A]]
   1067 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[B32:%f[0-9]+]], [[B]]
   1068 ; CHECK-NOF16-DAG:  cvt.f32.f16    [[C32:%f[0-9]+]], [[C]]
   1069 ; CHECK-NOF16-NEXT: fma.rn.f32     [[R32:%f[0-9]+]], [[A32]], [[B32]], [[C32]];
   1070 ; CHECK-NOF16-NEXT: cvt.rn.f16.f32 [[R:%h[0-9]+]], [[R32]]
   1071 ; CHECK:      st.param.b16    [func_retval0+0], [[R]];
   1072 ; CHECK:      ret;
   1073 define half @test_fmuladd(half %a, half %b, half %c) #0 {  ; multiply-add intrinsic on three half operands
   1074   %r = call half @llvm.fmuladd.f16(half %a, half %b, half %c)  ; expected PTX above: fma.rn.f16 with f16 math, otherwise fma.rn.f32 between conversions
   1075   ret half %r
   1076 }
   1077 
   1078 attributes #0 = { nounwind }  ; attached to the test functions and intrinsic declarations in this file section
   1079 attributes #1 = { "unsafe-fp-math" = "true" }  ; additionally attached to test_sin and test_cos
   1080